Skip to content
Snippets Groups Projects
Verified Commit c84cf77d authored by Dorian Stoll
Browse files

nas-ft: cpp: Make grid size and iterations configurable

parent e0810ef6
Branches baseline
No related tags found
No related merge requests found
......@@ -7,18 +7,6 @@ using namespace std::complex_literals;
class FFT {
public:
// CLASS = C
constexpr static int NX = 512;
constexpr static int NY = 512;
constexpr static int NZ = 512;
constexpr static int MAXDIM = 512;
constexpr static int NITER_DEFAULT = 20;
// total number of grid points with padding
constexpr static int NXP = NX + 1;
constexpr static int NTOTALP = NXP * NY * NZ;
constexpr static double NTOTAL_F = (double)NX * NY * NZ;
// If processor array is 1x1 -> 0D grid decomposition
// Cache blocking params. These values are good for most RISC processors.
......@@ -45,22 +33,38 @@ private:
int m_fftblock;
int m_fftblockpad;
Eigen::Tensor<std::complex<double>, 1> m_u {NXP};
Eigen::Tensor<std::complex<double>, 3> m_u0 {NXP, NY, NZ};
Eigen::Tensor<std::complex<double>, 3> m_u1 {NXP, NY, NZ};
Eigen::Tensor<double, 3> m_twiddle {NXP, NY, NZ};
int m_nx;
int m_ny;
int m_nz;
int m_nxp;
int m_maxdim;
Eigen::Tensor<std::complex<double>, 1> m_u;
Eigen::Tensor<std::complex<double>, 3> m_u0;
Eigen::Tensor<std::complex<double>, 3> m_u1;
Eigen::Tensor<double, 3> m_twiddle;
public:
FFT()
FFT(int nx, int ny, int nz, int iterations)
: m_nx {nx},
m_ny {ny},
m_nz {nz},
m_nxp {nx + 1},
m_maxdim {std::max(nx, std::max(ny, nz))},
m_u {m_nxp},
m_u0 {m_nxp, ny, nz},
m_u1 {m_nxp, ny, nz},
m_twiddle {m_nxp, ny, nz}
{
m_debug = false;
m_niter = NITER_DEFAULT;
m_niter = iterations;
fmt::println("");
fmt::println("");
fmt::println(" NAS Parallel Benchmarks (NPB3.4-OMP) - FT Benchmark");
fmt::println("");
fmt::println(" Size : {}x{}x{}", NX, NY, NZ);
fmt::println(" Size : {}x{}x{}", m_nx, m_ny, m_nz);
fmt::println(" Iterations : {}", m_niter);
fmt::println(" Number of available threads : {}", omp_get_max_threads());
fmt::println("");
......@@ -83,9 +87,6 @@ public:
m_fftblock = FFTBLOCK_DEFAULT;
m_fftblockpad = FFTBLOCKPAD_DEFAULT;
if (m_fftblock != FFTBLOCK_DEFAULT)
m_fftblockpad = m_fftblock + 3;
this->init_ui();
}
......@@ -255,8 +256,8 @@ private:
Eigen::Tensor<std::complex<double>, 3> &x1,
Eigen::Tensor<std::complex<double>, 3> &x2)
{
Eigen::Tensor<std::complex<double>, 2> y1 {m_fftblockpad, MAXDIM};
Eigen::Tensor<std::complex<double>, 2> y2 {m_fftblockpad, MAXDIM};
Eigen::Tensor<std::complex<double>, 2> y1 {m_fftblockpad, m_maxdim};
Eigen::Tensor<std::complex<double>, 2> y2 {m_fftblockpad, m_maxdim};
// ---------------------------------------------------------------------
// note: args x1, x2 must be different arrays
......@@ -513,9 +514,9 @@ private:
#pragma omp for
// clang-format on
for (Eigen::Index j = 1; j <= 1024; j++) {
const Eigen::Index q = j % NX;
const Eigen::Index r = (3 * j) % NY;
const Eigen::Index s = (5 * j) % NZ;
const Eigen::Index q = j % m_nx;
const Eigen::Index r = (3 * j) % m_ny;
const Eigen::Index s = (5 * j) % m_nz;
local += m_u1(q, r, s);
}
......@@ -528,13 +529,20 @@ private:
}
}
chk /= NTOTAL_F;
chk /= (double)(m_nx * m_ny * m_nz);
fmt::println(" T = {} Checksum = {:.10E} {:.10E}", i, chk.real(), chk.imag());
}
};
int main(void)
int main(int argc, char **argv)
{
FFT {}.run();
assert(argc == 5);
int nx = atoi(argv[1]);
int ny = atoi(argv[2]);
int nz = atoi(argv[3]);
int it = atoi(argv[4]);
FFT {nx, ny, nz, it}.run();
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment