From a6b43bc521a4387b3f59ba373cefc49d8fbfe721 Mon Sep 17 00:00:00 2001 From: Dorian Stoll <dorian.stoll@uni-potsdam.de> Date: Wed, 12 Jun 2024 16:20:23 +0200 Subject: [PATCH] rodinia-srad: cpp: Initial port The results are mostly identical with the output from the C reference, but there are some floating point errors that prevent the hashes from matching. --- src/benchmarks/rodinia-srad/cpp/meson.build | 38 ++++ src/benchmarks/rodinia-srad/cpp/srad.cpp | 232 ++++++++++++++++++++ src/benchmarks/rodinia-srad/meson.build | 1 + subprojects/fmt.wrap | 13 ++ 4 files changed, 284 insertions(+) create mode 100644 src/benchmarks/rodinia-srad/cpp/meson.build create mode 100644 src/benchmarks/rodinia-srad/cpp/srad.cpp create mode 100644 subprojects/fmt.wrap diff --git a/src/benchmarks/rodinia-srad/cpp/meson.build b/src/benchmarks/rodinia-srad/cpp/meson.build new file mode 100644 index 00000000..74fbcbf4 --- /dev/null +++ b/src/benchmarks/rodinia-srad/cpp/meson.build @@ -0,0 +1,38 @@ +cc = meson.get_compiler('c') + +sources = [ + 'srad.cpp', +] + +# Build wrapped dependencies as static libraries and disable warnings +dependency_options = [ + 'default_library=static', + 'warning_level=0', + 'werror=false', +] + +dependencies = [ + dependency('openmp'), + dependency( + 'eigen3', + fallback: ['eigen', 'eigen_dep'], + default_options: dependency_options, + include_type: 'system', + ), + dependency( + 'fmt', + fallback: ['fmt', 'fmt_dep'], + default_options: dependency_options, + ), +] + +options = [ + 'cpp_std=c++17', +] + +executable( + 'rodinia-srad-cpp', + sources, + dependencies: dependencies, + override_options: options, +) diff --git a/src/benchmarks/rodinia-srad/cpp/srad.cpp b/src/benchmarks/rodinia-srad/cpp/srad.cpp new file mode 100644 index 00000000..61febf6a --- /dev/null +++ b/src/benchmarks/rodinia-srad/cpp/srad.cpp @@ -0,0 +1,232 @@ +/* + * Eigen defaults to using column-major order for its container types. But the C reference code is + * written for row-major containers. Instead of touching every for-loop in the program, simply + * change the default. + */ +#define EIGEN_DEFAULT_TO_ROW_MAJOR + +#include <Eigen/Eigen> +#include <fmt/format.h> + +class SRAD { +private: + template <class T> + using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>; + + template <class T> + using Vector = Eigen::Vector<T, Eigen::Dynamic>; + +private: + int m_r1; + int m_r2; + int m_c1; + int m_c2; + float m_lambda; + + int m_size_R; + + Matrix<float> m_I {}; + Matrix<float> m_J {}; + Matrix<float> m_c {}; + + Vector<Eigen::Index> m_iN {}; + Vector<Eigen::Index> m_iS {}; + Vector<Eigen::Index> m_jW {}; + Vector<Eigen::Index> m_jE {}; + + Matrix<float> m_dN {}; + Matrix<float> m_dS {}; + Matrix<float> m_dW {}; + Matrix<float> m_dE {}; + +public: + SRAD(const int rows, + const int cols, + const int r1, + const int r2, + const int c1, + const int c2, + const float lambda) + : m_r1 {r1}, + m_r2 {r2}, + m_c1 {c1}, + m_c2 {c2}, + m_lambda {lambda}, + m_size_R {(r2 - r1 + 1) * (c2 - c1 + 1)} + { + if (rows % 16 != 0 || cols % 16 != 0) + throw std::invalid_argument {"rows and cols must be multiples of 16"}; + + m_I.resize(rows, cols); + m_J.resize(rows, cols); + m_c.resize(rows, cols); + + m_iN.resize(rows); + m_iS.resize(rows); + m_jW.resize(cols); + m_jE.resize(cols); + + m_dN.resize(rows, cols); + m_dS.resize(rows, cols); + m_dW.resize(rows, cols); + m_dE.resize(rows, cols); + } + + void run(const int niter) + { + this->fill_indices(); + + fmt::println("Randomizing the input matrix"); + + this->random_matrix(); + m_J.array() = Eigen::exp(m_I.array()); + + fmt::println("Start the SRAD main loop"); + + for (int iter = 0; iter < niter; iter++) + this->iterate(); + + for (Eigen::Index i = 0; i < m_J.rows(); i++) { + for (Eigen::Index j = 0; j < m_J.cols(); j++) + fmt::print("{:.5f} ", m_J(i, j)); + + fmt::println(""); + } + + fmt::println("Computation Done"); + } + +private: + void fill_indices() + { + for (Eigen::Index i = 0; i < m_iN.size(); i++) { + m_iN(i) = i - 1; + m_iS(i) = i + 1; + } + + for (Eigen::Index j = 0; j < m_jW.size(); j++) { + m_jW(j) = j - 1; + m_jE(j) = j + 1; + } + + m_iN(0) = 0; + m_iS(m_iS.size() - 1) = m_iS.size() - 1; + + m_jW(0) = 0; + m_jE(m_jE.size() - 1) = m_jE.size() - 1; + } + + void random_matrix() + { + srand(7); + + for (Eigen::Index i = 0; i < m_I.rows(); i++) { + for (Eigen::Index j = 0; j < m_I.cols(); j++) + m_I(i, j) = rand() / static_cast<float>(RAND_MAX); + } + } + + void iterate() + { + float sum = 0; + float sum2 = 0; + + for (Eigen::Index i = m_r1; i <= m_r2; i++) { + for (Eigen::Index j = m_c1; j <= m_c2; j++) { + const float tmp = m_J(i, j); + + sum += tmp; + sum2 += tmp * tmp; + } + } + + const float meanROI = sum / m_size_R; + const float varROI = (sum2 / m_size_R) - (meanROI * meanROI); + const float q0sqr = varROI / (meanROI * meanROI); + + #pragma omp parallel for + for (Eigen::Index i = 0; i < m_J.rows(); i++) { + for (Eigen::Index j = 0; j < m_J.cols(); j++) { + const float Jc = m_J(i, j); + + // directional derivates + m_dN(i, j) = m_J(m_iN(i), j) - Jc; + m_dS(i, j) = m_J(m_iS(i), j) - Jc; + m_dW(i, j) = m_J(i, m_jW(j)) - Jc; + m_dE(i, j) = m_J(i, m_jE(j)) - Jc; + + const float G2 = + (m_dN(i, j) * m_dN(i, j) + m_dS(i, j) * m_dS(i, j) + + m_dW(i, j) * m_dW(i, j) + m_dE(i, j) * m_dE(i, j)) / + (Jc * Jc); + + const float L = + (m_dN(i, j) + m_dS(i, j) + m_dW(i, j) + m_dE(i, j)) / Jc; + + const float num = (0.5 * G2) - ((1.0 / 16.0) * L * L); + float den = 1 + (0.25 * L); + + const float qsqr = num / (den * den); + + // diffusion coefficent (equ 33) + den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); + m_c(i, j) = std::clamp(1.0 / (1.0 + den), 0.0, 1.0); + } + } + + #pragma omp parallel for + for (Eigen::Index i = 0; i < m_c.rows(); i++) { + for (Eigen::Index j = 0; j < m_c.cols(); j++) { + // diffusion coefficent + const float cN = m_c(i, j); + const float cS = m_c(m_iS(i), j); + const float cW = m_c(i, j); + const float cE = m_c(i, m_jE(j)); + + // divergence (equ 58) + const float D = cN * m_dN(i, j) + cS * m_dS(i, j) + + cW * m_dW(i, j) + cE * m_dE(i, j); + + // image update (equ 61) + m_J(i, j) += 0.25 * m_lambda * D; + } + } + } +}; + +static int usage(const char *program) +{ + // clang-format off + fmt::println(stderr, "Usage: {} <rows> <cols> <y1> <y2> <x1> <x2> <lambda> <no. of iter>", program); + fmt::println(stderr, "\t<rows> - number of rows"); + fmt::println(stderr, "\t<cols> - number of columns"); + fmt::println(stderr, "\t<y1> - y1 value of the speckle"); + fmt::println(stderr, "\t<y2> - y2 value of the speckle"); + fmt::println(stderr, "\t<x1> - x1 value of the speckle"); + fmt::println(stderr, "\t<x2> - x2 value of the speckle"); + fmt::println(stderr, "\t<lambda> - lambda (0, 1)"); + fmt::println(stderr, "\t<no. of iter> - number of iterations"); + // clang-format on + + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + if (argc != 9) + return usage(argv[0]); + + const int rows = std::atoi(argv[1]); // number of rows in the domain + const int cols = std::atoi(argv[2]); // number of cols in the domain + const int r1 = std::atoi(argv[3]); // y1 position of the speckle + const int r2 = std::atoi(argv[4]); // y2 position of the speckle + const int c1 = std::atoi(argv[5]); // x1 position of the speckle + const int c2 = std::atoi(argv[6]); // x2 position of the speckle + const float lambda = std::atof(argv[7]); // Lambda value + const int niter = std::atoi(argv[8]); // number of iterations + + SRAD srad {rows, cols, r1, r2, c1, c2, lambda}; + srad.run(niter); + + return EXIT_SUCCESS; +} diff --git a/src/benchmarks/rodinia-srad/meson.build b/src/benchmarks/rodinia-srad/meson.build index 76d1974f..7259a26e 100644 --- a/src/benchmarks/rodinia-srad/meson.build +++ b/src/benchmarks/rodinia-srad/meson.build @@ -1 +1,2 @@ subdir('c') +subdir('cpp') diff --git a/subprojects/fmt.wrap b/subprojects/fmt.wrap new file mode 100644 index 00000000..4e964601 --- /dev/null +++ b/subprojects/fmt.wrap @@ -0,0 +1,13 @@ +[wrap-file] +directory = fmt-10.2.0 +source_url = https://github.com/fmtlib/fmt/archive/10.2.0.tar.gz +source_filename = fmt-10.2.0.tar.gz +source_hash = 3ca91733a7313a8ad41c0885929415f8ec0a2a31d4dc7e27e9331412f4ca26ac +patch_filename = fmt_10.2.0-2_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/fmt_10.2.0-2/get_patch +patch_hash = 2428c3a386a8390c76378f81ef804a297f4edc3b789499dd56629b7902b8ddb7 +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/fmt_10.2.0-2/fmt-10.2.0.tar.gz +wrapdb_version = 10.2.0-2 + +[provide] +fmt = fmt_dep -- GitLab