rodinia-srad: cpp: Initial port

The results are mostly identical with the output from the C reference, but there are some floating point errors that prevent the hashes from matching.

rodinia-srad: cpp: Initial port
a6b43bc5 · Dorian Stoll · f0405a6c · a6b43bc5 · a6b43bc5 · a6b43bc5
Verified Commit a6b43bc5 authored 9 months ago by Dorian Stoll
--- a/src/benchmarks/rodinia-srad/cpp/meson.build
+++ b/src/benchmarks/rodinia-srad/cpp/meson.build
+cc = meson.get_compiler('c')
+sources = [
+	'srad.cpp',
+]
+# Build wrapped dependencies as static libraries and disable warnings
+dependency_options = [
+	'default_library=static',
+	'warning_level=0',
+	'werror=false',
+]
+dependencies = [
+	dependency('openmp'),
+	dependency(
+		'eigen3',
+		fallback: ['eigen', 'eigen_dep'],
+		default_options: dependency_options,
+		include_type: 'system',
+	),
+	dependency(
+		'fmt',
+		fallback: ['fmt', 'fmt_dep'],
+		default_options: dependency_options,
+	),
+]
+options = [
+	'cpp_std=c++17',
+]
+executable(
+	'rodinia-srad-cpp',
+	sources,
+	dependencies: dependencies,
+	override_options: options,
+)
--- a/src/benchmarks/rodinia-srad/cpp/srad.cpp
+++ b/src/benchmarks/rodinia-srad/cpp/srad.cpp
+/*
+ * Eigen defaults to using column-major order for its container types. But the C reference code is
+ * written for row-major containers. Instead of touching every for-loop in the program, simply
+ * change the default.
+ */
+#define EIGEN_DEFAULT_TO_ROW_MAJOR
+#include <Eigen/Eigen>
+#include <fmt/format.h>
+class SRAD {
+private:
+	template <class T>
+	using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+	template <class T>
+	using Vector = Eigen::Vector<T, Eigen::Dynamic>;
+private:
+	int m_r1;
+	int m_r2;
+	int m_c1;
+	int m_c2;
+	float m_lambda;
+	int m_size_R;
+	Matrix<float> m_I {};
+	Matrix<float> m_J {};
+	Matrix<float> m_c {};
+	Vector<Eigen::Index> m_iN {};
+	Vector<Eigen::Index> m_iS {};
+	Vector<Eigen::Index> m_jW {};
+	Vector<Eigen::Index> m_jE {};
+	Matrix<float> m_dN {};
+	Matrix<float> m_dS {};
+	Matrix<float> m_dW {};
+	Matrix<float> m_dE {};
+public:
+	SRAD(const int rows,
+	     const int cols,
+	     const int r1,
+	     const int r2,
+	     const int c1,
+	     const int c2,
+	     const float lambda)
+		: m_r1 {r1},
+		  m_r2 {r2},
+		  m_c1 {c1},
+		  m_c2 {c2},
+		  m_lambda {lambda},
+		  m_size_R {(r2 - r1 + 1) * (c2 - c1 + 1)}
+	{
+		if (rows % 16 != 0 || cols % 16 != 0)
+			throw std::invalid_argument {"rows and cols must be multiples of 16"};
+		m_I.resize(rows, cols);
+		m_J.resize(rows, cols);
+		m_c.resize(rows, cols);
+		m_iN.resize(rows);
+		m_iS.resize(rows);
+		m_jW.resize(cols);
+		m_jE.resize(cols);
+		m_dN.resize(rows, cols);
+		m_dS.resize(rows, cols);
+		m_dW.resize(rows, cols);
+		m_dE.resize(rows, cols);
+	}
+	void run(const int niter)
+	{
+		this->fill_indices();
+		fmt::println("Randomizing the input matrix");
+		this->random_matrix();
+		m_J.array() = Eigen::exp(m_I.array());
+		fmt::println("Start the SRAD main loop");
+		for (int iter = 0; iter < niter; iter++)
+			this->iterate();
+		for (Eigen::Index i = 0; i < m_J.rows(); i++) {
+			for (Eigen::Index j = 0; j < m_J.cols(); j++)
+				fmt::print("{:.5f} ", m_J(i, j));
+			fmt::println("");
+		}
+		fmt::println("Computation Done");
+	}
+private:
+	void fill_indices()
+	{
+		for (Eigen::Index i = 0; i < m_iN.size(); i++) {
+			m_iN(i) = i - 1;
+			m_iS(i) = i + 1;
+		}
+		for (Eigen::Index j = 0; j < m_jW.size(); j++) {
+			m_jW(j) = j - 1;
+			m_jE(j) = j + 1;
+		}
+		m_iN(0) = 0;
+		m_iS(m_iS.size() - 1) = m_iS.size() - 1;
+		m_jW(0) = 0;
+		m_jE(m_jE.size() - 1) = m_jE.size() - 1;
+	}
+	void random_matrix()
+	{
+		srand(7);
+		for (Eigen::Index i = 0; i < m_I.rows(); i++) {
+			for (Eigen::Index j = 0; j < m_I.cols(); j++)
+				m_I(i, j) = rand() / static_cast<float>(RAND_MAX);
+		}
+	}
+	void iterate()
+	{
+		float sum = 0;
+		float sum2 = 0;
+		for (Eigen::Index i = m_r1; i <= m_r2; i++) {
+			for (Eigen::Index j = m_c1; j <= m_c2; j++) {
+				const float tmp = m_J(i, j);
+				sum += tmp;
+				sum2 += tmp * tmp;
+			}
+		}
+		const float meanROI = sum / m_size_R;
+		const float varROI = (sum2 / m_size_R) - (meanROI * meanROI);
+		const float q0sqr = varROI / (meanROI * meanROI);
+		#pragma omp parallel for
+		for (Eigen::Index i = 0; i < m_J.rows(); i++) {
+			for (Eigen::Index j = 0; j < m_J.cols(); j++) {
+				const float Jc = m_J(i, j);
+				// directional derivates
+				m_dN(i, j) = m_J(m_iN(i), j) - Jc;
+				m_dS(i, j) = m_J(m_iS(i), j) - Jc;
+				m_dW(i, j) = m_J(i, m_jW(j)) - Jc;
+				m_dE(i, j) = m_J(i, m_jE(j)) - Jc;
+				const float G2 =
+					(m_dN(i, j) * m_dN(i, j) + m_dS(i, j) * m_dS(i, j) +
+				         m_dW(i, j) * m_dW(i, j) + m_dE(i, j) * m_dE(i, j)) /
+					(Jc * Jc);
+				const float L =
+					(m_dN(i, j) + m_dS(i, j) + m_dW(i, j) + m_dE(i, j)) / Jc;
+				const float num = (0.5 * G2) - ((1.0 / 16.0) * L * L);
+				float den = 1 + (0.25 * L);
+				const float qsqr = num / (den * den);
+				// diffusion coefficent (equ 33)
+				den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr));
+				m_c(i, j) = std::clamp(1.0 / (1.0 + den), 0.0, 1.0);
+			}
+		}
+		#pragma omp parallel for
+		for (Eigen::Index i = 0; i < m_c.rows(); i++) {
+			for (Eigen::Index j = 0; j < m_c.cols(); j++) {
+				// diffusion coefficent
+				const float cN = m_c(i, j);
+				const float cS = m_c(m_iS(i), j);
+				const float cW = m_c(i, j);
+				const float cE = m_c(i, m_jE(j));
+				// divergence (equ 58)
+				const float D = cN * m_dN(i, j) + cS * m_dS(i, j) +
+				                cW * m_dW(i, j) + cE * m_dE(i, j);
+				// image update (equ 61)
+				m_J(i, j) += 0.25 * m_lambda * D;
+			}
+		}
+	}
+};
+static int usage(const char *program)
+{
+	// clang-format off
+	fmt::println(stderr, "Usage: {} <rows> <cols> <y1> <y2> <x1> <x2> <lambda> <no. of iter>", program);
+	fmt::println(stderr, "\t<rows>        - number of rows");
+	fmt::println(stderr, "\t<cols>        - number of columns");
+	fmt::println(stderr, "\t<y1>          - y1 value of the speckle");
+	fmt::println(stderr, "\t<y2>          - y2 value of the speckle");
+	fmt::println(stderr, "\t<x1>          - x1 value of the speckle");
+	fmt::println(stderr, "\t<x2>          - x2 value of the speckle");
+	fmt::println(stderr, "\t<lambda>      - lambda (0, 1)");
+	fmt::println(stderr, "\t<no. of iter> - number of iterations");
+	// clang-format on
+	return EXIT_FAILURE;
+}
+int main(int argc, char **argv)
+{
+	if (argc != 9)
+		return usage(argv[0]);
+	const int rows = std::atoi(argv[1]);     // number of rows in the domain
+	const int cols = std::atoi(argv[2]);     // number of cols in the domain
+	const int r1 = std::atoi(argv[3]);       // y1 position of the speckle
+	const int r2 = std::atoi(argv[4]);       // y2 position of the speckle
+	const int c1 = std::atoi(argv[5]);       // x1 position of the speckle
+	const int c2 = std::atoi(argv[6]);       // x2 position of the speckle
+	const float lambda = std::atof(argv[7]); // Lambda value
+	const int niter = std::atoi(argv[8]);    // number of iterations
+	SRAD srad {rows, cols, r1, r2, c1, c2, lambda};
+	srad.run(niter);
+	return EXIT_SUCCESS;
+}
--- a/src/benchmarks/rodinia-srad/meson.build
+++ b/src/benchmarks/rodinia-srad/meson.build
 subdir('c')
+subdir('cpp')
--- a/subprojects/fmt.wrap
+++ b/subprojects/fmt.wrap
+[wrap-file]
+directory = fmt-10.2.0
+source_url = https://github.com/fmtlib/fmt/archive/10.2.0.tar.gz
+source_filename = fmt-10.2.0.tar.gz
+source_hash = 3ca91733a7313a8ad41c0885929415f8ec0a2a31d4dc7e27e9331412f4ca26ac
+patch_filename = fmt_10.2.0-2_patch.zip
+patch_url = https://wrapdb.mesonbuild.com/v2/fmt_10.2.0-2/get_patch
+patch_hash = 2428c3a386a8390c76378f81ef804a297f4edc3b789499dd56629b7902b8ddb7
+source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/fmt_10.2.0-2/fmt-10.2.0.tar.gz
+wrapdb_version = 10.2.0-2
+[provide]
+fmt = fmt_dep