Skip to content
Snippets Groups Projects
Verified Commit a6b43bc5 authored by Dorian Stoll's avatar Dorian Stoll
Browse files

rodinia-srad: cpp: Initial port

The results are mostly identical with the output from the C reference,
but there are some floating point errors that prevent the hashes from
matching.
parent f0405a6c
No related branches found
No related tags found
No related merge requests found
cc = meson.get_compiler('c')
sources = [
'srad.cpp',
]
# Build wrapped dependencies as static libraries and disable warnings
dependency_options = [
'default_library=static',
'warning_level=0',
'werror=false',
]
dependencies = [
dependency('openmp'),
dependency(
'eigen3',
fallback: ['eigen', 'eigen_dep'],
default_options: dependency_options,
include_type: 'system',
),
dependency(
'fmt',
fallback: ['fmt', 'fmt_dep'],
default_options: dependency_options,
),
]
options = [
'cpp_std=c++17',
]
executable(
'rodinia-srad-cpp',
sources,
dependencies: dependencies,
override_options: options,
)
/*
* Eigen defaults to using column-major order for its container types. But the C reference code is
* written for row-major containers. Instead of touching every for-loop in the program, simply
* change the default.
*/
#define EIGEN_DEFAULT_TO_ROW_MAJOR
#include <Eigen/Eigen>
#include <fmt/format.h>
class SRAD {
private:
template <class T>
using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
template <class T>
using Vector = Eigen::Vector<T, Eigen::Dynamic>;
private:
int m_r1;
int m_r2;
int m_c1;
int m_c2;
float m_lambda;
int m_size_R;
Matrix<float> m_I {};
Matrix<float> m_J {};
Matrix<float> m_c {};
Vector<Eigen::Index> m_iN {};
Vector<Eigen::Index> m_iS {};
Vector<Eigen::Index> m_jW {};
Vector<Eigen::Index> m_jE {};
Matrix<float> m_dN {};
Matrix<float> m_dS {};
Matrix<float> m_dW {};
Matrix<float> m_dE {};
public:
SRAD(const int rows,
const int cols,
const int r1,
const int r2,
const int c1,
const int c2,
const float lambda)
: m_r1 {r1},
m_r2 {r2},
m_c1 {c1},
m_c2 {c2},
m_lambda {lambda},
m_size_R {(r2 - r1 + 1) * (c2 - c1 + 1)}
{
if (rows % 16 != 0 || cols % 16 != 0)
throw std::invalid_argument {"rows and cols must be multiples of 16"};
m_I.resize(rows, cols);
m_J.resize(rows, cols);
m_c.resize(rows, cols);
m_iN.resize(rows);
m_iS.resize(rows);
m_jW.resize(cols);
m_jE.resize(cols);
m_dN.resize(rows, cols);
m_dS.resize(rows, cols);
m_dW.resize(rows, cols);
m_dE.resize(rows, cols);
}
void run(const int niter)
{
this->fill_indices();
fmt::println("Randomizing the input matrix");
this->random_matrix();
m_J.array() = Eigen::exp(m_I.array());
fmt::println("Start the SRAD main loop");
for (int iter = 0; iter < niter; iter++)
this->iterate();
for (Eigen::Index i = 0; i < m_J.rows(); i++) {
for (Eigen::Index j = 0; j < m_J.cols(); j++)
fmt::print("{:.5f} ", m_J(i, j));
fmt::println("");
}
fmt::println("Computation Done");
}
private:
void fill_indices()
{
for (Eigen::Index i = 0; i < m_iN.size(); i++) {
m_iN(i) = i - 1;
m_iS(i) = i + 1;
}
for (Eigen::Index j = 0; j < m_jW.size(); j++) {
m_jW(j) = j - 1;
m_jE(j) = j + 1;
}
m_iN(0) = 0;
m_iS(m_iS.size() - 1) = m_iS.size() - 1;
m_jW(0) = 0;
m_jE(m_jE.size() - 1) = m_jE.size() - 1;
}
void random_matrix()
{
srand(7);
for (Eigen::Index i = 0; i < m_I.rows(); i++) {
for (Eigen::Index j = 0; j < m_I.cols(); j++)
m_I(i, j) = rand() / static_cast<float>(RAND_MAX);
}
}
void iterate()
{
float sum = 0;
float sum2 = 0;
for (Eigen::Index i = m_r1; i <= m_r2; i++) {
for (Eigen::Index j = m_c1; j <= m_c2; j++) {
const float tmp = m_J(i, j);
sum += tmp;
sum2 += tmp * tmp;
}
}
const float meanROI = sum / m_size_R;
const float varROI = (sum2 / m_size_R) - (meanROI * meanROI);
const float q0sqr = varROI / (meanROI * meanROI);
#pragma omp parallel for
for (Eigen::Index i = 0; i < m_J.rows(); i++) {
for (Eigen::Index j = 0; j < m_J.cols(); j++) {
const float Jc = m_J(i, j);
// directional derivates
m_dN(i, j) = m_J(m_iN(i), j) - Jc;
m_dS(i, j) = m_J(m_iS(i), j) - Jc;
m_dW(i, j) = m_J(i, m_jW(j)) - Jc;
m_dE(i, j) = m_J(i, m_jE(j)) - Jc;
const float G2 =
(m_dN(i, j) * m_dN(i, j) + m_dS(i, j) * m_dS(i, j) +
m_dW(i, j) * m_dW(i, j) + m_dE(i, j) * m_dE(i, j)) /
(Jc * Jc);
const float L =
(m_dN(i, j) + m_dS(i, j) + m_dW(i, j) + m_dE(i, j)) / Jc;
const float num = (0.5 * G2) - ((1.0 / 16.0) * L * L);
float den = 1 + (0.25 * L);
const float qsqr = num / (den * den);
// diffusion coefficent (equ 33)
den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr));
m_c(i, j) = std::clamp(1.0 / (1.0 + den), 0.0, 1.0);
}
}
#pragma omp parallel for
for (Eigen::Index i = 0; i < m_c.rows(); i++) {
for (Eigen::Index j = 0; j < m_c.cols(); j++) {
// diffusion coefficent
const float cN = m_c(i, j);
const float cS = m_c(m_iS(i), j);
const float cW = m_c(i, j);
const float cE = m_c(i, m_jE(j));
// divergence (equ 58)
const float D = cN * m_dN(i, j) + cS * m_dS(i, j) +
cW * m_dW(i, j) + cE * m_dE(i, j);
// image update (equ 61)
m_J(i, j) += 0.25 * m_lambda * D;
}
}
}
};
static int usage(const char *program)
{
// clang-format off
fmt::println(stderr, "Usage: {} <rows> <cols> <y1> <y2> <x1> <x2> <lambda> <no. of iter>", program);
fmt::println(stderr, "\t<rows> - number of rows");
fmt::println(stderr, "\t<cols> - number of columns");
fmt::println(stderr, "\t<y1> - y1 value of the speckle");
fmt::println(stderr, "\t<y2> - y2 value of the speckle");
fmt::println(stderr, "\t<x1> - x1 value of the speckle");
fmt::println(stderr, "\t<x2> - x2 value of the speckle");
fmt::println(stderr, "\t<lambda> - lambda (0, 1)");
fmt::println(stderr, "\t<no. of iter> - number of iterations");
// clang-format on
return EXIT_FAILURE;
}
int main(int argc, char **argv)
{
if (argc != 9)
return usage(argv[0]);
const int rows = std::atoi(argv[1]); // number of rows in the domain
const int cols = std::atoi(argv[2]); // number of cols in the domain
const int r1 = std::atoi(argv[3]); // y1 position of the speckle
const int r2 = std::atoi(argv[4]); // y2 position of the speckle
const int c1 = std::atoi(argv[5]); // x1 position of the speckle
const int c2 = std::atoi(argv[6]); // x2 position of the speckle
const float lambda = std::atof(argv[7]); // Lambda value
const int niter = std::atoi(argv[8]); // number of iterations
SRAD srad {rows, cols, r1, r2, c1, c2, lambda};
srad.run(niter);
return EXIT_SUCCESS;
}
subdir('c') subdir('c')
subdir('cpp')
[wrap-file]
directory = fmt-10.2.0
source_url = https://github.com/fmtlib/fmt/archive/10.2.0.tar.gz
source_filename = fmt-10.2.0.tar.gz
source_hash = 3ca91733a7313a8ad41c0885929415f8ec0a2a31d4dc7e27e9331412f4ca26ac
patch_filename = fmt_10.2.0-2_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/fmt_10.2.0-2/get_patch
patch_hash = 2428c3a386a8390c76378f81ef804a297f4edc3b789499dd56629b7902b8ddb7
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/fmt_10.2.0-2/fmt-10.2.0.tar.gz
wrapdb_version = 10.2.0-2
[provide]
fmt = fmt_dep
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment