From cdd4bac26b1c6a076911367863b0d8236c6e0045 Mon Sep 17 00:00:00 2001 From: Dorian Stoll <dorian.stoll@uni-potsdam.de> Date: Sun, 9 Jun 2024 20:45:55 +0200 Subject: [PATCH] zellularautomat: c: Init from provided reference --- src/benchmarks/zellularautomat/c/Makefile | 96 ++++++++ src/benchmarks/zellularautomat/c/ca_common.c | 189 ++++++++++++++++ src/benchmarks/zellularautomat/c/ca_common.h | 60 +++++ src/benchmarks/zellularautomat/c/ca_mpi_p2p.c | 123 +++++++++++ src/benchmarks/zellularautomat/c/ca_openmp.c | 97 ++++++++ src/benchmarks/zellularautomat/c/ca_seq.c | 90 ++++++++ src/benchmarks/zellularautomat/c/random.c | 207 ++++++++++++++++++ src/benchmarks/zellularautomat/c/random.h | 98 +++++++++ 8 files changed, 960 insertions(+) create mode 100644 src/benchmarks/zellularautomat/c/Makefile create mode 100644 src/benchmarks/zellularautomat/c/ca_common.c create mode 100644 src/benchmarks/zellularautomat/c/ca_common.h create mode 100644 src/benchmarks/zellularautomat/c/ca_mpi_p2p.c create mode 100644 src/benchmarks/zellularautomat/c/ca_openmp.c create mode 100644 src/benchmarks/zellularautomat/c/ca_seq.c create mode 100644 src/benchmarks/zellularautomat/c/random.c create mode 100644 src/benchmarks/zellularautomat/c/random.h diff --git a/src/benchmarks/zellularautomat/c/Makefile b/src/benchmarks/zellularautomat/c/Makefile new file mode 100644 index 00000000..e002686c --- /dev/null +++ b/src/benchmarks/zellularautomat/c/Makefile @@ -0,0 +1,96 @@ +BASE_CC=gcc +MPI_CC=mpicc + +COMMON_CFLAGS=-O2 +COMMON_LDFLAGS=-lcrypto -lrt + +BASE_CFLAGS=-Wall -std=gnu99 -pedantic + +OMP_CFLAGS=-fopenmp + +MPI_CFLAGS=-DUSE_MPI + +C_DEPS=ca_common.c random.c + +MPI_TARGETS=ca_mpi_p2p ca_mpi_p2p_hybrid + +TARGETS=ca_seq \ + ca_openmp \ + $(MPI_TARGETS) + +.PHONY: all +all: $(TARGETS) + +.PHONY: cpu +cpu: ca_seq ca_openmp + +.PHONY: mpi +mpi: $(MPI_TARGETS) + +ca_seq: ca_seq.c $(C_DEPS) + $(BASE_CC) $(COMMON_CFLAGS) $(BASE_CFLAGS) $^ $(COMMON_LDFLAGS) -o $@ + +ca_openmp: ca_openmp.c $(C_DEPS) + $(BASE_CC) $(COMMON_CFLAGS) $(BASE_CFLAGS) $(OMP_CFLAGS) $^ $(COMMON_LDFLAGS) -o $@ + +ca_mpi_p2p: ca_mpi_p2p.c $(C_DEPS) + $(MPI_CC) $(COMMON_CFLAGS) $(BASE_CFLAGS) $(MPI_CFLAGS) $^ $(COMMON_LDFLAGS) -o $@ + +ca_mpi_p2p_hybrid: ca_mpi_p2p.c $(C_DEPS) + $(MPI_CC) $(COMMON_CFLAGS) $(BASE_CFLAGS) $(MPI_CFLAGS) $(OMP_CFLAGS) $^ $(COMMON_LDFLAGS) -o $@ + +.PHONY: test + +test: $(TARGETS) + @for ITS in 10 31 57 100; do \ + for LINES in 10 33 47 100; do \ + echo "$$LINES lines, $$ITS iterations"; \ + for BINARY in $^; do printf '%-10s\t' $$BINARY; ./$$BINARY $$LINES $$ITS; done; \ + done \ + done + +.PHONY: mpi-test + +mpi-test: $(MPI_TARGETS) + @for ITS in 10 31 57 100; do \ + for LINES in 20 33 47 100; do \ + for NP in 2 3 4; do \ + echo "$$LINES lines, $$ITS iterations, $$NP procs"; \ + for BINARY in ca_seq $^; do \ + printf '%-10s\t' $$BINARY; \ + mpiexec -n $$NP ./$$BINARY $$LINES $$ITS; \ + done \ + done \ + done \ + done + +.PHONY: bench + +bench: ca_openmp + @for ITS in 128 256 512; do \ + for LINES in 1000 10000 50000; do \ + echo "$$LINES lines, $$ITS iterations"; \ + for BINARY in $^; do printf '%-10s\t' $$BINARY; time ./$$BINARY $$LINES $$ITS; done; \ + done \ + done + +.PHONY: omp_scaling + +omp_scaling: ca_seq ca_openmp + @for ITS in 128 256 512; do \ + for LINES in 1000 10000 50000; do \ + echo "$$LINES lines, $$ITS iterations"; \ + printf "sequential \t"; \ + ./ca_seq $$LINES $$ITS; \ + for THREADS in `seq \`nproc\``; do \ + printf "$$THREADS threads\t"; \ + OMP_NUM_THREADS=$$THREADS ./ca_openmp $$LINES $$ITS; \ + done; \ + done; \ + done + +.PHONY: clean + +clean: + rm -f *.o + rm -f $(TARGETS) diff --git a/src/benchmarks/zellularautomat/c/ca_common.c b/src/benchmarks/zellularautomat/c/ca_common.c new file mode 100644 index 00000000..99e000d3 --- /dev/null +++ b/src/benchmarks/zellularautomat/c/ca_common.c @@ -0,0 +1,189 @@ +/* + * common functions for (parallel) cellular automaton + * + * (c) 2016 Steffen Christgau + * + * configuration initialization based on + * (c) 1996,1997 Peter Sanders, Ingo Boesnach + * + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> + +#include "openssl/md5.h" +#include "openssl/evp.h" + +#include "ca_common.h" +#include "random.h" + +#ifdef USE_MPI +#include <mpi.h> +#endif + +/* determine random integer between 0 and n-1 */ +#define randInt(n) ((int)(nextRandomLEcuyer() * n)) + +void ca_init(int argc, char** argv, int *lines, int *its) +{ + assert(argc == 3); + + *lines = atoi(argv[1]); + *its = atoi(argv[2]); + + assert(*lines > 0); +} + +/* random starting configuration */ +void ca_init_config(line_t *buf, int lines, int skip_lines) +{ + volatile int scratch; + + initRandomLEcuyer(424243); + + /* let the RNG spin for some rounds (used for distributed initialization) */ + for (int y = 1; y <= skip_lines; y++) { + for (int x = 1; x <= XSIZE; x++) { + scratch = scratch + randInt(100) >= 50; + } + } + + for (int y = 1; y <= lines; y++) { + for (int x = 1; x <= XSIZE; x++) { + buf[y][x] = randInt(100) >= 50; + } + } +} + +static char* ca_buffer_to_hex_str(const uint8_t* buf, size_t buf_size) +{ + char *retval, *ptr; + + retval = ptr = calloc(MD5_DIGEST_LENGTH * 2 + 1, sizeof(*retval)); + for (size_t i = 0; i < MD5_DIGEST_LENGTH; i++) { + snprintf(ptr, 3, "%02X", buf[i]); + ptr += 2; + } + + return retval; +} + +static void ca_print_hash_and_time(const char *hash, const double time) +{ + printf("hash: %s\ttime: %.3f s\n", (hash ? hash : "ERROR"), time); +} + +static void ca_clean_ghost_zones(line_t *buf, int lines) +{ + for (int y = 0; y < lines; y++) { + buf[y][0] = 0; + buf[y][XSIZE + 1] = 0; + } +} + +void ca_hash_and_report(line_t *buf, int lines, double time_in_s) +{ + uint8_t hash[MD5_DIGEST_LENGTH]; + uint32_t md_len; + EVP_MD_CTX *ctx = EVP_MD_CTX_new(); + EVP_DigestInit_ex(ctx, EVP_md5(), NULL); + + ca_clean_ghost_zones(buf, lines); + + EVP_DigestUpdate(ctx, buf, lines * sizeof(*buf)); + EVP_DigestFinal_ex(ctx, hash, &md_len); + + char* hash_str = ca_buffer_to_hex_str(hash, MD5_DIGEST_LENGTH); + ca_print_hash_and_time(hash_str, time_in_s); + free(hash_str); + + EVP_MD_CTX_free(ctx); +} + +#ifdef MPI_VERSION /* defined by mpi.h */ + +static int num_remainder_procs; +#ifdef USE_MPI_TOPOLOGY +static MPI_Comm topo_comm; +#endif + +void ca_mpi_init(int num_procs, int rank, int num_total_lines, + int *num_local_lines, int *global_first_line) +{ + *num_local_lines = num_total_lines / num_procs; + *global_first_line = rank * (*num_local_lines); + + + /* if work cannot be distributed equally, distribute the remaining lines equally */ + num_remainder_procs = num_total_lines % num_procs; + if (rank < num_remainder_procs) { + (*num_local_lines)++; + *global_first_line = *global_first_line + rank; + } else { + *global_first_line = *global_first_line + num_remainder_procs; + } + +#ifdef USE_TOPO + int topo_periodic = 1, topo_dim = num_procs; + MPI_Cart_create(MPI_COMM_WORLD, 1, &topo_dim, &topo_periodic, 0, &topo_comm); +#endif +} + +#define TAG_RESULT (0xCAFE) + +void ca_mpi_hash_and_report(line_t* local_buf, int num_local_lines, + int num_total_lines, int num_procs, double time_in_s) +{ + int i, rank, num_lines = num_local_lines, count; + uint32_t md_len; + uint8_t hash[MD5_DIGEST_LENGTH]; + + EVP_MD_CTX *ctx = EVP_MD_CTX_new(); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) { + + EVP_DigestInit_ex(ctx, EVP_md5(), NULL); + count = num_local_lines; + ca_clean_ghost_zones(local_buf + 1, num_local_lines); + /* insert our own data into MD5 hash */ + + EVP_DigestUpdate(ctx, local_buf + 1, num_local_lines * sizeof(line_t)); + + /* recieve partial results from all other processes in our local buffer and + * update the hash. Our buffer is garanteed to have the maximum required + * size in any case (see partioning above) */ + for (i = 1; i < num_procs; i++) { + num_lines = num_total_lines / num_procs; + if (i < num_remainder_procs) { + num_lines++; + } + count += num_lines; + MPI_Recv( + local_buf, num_lines * LINE_SIZE, CA_MPI_CELL_DATATYPE, + i, TAG_RESULT, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + ca_clean_ghost_zones(local_buf, num_lines); + EVP_DigestUpdate(ctx, local_buf, num_lines * sizeof(line_t)); + } + + EVP_DigestFinal_ex(ctx, hash, &md_len); + + char* hash_str = ca_buffer_to_hex_str(hash, MD5_DIGEST_LENGTH); + ca_print_hash_and_time(hash_str, time_in_s); + + free(hash_str); + EVP_MD_CTX_free(ctx); + } else { + MPI_Send( + local_buf[1], num_local_lines * LINE_SIZE, CA_MPI_CELL_DATATYPE, + 0, TAG_RESULT, MPI_COMM_WORLD); + } + +#ifdef USE_MPI_TOPOLOGY + MPI_Comm_free(&topo_comm); +#endif +} + +#endif /* MPI_VERSION */ diff --git a/src/benchmarks/zellularautomat/c/ca_common.h b/src/benchmarks/zellularautomat/c/ca_common.h new file mode 100644 index 00000000..0ec35c2b --- /dev/null +++ b/src/benchmarks/zellularautomat/c/ca_common.h @@ -0,0 +1,60 @@ +#ifndef CA_COMMON_H +#define CA_COMMON_H + +#include <stdint.h> +#include <stddef.h> +#include <time.h> + +/* a: pointer to array; x,y: coordinates; result: n-th element of anneal, + where n is the number of neighbors */ +#define transition(a, x, y) \ + (anneal[(a)[(y)-1][(x)-1] + (a)[(y)][(x)-1] + (a)[(y)+1][(x)-1] +\ + (a)[(y)-1][(x) ] + (a)[(y)][(x) ] + (a)[(y)+1][(x) ] +\ + (a)[(y)-1][(x)+1] + (a)[(y)][(x)+1] + (a)[(y)+1][(x)+1]]) + +#define TIME_GET(timer) \ + struct timespec timer; \ + clock_gettime(CLOCK_MONOTONIC, &timer) + +#define TIME_DIFF(timer1, timer2) \ + ((timer2.tv_sec * 1.0E+9 + timer2.tv_nsec) - \ + (timer1.tv_sec * 1.0E+9 + timer1.tv_nsec)) / 1.0E+9 + + +#ifdef __cplusplus +extern "C" { +#endif + +/* horizontal size of the configuration */ +#define XSIZE 1024 +#define LINE_SIZE (XSIZE + 2) + +/* "ADT" State and line of states (plus border) */ +typedef uint8_t cell_state_t; +typedef cell_state_t line_t[XSIZE + 2]; + +void ca_init(int argc, char** argv, int *lines, int *its); +void ca_init_config(line_t *buf, int lines, int skip_lines); +void ca_hash_and_report(line_t *buf, int lines, double time_in_s); + +#ifdef __cplusplus +} +#endif + +#ifdef USE_MPI + +/* next/prev process in communicator */ +#define PREV_PROC(n, num_procs) ((n - 1 + num_procs) % num_procs) +#define SUCC_PROC(n, num_procs) ((n + 1) % num_procs) + +#define CA_MPI_CELL_DATATYPE MPI_BYTE + +void ca_mpi_init(int num_procs, int rank, int num_total_lines, + int *num_local_lines, int *global_first_line); +void ca_mpi_hash_and_report(line_t* local_buf, int num_local_lines, + int num_total_lines, int num_procs, double time_in_s); + +#endif /* USE_MPI */ + + +#endif /* CA_COMMON_H */ diff --git a/src/benchmarks/zellularautomat/c/ca_mpi_p2p.c b/src/benchmarks/zellularautomat/c/ca_mpi_p2p.c new file mode 100644 index 00000000..558564b9 --- /dev/null +++ b/src/benchmarks/zellularautomat/c/ca_mpi_p2p.c @@ -0,0 +1,123 @@ +/* + * simulate a cellular automaton with periodic boundaries (torus-like) + * MPI version using two-sided blocking communication + * + * (c) 2016 Steffen Christgau (C99 port, modularization, parallelization) + * (c) 1996,1997 Peter Sanders, Ingo Boesnach (original source) + * + * command line arguments: + * #1: Number of lines + * #2: Number of iterations to be simulated + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <mpi.h> + +#include "ca_common.h" + +/* tags for communication */ +#define TAG_SEND_UPPER_BOUND (1) +#define TAG_SEND_LOWER_BOUND (2) + +#define TAG_RECV_UPPER_BOUND TAG_SEND_LOWER_BOUND +#define TAG_RECV_LOWER_BOUND TAG_SEND_UPPER_BOUND + +/* --------------------- CA simulation -------------------------------- */ + +/* annealing rule from ChoDro96 page 34 + * the table is used to map the number of nonzero + * states in the neighborhood to the new state + */ +static const cell_state_t anneal[10] = {0, 0, 0, 0, 1, 0, 1, 1, 1, 1}; + +/* treat torus like boundary conditions */ +static void boundary(line_t *buf, int lines) +{ + for (int y = 0; y <= lines + 1; y++) { + /* copy rightmost column to the buffer column 0 */ + buf[y][0] = buf[y][XSIZE]; + + /* copy leftmost column to the buffer column XSIZE + 1 */ + buf[y][XSIZE+1] = buf[y][1]; + } + + /* no wrap of upper/lower boundary, since it is done by exchanged ghost zones */ +} + +/* make one simulation iteration with lines lines. + * old configuration is in from, new one is written to to. + */ +static void simulate(line_t *from, line_t *to, int lines) +{ + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (int y = 1; y <= lines; y++) { + for (int x = 1; x <= XSIZE; x++) { + to[y][x] = transition(from, x, y); + } + } +} + +/* --------------------- measurement ---------------------------------- */ + +int main(int argc, char** argv) +{ + int num_total_lines, num_local_lines, num_skip_lines, its; + + /* init MPI and application */ + MPI_Init(&argc, &argv); + + ca_init(argc, argv, &num_total_lines, &its); + + int num_procs, local_rank; + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &local_rank); + + ca_mpi_init(num_procs, local_rank, num_total_lines, + &num_local_lines, &num_skip_lines); + + line_t *from = calloc((num_local_lines + 2), sizeof(*from)); + line_t *to = calloc((num_local_lines + 2), sizeof(*to)); + + ca_init_config(from, num_local_lines, num_skip_lines); + + /* actual computation */ + TIME_GET(sim_start); + for (int i = 0; i < its; i++) { + MPI_Sendrecv( + from[1], LINE_SIZE, CA_MPI_CELL_DATATYPE, + PREV_PROC(local_rank, num_procs), TAG_SEND_UPPER_BOUND, + from[num_local_lines + 1], LINE_SIZE, CA_MPI_CELL_DATATYPE, + SUCC_PROC(local_rank, num_procs), TAG_RECV_LOWER_BOUND, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + + MPI_Sendrecv( + from[num_local_lines], LINE_SIZE, CA_MPI_CELL_DATATYPE, + SUCC_PROC(local_rank, num_procs), TAG_SEND_LOWER_BOUND, + from[0], LINE_SIZE, CA_MPI_CELL_DATATYPE, + PREV_PROC(local_rank, num_procs), TAG_RECV_UPPER_BOUND, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + + boundary(from, num_local_lines); + simulate(from, to, num_local_lines); + + line_t *temp = from; + from = to; + to = temp; + } + TIME_GET(sim_stop); + + ca_mpi_hash_and_report(from, num_local_lines, num_total_lines, + num_procs, TIME_DIFF(sim_start, sim_stop)); + + free(from); + free(to); + + MPI_Finalize(); + + return EXIT_SUCCESS; +} diff --git a/src/benchmarks/zellularautomat/c/ca_openmp.c b/src/benchmarks/zellularautomat/c/ca_openmp.c new file mode 100644 index 00000000..b42fc1ea --- /dev/null +++ b/src/benchmarks/zellularautomat/c/ca_openmp.c @@ -0,0 +1,97 @@ +/* + * simulate a cellular automaton with periodic boundaries (torus-like) + * OpenMP version + * + * (c) 2016 Steffen Christgau (C99 port, modularization, parallelization) + * (c) 1996,1997 Peter Sanders, Ingo Boesnach (original source) + * + * command line arguments: + * #1: Number of lines + * #2: Number of iterations to be simulated + * + */ +#include <stdio.h> +#include <stdlib.h> + +#include "ca_common.h" + +/* --------------------- CA simulation -------------------------------- */ + +/* annealing rule from ChoDro96 page 34 + * the table is used to map the number of nonzero + * states in the neighborhood to the new state + */ +static const cell_state_t anneal[10] = {0, 0, 0, 0, 1, 0, 1, 1, 1, 1}; + +/* treat torus like boundary conditions */ +static void boundary(line_t *buf, int lines) +{ + #pragma omp sections + { + #pragma omp section + #pragma omp parallel for + for (int y = 0; y <= lines + 1; y++) { + /* copy rightmost column to the buffer column 0 */ + buf[y][0] = buf[y][XSIZE]; + + /* copy leftmost column to the buffer column XSIZE + 1 */ + buf[y][XSIZE + 1] = buf[y][1]; + } + + #pragma omp section + #pragma omp parallel for + for (int x = 0; x <= XSIZE + 1; x++) { + /* copy bottommost row to buffer row 0 */ + buf[0][x] = buf[lines][x]; + + /* copy topmost row to buffer row lines + 1 */ + buf[lines + 1][x] = buf[1][x]; + } + } +} + +/* make one simulation iteration with lines lines. + * old configuration is in from, new one is written to to. + */ +static void simulate(line_t *from, line_t *to, int lines) +{ + #pragma omp parallel for + for (int y = 1; y <= lines; y++) { + for (int x = 1; x <= XSIZE; x++) { + /* transition is defined in ca_common.h */ + to[y][x] = transition(from, x, y); + } + } +} + +/* --------------------- main routine --------------------------------- */ + +int main(int argc, char** argv) +{ + int lines, its; + + ca_init(argc, argv, &lines, &its); + + line_t *from = calloc((lines + 2), sizeof(line_t)); + line_t *to = calloc((lines + 2), sizeof(line_t)); + + ca_init_config(from, lines, 0); + + TIME_GET(sim_start); + for (int i = 0; i < its; i++) { + boundary(from, lines); + simulate(from, to, lines); + + line_t *temp = from; + from = to; + to = temp; + } + TIME_GET(sim_stop); + + ca_hash_and_report(from + 1, lines, TIME_DIFF(sim_start, sim_stop)); + + free(from); + free(to); + + return EXIT_SUCCESS; +} diff --git a/src/benchmarks/zellularautomat/c/ca_seq.c b/src/benchmarks/zellularautomat/c/ca_seq.c new file mode 100644 index 00000000..ff187f0a --- /dev/null +++ b/src/benchmarks/zellularautomat/c/ca_seq.c @@ -0,0 +1,90 @@ +/* + * simulate a cellular automaton with periodic boundaries (torus-like) + * serial version + * + * (c) 2016 Steffen Christgau (C99 port, modularization) + * (c) 1996,1997 Peter Sanders, Ingo Boesnach (original source) + * + * command line arguments: + * #1: Number of lines + * #2: Number of iterations to be simulated + * + */ +#include <stdio.h> +#include <stdlib.h> + +#include "ca_common.h" +#include <time.h> + +/* --------------------- CA simulation -------------------------------- */ + +/* annealing rule from ChoDro96 page 34 + * the table is used to map the number of nonzero + * states in the neighborhood to the new state + */ +static const cell_state_t anneal[10] = {0, 0, 0, 0, 1, 0, 1, 1, 1, 1}; + +/* treat torus like boundary conditions */ +static void boundary(line_t *buf, int lines) +{ + for (int y = 0; y <= lines + 1; y++) { + /* copy rightmost column to the buffer column 0 */ + buf[y][0] = buf[y][XSIZE]; + + /* copy leftmost column to the buffer column XSIZE + 1 */ + buf[y][XSIZE + 1] = buf[y][1]; + } + + for (int x = 0; x <= XSIZE + 1; x++) { + /* copy bottommost row to buffer row 0 */ + buf[0][x] = buf[lines][x]; + + /* copy topmost row to buffer row lines + 1 */ + buf[lines + 1][x] = buf[1][x]; + } +} + +/* make one simulation iteration with lines lines. + * old configuration is in from, new one is written to to. + */ +static void simulate(line_t *from, line_t *to, int lines) +{ + for (int y = 1; y <= lines; y++) { + for (int x = 1; x <= XSIZE; x++) { + /* transition is defined in ca_common.h */ + to[y][x] = transition(from, x, y); + } + } +} + +/* --------------------- measurement ---------------------------------- */ + +int main(int argc, char** argv) +{ + int lines, its; + + ca_init(argc, argv, &lines, &its); + + line_t *from = calloc((lines + 2), sizeof(line_t)); + line_t *to = calloc((lines + 2), sizeof(line_t)); + + ca_init_config(from, lines, 0); + + TIME_GET(sim_start); + for (int i = 0; i < its; i++) { + boundary(from, lines); + simulate(from, to, lines); + + line_t *temp = from; + from = to; + to = temp; + } + TIME_GET(sim_stop); + + ca_hash_and_report(from + 1, lines, TIME_DIFF(sim_start, sim_stop)); + + free(from); + free(to); + + return EXIT_SUCCESS; +} diff --git a/src/benchmarks/zellularautomat/c/random.c b/src/benchmarks/zellularautomat/c/random.c new file mode 100644 index 00000000..2a186bcb --- /dev/null +++ b/src/benchmarks/zellularautomat/c/random.c @@ -0,0 +1,207 @@ +#include "random.h" +/* (c) 1996,1997 Thomas Worsch, Peter Sanders */ +/* ===================================================================== + * The pseudo random number generator functions in this file are + * similar to those in + * Numerical Recipes in C, Second Edition, pages 279-282. + * + * The RNGs have names nextRandom<something> and they return a Float64. + * They are initialized by a call to initRandom<something> which + * take an Int32 as seed. + */ + +#define EPS 1.2e-7 +#define RNMX (1.0-EPS) +#define IM 2147483647 +#define AM ((Float64)1.0/IM) +#define IA 16807 +#define IQ 127773 +#define IR 2836 + +static Int32 state = 123456789; + +void initRandomParkMiller(Int32 seed) +{ + state = seed; + /* but we have to make sure that state never ever is set to zero */ + if (state==0) { state = 42; } +} + +Float64 nextRandomParkMiller(void) +{ + Int32 k; + Float64 result; + + k = state/IQ; + state = IA*(state-k*IQ)-k*IR; + if (state < 0) { state += IM; } + result = AM*state; + if (result >= 1.0) { result = RNMX; } + return result; +} + +/* ===================================================================== + * Now a pseudo RNG with a longer period (roughly 10^18) by lEcuyer + * (Numerical Recipes, page 282). + */ + +#define IM1 2147483563 +#define IM2 2147483399 +#define AM1 ((Float64)1.0/IM1) +#define IMM1 (IM1-1) +#define IA1 40014 +#define IA2 40692 +#define IQ1 53668 +#define IQ2 52774 +#define IR1 12211 +#define IR2 3791 + +#define NTAB 32 +#define NDIV (1+IMM1/NTAB) + +static Int32 state1 = 987654321; +static Int32 state2; +static Int32 y; +static Int32 v[NTAB]; + +/* ------------------------------------------------------------------ */ +static void initRandomSeedLEcuyer(Int32 seed) +{ + state1 = seed; + if (state1==0) { state1 = 987654321; } + state2 = state1; +} + +/* ------------------------------------------------------------------ */ +static void initRandomTabLEcuyer(void) +{ + Int32 j, k; + + for (j=NTAB+7; j>=0; j--) { + k = state1/IQ1; + state1 = IA1*(state1-k*IQ1)-k*IR1; + if (state1 < 0) { state1 += IM1; } + if (j < NTAB) { v[j] = state1; } + } + y = v[0]; +} + +/* ------------------------------------------------------------------ */ +void initRandomLEcuyer(Int32 seed) +{ + initRandomSeedLEcuyer(seed); + initRandomTabLEcuyer(); +} + +/* ------------------------------------------------------------------ */ +static Int32 power(Int32 base, Card64 exp, Int32 modulus) +{ + Int64 temp = 1; + Card64 mask; + + if (base < 0) { return 0; } + + /* note that at on each entry into the following loop body, the + actual value of temp is always positive and fits into an Int32 */ + for (mask = ((Card64)1) << 63; mask != 0; mask >>= 1) { + temp = (temp * temp) % modulus; + if (exp & mask) { + temp = (temp * base) % modulus; + } + } + return ((Int32) temp); +} + +/* ------------------------------------------------------------------ */ +static void forwardRandomLEcuyer(Card64 steps) +{ + Int32 a; + + a = power(IA1, steps, IM1); + state1 = (Int32) ( (((Int64)a) * state1) % IM1); + + a = power(IA2, steps, IM2); + state2 = (Int32) ( (((Int64)a) * state2) % IM2); +} + +/* ------------------------------------------------------------------ */ +/* + * The following initialization function is intended for use on a + * parallel machine. + * Each PE must call initParallelRandomLEcuyer once at the beginning. + * Calls to nextRandomLEcuyer will then return different (and hopefully + * pseudo unrelated) pseudo random numbers on different PEs. + * The calls to initParallelRandomLEcuyer on different PEs + * *MUST* *SATISFY* *THE* *FOLLOWING* *CONDITIONS*: + * - The parameter seed is the same on all PEs. + * - The parameter total is the same on all PEs and it must be + * the total number of PEs using the RNG. + * - The parameter pe must be different on each PE and for each + * i in the set {0,1,...,total-1} there must be exactly one PE + * calling initParallelRandomLEcuyer with parameter pe set to i. + * Please note: + * - There are *NO* run time checks to see whether the above + * conditions have been satisfied. + * - The numbers generated on different PEs are probably only more or + * less unrelated as long as the number of calls of nextRandomLEcuyer + * on each of the PEs is smaller than the period of the RNG + * (approx. 10^18) divided by the number of PEs (probably below 10^4). + * Since 10^14 calls of nextRandomLEcuyer will take quite some time, + * you are probably safe using this RNG. + * - This function uses nextRandomParkMiller. If you want to have + * reproducible results, make sure that you do not use + * RandomParkMiller yourself before calling initParallelRandomLEcuyer. + */ +void initParallelRandomLEcuyer(Int32 seed, Int32 pe, Int32 total) +{ + Card64 steps; + + initRandomSeedLEcuyer(seed); + + /* The period of the RNG is roughly 2.3e18, i.e. 2^61, + which should be distributed onto the PEs approximately equally; + because we do not know the exact value we are careful and take + one half of the average length of the interval per PE: */ + steps = (((Card64)1) << 60)/total; + + /* For PE number pe we get the starting point: */ + steps = steps * pe; + + /* Finally the starting point for each PE is randomly shifted + by an amount which is small compared to the length of + its interval (as long as there are much less than 2^30 PEs :-). + Therefore steps will still be far below the end of its interval: */ + steps = steps + (Card64) (nextRandomParkMiller() * (((Card64)1) << 30)); + + /* Now the RNG is initialized for PE pe as if it had already made steps + many steps from the initial seed. */ + forwardRandomLEcuyer(steps); + + initRandomTabLEcuyer(); +} + +/* ------------------------------------------------------------------ */ +Float64 nextRandomLEcuyer(void) +{ + Int32 k; + Float64 result; + int j; + + k = state1/IQ1; + state1 = IA1*(state1-k*IQ1)-k*IR1; + if (state1 < 0) { state1 += IM1; } + + k = state2/IQ2; + state2 = IA2*(state2-k*IQ2)-k*IR2; + if (state2 < 0) { state2 += IM2; } + + j = y/NDIV; + y = v[j] - state2; + v[j] = state1; + + if (y < 1) { y += IMM1; } + + result = AM1*y; + if (result >= 1.0) { result = RNMX; } + return result; +} diff --git a/src/benchmarks/zellularautomat/c/random.h b/src/benchmarks/zellularautomat/c/random.h new file mode 100644 index 00000000..8f4e6767 --- /dev/null +++ b/src/benchmarks/zellularautomat/c/random.h @@ -0,0 +1,98 @@ +#include <limits.h> +/* (c) 1996,1997 Thomas Worsch, Peter Sanders */ +/* ===================================================================== + * The pseudo random number generator functions in this file are + * similar to those in + * Numerical Recipes in C, Second Edition, pages 279-282. + * + * The RNGs have names nextRandom<something> and they return a Float64. + * They are initialized by a call to initRandom<something> which + * take an Int32 as seed. + */ + +/* C++ compatibility */ +#ifdef __cplusplus +#define CC extern "C" +#else +#define CC +#endif + +/* try to find out how 32 bit and 64 bit + * interger types look like in this compiler + * this may fail + * e.g., if the compiler does not support 64 data types... + */ +#if UINT_MAX >> 31 == 1 +typedef int Int32; +typedef unsigned int Card32; +#elif USHRT_MAX >> 31 == 1 +typedef short Int32; +typedef unsigned short Card32; +#elif ULONG_MAX >> 31 == 1 +typedef short Int32; +typedef unsigned short Card32; +#else /* provoke error */ +typedef nonexisting Int32; +typedef nonexisting Card32; +#endif + +#if UINT_MAX >> 63 == 1 +typedef int Int64; +typedef unsigned int Card64; +#elif ULONG_MAX >> 63 == 1 +typedef long Int64; +typedef unsigned long Card64; +#else +typedef long long int Int64; +typedef unsigned long long int Card64; +#endif + +typedef double Float64; + +/* ===================================================================== + * The Minimal Standard pseudo RNG (Numerical Recipes, page 279) + * by Park and Miller + * I added an initialization function and could therefore remove + * the MASK mechanism from the original ran0 function. + */ +CC void initRandomParkMiller(Int32 seed); +CC Float64 nextRandomParkMiller (void); + + +/* ===================================================================== + * Now a pseudo RNG with a longer period (roughly 10^18) by lEcuyer + * (Numerical Recipes, page 282). + */ +CC void initRandomLEcuyer(Int32 seed); +CC Float64 nextRandomLEcuyer (void); + + +/* ------------------------------------------------------------------ */ +/* + * The following initialization function is intended for use on a + * parallel machine. + * Each PE must call initParallelRandomLEcuyer once at the beginning. + * Calls to nextRandomLEcuyer will then return different (and hopefully + * pseudo unrelated) pseudo random numbers on different PEs. + * The calls to initParallelRandomLEcuyer on different PEs + * *MUST* *SATISFY* *THE* *FOLLOWING* *CONDITIONS*: + * - The parameter seed is the same on all PEs. + * - The parameter total is the same on all PEs and it must be + * the total number of PEs using the RNG. + * - The parameter pe must be different on each PE and for each + * i in the set {0,1,...,total-1} there must be exactly one PE + * calling initParallelRandomLEcuyer with parameter pe set to i. + * Please note: + * - There are *NO* run time checks to see whether the above + * conditions have been satisfied. + * - The numbers generated on different PEs are probably only more or + * less unrelated as long as the number of calls of nextRandomLEcuyer + * on each of the PEs is smaller than the period of the RNG + * (approx. 10^18) divided by the number of PEs (probably below 10^4). + * Since 10^14 calls of nextRandomLEcuyer will take quite some time, + * you are probably safe using this RNG. + * - This function uses nextRandomParkMiller. If you want to have + * reproducible results, make sure that you do not use + * RandomParkMiller yourself before calling initParallelRandomLEcuyer. + */ +CC void initParallelRandomLEcuyer(Int32 seed, Int32 pe, Int32 total); -- GitLab