Skip to content
Snippets Groups Projects
Verified Commit 37310e15 authored by Dorian Stoll's avatar Dorian Stoll
Browse files

nas-ft: fortran: Import from provided reference

Using class C parameters for now
parent 106cdcfe
No related branches found
No related tags found
No related merge requests found
SHELL=/bin/sh
BENCHMARK=ft
BENCHMARKU=FT
BLKFAC=32
include ../config/make.def
include ../sys/make.common
OBJS = ft.o ft_data.o ${COMMON}/${RAND}.o ${COMMON}/print_results.o \
${COMMON}/timers.o ${COMMON}/wtime.o
${PROGRAM}: config
@ver=$(VERSION); bfac=`echo $$ver|sed -e 's/^blk//' -e 's/^BLK//'`; \
if [ x$$ver != x$$bfac ] ; then \
${MAKE} BLKFAC=$${bfac:-32} exec; \
else \
${MAKE} exec; \
fi
exec: $(OBJS)
${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
.f90.o:
${FCOMPILE} $<
blk_par.h: FORCE
sed -e 's/=0/=$(BLKFAC)/' blk_par0.h > blk_par.h_wk
@ if ! `diff blk_par.h_wk blk_par.h > /dev/null 2>&1`; then \
mv -f blk_par.h_wk blk_par.h; else rm -f blk_par.h_wk; fi
FORCE:
ft.o: ft.f90 ft_data.o
ft_data.o: ft_data.f90 npbparams.h blk_par.h
clean:
- rm -f *.o *~ mputil* *.mod
- rm -f ft npbparams.h core blk_par.h
- if [ -d rii_files ]; then rm -r rii_files; fi
This code implements the time integration of a three-dimensional
partial differential equation using the Fast Fourier Transform.
The code uses Fortran 90 module to specify data fields. So, a compiler
supports Fortran 90 is required.
integer fftblock_default, fftblockpad_default
parameter (fftblock_default=32, &
& fftblockpad_default=fftblock_default+2)
This diff is collapsed.
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! ft_data module
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
module ft_data
include 'npbparams.h'
! total number of grid points with padding
integer(kind2) nxp, ntotalp
parameter (nxp=nx+1)
parameter (ntotalp=nxp*ny*nz)
double precision ntotal_f
parameter (ntotal_f=dble(nx)*ny*nz)
! If processor array is 1x1 -> 0D grid decomposition
! Cache blocking params. These values are good for most
! RISC processors.
! FFT parameters:
! fftblock controls how many ffts are done at a time.
! The default is appropriate for most cache-based machines
! On vector machines, the FFT can be vectorized with vector
! length equal to the block size, so the block size should
! be as large as possible. This is the size of the smallest
! dimension of the problem: 128 for class A, 256 for class B and
! 512 for class C.
include 'blk_par.h'
! integer fftblock_default, fftblockpad_default
! parameter (fftblock_default=32, fftblockpad_default=34)
integer fftblock, fftblockpad
! we need a bunch of logic to keep track of how
! arrays are laid out.
! Note: this serial version is the derived from the parallel 0D case
! of the ft NPB.
! The computation proceeds logically as
! set up initial conditions
! fftx(1)
! transpose (1->2)
! ffty(2)
! transpose (2->3)
! fftz(3)
! time evolution
! fftz(3)
! transpose (3->2)
! ffty(2)
! transpose (2->1)
! fftx(1)
! compute residual(1)
! for the 0D, 1D, 2D strategies, the layouts look like xxx
!
! 0D 1D 2D
! 1: xyz xyz xyz
! the array dimensions are stored in dims(coord, phase)
integer dims(3)
integer T_total, T_setup, T_fft, T_evolve, T_checksum, &
& T_fftx, T_ffty, &
& T_fftz, T_max
parameter (T_total = 1, T_setup = 2, T_fft = 3, &
& T_evolve = 4, T_checksum = 5, &
& T_fftx = 6, &
& T_ffty = 7, &
& T_fftz = 8, T_max = 8)
logical timers_enabled
external timer_read
double precision timer_read
external ilog2
integer ilog2
external randlc
double precision randlc
! other stuff
logical debug, debugsynch
double precision seed, a, pi, alpha
parameter (seed = 314159265.d0, a = 1220703125.d0, &
& pi = 3.141592653589793238d0, alpha=1.0d-6)
! roots of unity array
! relies on x being largest dimension?
double complex u(nxp)
! for checksum data
double complex sums(0:niter_default)
! number of iterations
integer niter
end module ft_data
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! ft_fields module
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
module ft_fields
!---------------------------------------------------------------------
! u0, u1, u2 are the main arrays in the problem.
! Depending on the decomposition, these arrays will have different
! dimensions. To accomodate all possibilities, we allocate them as
! one-dimensional arrays and pass them to subroutines for different
! views
! - u0 contains the initial (transformed) initial condition
! - u1 and u2 are working arrays
! - twiddle contains exponents for the time evolution operator.
!---------------------------------------------------------------------
double complex, allocatable :: &
& u0(:), pad1(:), &
& u1(:), pad2(:)
! > u2(:)
double precision, allocatable :: twiddle(:)
!---------------------------------------------------------------------
! Large arrays are in module so that they are allocated on the
! heap rather than the stack. This module is not
! referenced directly anywhere else. Padding is to avoid accidental
! cache problems, since all array sizes are powers of two.
!---------------------------------------------------------------------
end module ft_fields
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine alloc_space
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! allocate space dynamically for data arrays
!---------------------------------------------------------------------
use ft_data, only : ntotalp
use ft_fields
implicit none
integer ios
allocate ( &
& u0(ntotalp), pad1(3), &
& u1(ntotalp), pad2(3), &
! > u2(ntotalp),
& twiddle(ntotalp), &
& stat = ios)
if (ios .ne. 0) then
write(*,*) 'Error encountered in allocating space'
stop
endif
return
end
6 ! number of iterations
2 ! layout type. 0 = 0d, 1 = 1d, 2 = 2d
2 4 ! processor layout. 0d must be "1 1"; 1d must be "1 N"
! CLASS = C
!
!
! This file is generated automatically by the setparams utility.
! It sets the number of processors and the class of the NPB
! in this directory. Do not modify it by hand.
!
integer nx, ny, nz, maxdim, niter_default
parameter (nx=512, ny=512, nz=512, maxdim=512)
parameter (niter_default=20)
integer kind2
parameter (kind2=4)
logical convertdouble
parameter (convertdouble = .false.)
character compiletime*11
parameter (compiletime='13 Jun 2024')
character npbversion*5
parameter (npbversion='3.4.2')
character cs1*8
parameter (cs1='gfortran')
character cs2*5
parameter (cs2='$(FC)')
character cs3*6
parameter (cs3='(none)')
character cs4*6
parameter (cs4='(none)')
character cs5*12
parameter (cs5='-O3 -fopenmp')
character cs6*9
parameter (cs6='$(FFLAGS)')
character cs7*6
parameter (cs7='randi8')
subroutine print_results(name, class, n1, n2, n3, niter, &
& t, mops, optype, verified, npbversion, &
& compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character(len=*) name
character class
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*15
logical verified
character(len=*) npbversion, compiletime, &
& cs1, cs2, cs3, cs4, cs5, cs6, cs7
integer num_threads, max_threads
!$ integer omp_get_num_threads, omp_get_max_threads
!$ external omp_get_num_threads, omp_get_max_threads
max_threads = 0
!$ max_threads = omp_get_max_threads()
! figure out number of threads used
num_threads = 0
!$omp parallel shared(num_threads)
!$omp master
!$ num_threads = omp_get_num_threads()
!$omp end master
!$omp end parallel
write (*, 2) name
2 format(//, ' ', A, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
! If this is not a grid-based problem (EP, FT, CG), then
! we only print n1, which contains some measure of the
! problem size. In that case, n2 and n3 are both zero.
! Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f15.0)' ) 2.d0**n1
j = 15
if (size(j:j) .eq. '.') j = j - 1
write (*,42) size(1:j)
42 format(' Size = ',9x, a15)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
if (num_threads .gt. 0) write (*,7) num_threads
7 format(' Total threads = ', 12x, i12)
if (max_threads .gt. 0) write (*,8) max_threads
8 format(' Avail threads = ', 12x, i12)
if (num_threads .ne. max_threads) write (*,88)
88 format(' Warning: Threads used differ from threads available')
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
if (num_threads .gt. 0) write (*,10) mops/float( num_threads )
10 format(' Mop/s/thread = ', 12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
write(*,14) compiletime
14 format(' Compile date = ', 12x, a12)
write (*,121) cs1
121 format(/, ' Compile options:', /, &
& ' FC = ', A)
write (*,122) cs2
122 format(' FLINK = ', A)
write (*,123) cs3
123 format(' F_LIB = ', A)
write (*,124) cs4
124 format(' F_INC = ', A)
write (*,125) cs5
125 format(' FFLAGS = ', A)
write (*,126) cs6
126 format(' FLINKFLAGS = ', A)
write(*, 127) cs7
127 format(' RAND = ', A)
write (*,130)
130 format(//' Please send all errors/feedbacks to:'// &
& ' NPB Development Team'/ &
& ' npb@nas.nasa.gov'//)
! 130 format(//' Please send the results of this run to:'//
! > ' NPB Development Team '/
! > ' Internet: npb@nas.nasa.gov'/
! > ' '/
! > ' If email is not available, send this to:'//
! > ' MS T27A-1'/
! > ' NASA Ames Research Center'/
! > ' Moffett Field, CA 94035-1000'//
! > ' Fax: 650-604-3957'//)
return
end
double precision function randlc(x, a)
!---------------------------------------------------------------------
!
! This routine returns a uniform pseudorandom double precision number in the
! range (0, 1) by using the linear congruential generator
!
! x_{k+1} = a x_k (mod 2^46)
!
! where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
! before repeating. The argument A is the same as 'a' in the above formula,
! and X is the same as x_0. A and X must be odd double precision integers
! in the range (1, 2^46). The returned value RANDLC is normalized to be
! between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
! the new seed x_1, so that subsequent calls to RANDLC using the same
! arguments will generate a continuous sequence.
implicit none
double precision x, a
integer(kind=8) i246m1, Lx, La
double precision d2m46
parameter(d2m46=0.5d0**46)
parameter(i246m1=INT(Z'00003FFFFFFFFFFF',8))
Lx = X
La = A
Lx = iand(Lx*La,i246m1)
randlc = d2m46*dble(Lx)
x = dble(Lx)
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
SUBROUTINE VRANLC (N, X, A, Y)
implicit none
integer n, i
double precision x, a, y(*)
integer(kind=8) i246m1, Lx, La
double precision d2m46
! This doesn't work, because the compiler does the calculation in 32
! bits and overflows. No standard way (without f90 stuff) to specify
! that the rhs should be done in 64 bit arithmetic.
! parameter(i246m1=2**46-1)
parameter(d2m46=0.5d0**46)
parameter(i246m1=INT(Z'00003FFFFFFFFFFF',8))
Lx = X
La = A
do i = 1, N
Lx = iand(Lx*La,i246m1)
y(i) = d2m46*dble(Lx)
end do
x = dble(Lx)
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
module timers
double precision start(64), elapsed(64)
!$omp threadprivate(start, elapsed)
double precision, external :: elapsed_time
end module timers
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_clear(n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
use timers
implicit none
integer n
elapsed(n) = 0.0
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_start(n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
use timers
implicit none
integer n
start(n) = elapsed_time()
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_stop(n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
use timers
implicit none
integer n
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
double precision function timer_read(n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
use timers
implicit none
integer n
timer_read = elapsed(n)
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
double precision function elapsed_time()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
!$ external omp_get_wtime
!$ double precision omp_get_wtime
double precision t
logical mp
! ... Use the OpenMP timer if we can (via C$ conditional compilation)
mp = .false.
!$ mp = .true.
!$ t = omp_get_wtime()
if (.not.mp) then
! This function must measure wall clock time, not CPU time.
! Since there is no portable timer in Fortran (77)
! we call a routine compiled in C (though the C source may have
! to be tweaked).
call wtime(t)
! The following is not ok for "official" results because it reports
! CPU time not wall clock time. It may be useful for developing/testing
! on timeshared Crays, though.
! call second(t)
endif
elapsed_time = t
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine check_timer_flag( timeron )
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
logical timeron
integer nc, ios
character(len=20) val
timeron = .false.
! ... Check environment variable "NPB_TIMER_FLAG"
call get_environment_variable('NPB_TIMER_FLAG', val, nc, ios)
if (ios .eq. 0) then
if (nc .le. 0) then
timeron = .true.
else if (val(1:1) .ge. '1' .and. val(1:1) .le. '9') then
timeron = .true.
else if (val .eq. 'on' .or. val .eq. 'ON' .or. &
& val .eq. 'yes' .or. val .eq. 'YES' .or. &
& val .eq. 'true' .or. val .eq. 'TRUE') then
timeron = .true.
endif
else
! ... Check if the "timer.flag" file exists
open (unit=2, file='timer.flag', status='old', iostat=ios)
if (ios .eq. 0) then
close(2)
timeron = .true.
endif
endif
return
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment