Skip to content

Commit

Permalink
Creating nekbone repo from https://asc.llnl.gov/coral-2-benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
asarkar-parsys committed Feb 18, 2022
0 parents commit 72b42d7
Show file tree
Hide file tree
Showing 88 changed files with 50,129 additions and 0 deletions.
41 changes: 41 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
***********************************************************
* Changes in 2.0 *
***********************************************************
-Subroutine gsync() has changed to nekgsync() to avoid
possible conflict on certain architectures

-Executable is renamed 'nekbone' to replace 'nekproxy' and
other naming changes.

-iel0 and ielN set in data.rea file are now used to control the
range of tests run. Tests range in size from iel0
to ielN elements per process. (Previously, tests
were run from 1 to lelt elements per process.) The
maximum value of ielN is lelt.

-nx0 and nxN set in data.rea file are now used to control the
range of polynomial orders. Ranging from nx0 to
nxN, where nxN<=lx1 (which is set in SIZE). Previously
tests only ran with nx1=lx1. The default is set to
reflect this, but nekbone now supports a range of
polynomial orders without recompiling the code.
***********************************************************
* Changes in 2.1 *
***********************************************************
-Fixed nx0 and nxN control of polynomial order. Default is
now to use lx1 until further notice. Variable
nx1 caused memory instabilities and needs further
development.
-Fixed a memory copy bug in the jl/ array transfer code.
sarray_transfer, used for the tuple transfer, should
be fixed now.

***********************************************************
* Changes in 2.3 *
***********************************************************
- added OpenMP parallelism, MPITHREADS preprocessor macro
controls if MPI is called from one or multiple threads
- added timers controlled by TIMERS preprocessor macro
- fixed gather-scatter operation gsop() to always use pairwise
method
- switched to using system_clock routine in dummy mpi_wtime()
34 changes: 34 additions & 0 deletions COPYRIGHT
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
COPYRIGHT

The following is a notice of limited availability of the code, and disclaimer
which must be included in the prologue of the code and in all source listings
of the code.

Copyright Notice
+ 2012 University of Chicago

Permission is hereby granted to use, reproduce, prepare derivative works, and
to redistribute to others. This software was authored by:

P. Fischer: (630) 252-6018; FAX: (630) 252-5986; email: [email protected]
Mathematics and Computer Science Division
Argonne National Laboratory, Argonne IL 60439

GOVERNMENT LICENSE

Portions of this material resulted from work developed under a U.S.
Government Contract and are subject to the following license: the Government
is granted for itself and others acting on its behalf a paid-up, nonexclusive,
irrevocable worldwide license in this computer software to reproduce, prepare
derivative works, and perform publicly and display publicly.

DISCLAIMER

This computer code material was prepared, in part, as an account of work
sponsored by an agency of the United States Government. Neither the United
States, nor the University of Chicago, nor any of their employees, makes any
warranty express or implied, or assumes any legal liability or responsibility
for the accuracy, completeness, or usefulness of any information, apparatus,
product, or process disclosed, or represents that its use would not infringe
privately owned rights.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# nekbone_2_3_5
Binary file added readme.pdf
Binary file not shown.
5 changes: 5 additions & 0 deletions src/DXYZ
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
C
C Elemental derivative operators
C
C Both members of /dxyz/ are lx1-by-lx1 arrays. lx1 is not defined
C in this file; the including unit must make it available first.
C NOTE(review): dxtm1 is presumably the transpose of dxm1 -- confirm
C against the routine that fills these arrays.
C
common /dxyz/ dxm1(lx1,lx1), dxtm1(lx1,lx1)

19 changes: 19 additions & 0 deletions src/INPUT
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
C
C Input parameters from preprocessors.
C
C Note that in parallel implementations, we distinguish between
C distributed data (LELT) and uniformly distributed data.
C
C lelt and ldimt1 are not defined in this file; the including unit
C must make them available first.
C

C /input5/: xc, yc and zc hold 8 values per element (lelt elements);
C bc holds 5 x 6 values per element for each index 0..ldimt1.
C NOTE(review): presumably element vertex coordinates and
C boundary-condition parameters -- confirm.
common /input5/ xc(8,lelt),yc(8,lelt),zc(8,lelt)
$ ,bc(5,6,lelt,0:ldimt1)


C /input8/: character data only. cbc holds 3-character strings
C (6 per element per index 0..ldimt1); ccurve holds single
C characters (12 per element).
common /input8/ cbc(6,lelt,0:ldimt1),ccurve(12,lelt)
character*1 ccurve
character*3 cbc

C /cflops/: two 8-byte integer counters followed by a real.
C NOTE(review): presumably flop counts and a derived MFLOPS rate --
C confirm against the code that updates them.
real mflops
integer*8 flop_a, flop_cg
common /cflops/ flop_a,flop_cg,mflops

4 changes: 4 additions & 0 deletions src/MASS
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
C
C /mass/: bm1 and binvm1 are lx1 x ly1 x lz1 arrays per element
C (lelt elements); volvm1 is a scalar. The dimensioning parameters
C are not defined in this file.
C NOTE(review): presumably the mass matrix, its inverse and the
C total volume -- confirm against the setup code.
C
common /mass/
$ bm1 (lx1,ly1,lz1,lelt)
$ ,binvm1(lx1,ly1,lz1,lelt)
$ ,volvm1
31 changes: 31 additions & 0 deletions src/PARALLEL
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
C
C Communication information
C NOTE: NID is stored in 'SIZE' for greater accessibility
C
C lelt and ldimt1 are not defined in this file; the including unit
C must make them available first.
common /cube1/ node,pid,np,nullpid,node0
integer node,pid,np,nullpid,node0


c Maximum number of elements (limited to 2**31/12, at least for now)
c (2**31/12 truncated to an integer is the value used below.)
parameter(nelgt_max = 178956970)

c /hcglb/: nvtot is declared integer*8 below and is the first member
c of the block. The gllel/gllnid members (dimensioned by lelg) are
c commented out, both here and in the type declarations that follow.
c NOTE(review): lglel is presumably the local-to-global element
c map -- confirm.
common /hcglb/ nvtot,nelgf(0:ldimt1)
$ ,lglel(lelt)
c $ ,gllel(lelg)
c $ ,gllnid(lelg)
$ ,nelgv,nelgt

integer lglel
c integer gllel,gllnid
integer*8 nvtot

c /diagl/ and /precsl/ each hold one logical flag; /precsn/ holds
c four integers.
c NOTE(review): wdsize/isize/lsize/csize are presumably byte sizes
c of the basic data types -- confirm.
common /diagl/ ifgprnt
logical ifgprnt
common/precsn/ wdsize,isize,lsize,csize
common/precsl/ ifdblas
integer wdsize,isize,lsize,csize
logical ifdblas
C
C crystal-router, gather-scatter, and xxt handles (xxt=csr grid solve)
C
C All handles are integers. gsh_fld is indexed 0..ldimt1 while xxth
C is indexed 1..ldimt1.
C
common /comm_handles/ cr_h, gsh, gsh_fld(0:ldimt1), xxth(ldimt1)
integer cr_h, gsh, gsh_fld , xxth
66 changes: 66 additions & 0 deletions src/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
Nek_comm-1.0

This is the communication testing kernel for the MPI
all reduce and point to point communication used within
the nekbone mini-app and standard NEK5000. This kernel
runs a battery of platform timers using MPI standard.

To Run:

NOTE - Unlike the other nek codes, a data.rea file
is not needed, since there is no geometry
being set up.

After untarring nek_comm-1.0.tgz, change working
directory to the nek_comm/test/ directory:
cd ~/nek_comm/test/

Edit the makenek script to specify the compiler and
appropriate compiler flags.

Compile and link the code using the makenek script:
./makenek n
where n is the chosen name of test run.

A successful compilation will result in:

#############################################################
# Compilation successful! #
#############################################################

And a nekcomm executable.

Run the code in parallel using the provided nekpmpi script
by specifying the test name (for logfile naming purposes)
and the number of processors. For example, to run a test
called 'n' on 4 processes:
./nekpmpi n 4

This will produce a logfile, n.log.4.


Interpreting results:

The logfile will have a header describing the parameters the
test was run with and the output of the timing tests.

All reduce tests are output with the 'gop' tag:
np nwds tmsg tpwd
where,
np - number of processors
nwds - number of words
tmsg - time per message
tpwd - time per word


Point to point tests are output with the 'pg' tag:
nodeb np nloop nwds tmsg tpwd
where,
nodeb - the second processor, which node 0 is
testing with
np - number of processors
nloop - number of tests run with these nodes
nwds - number of words per message
tmsg - time per message
tpwd - time per word

19 changes: 19 additions & 0 deletions src/TIMER
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

c
c Timer storage shared through common block /timer/.
c
c Every real array below has tmax (= 1024) entries, except tgop,
c which is 4 x tmax. gopi is an integer array of tmax entries and is
c the last member of the block.
c
c ttemp1..ttemp4 and dnekclock are declared here but are NOT members
c of /timer/, so they are local to each unit that includes this file.
c NOTE(review): dnekclock is presumably an external real*8 clock
c function, and each array presumably accumulates time for the
c routine it is named after -- confirm against the callers.
c
integer tmax
parameter (tmax = 1024)

integer gopi(tmax)

real ttemp1, ttemp2, ttemp3, ttemp4

real trzero(tmax), tcopy(tmax), tsolvem(tmax)
real tglsc3a(tmax), tglsc3b(tmax), tglsc3c(tmax), tglsc3d(tmax)
real tadd2s1(tmax), tadd2s2a(tmax), tadd2s2b(tmax), tadd2s2c(tmax)
real tlocalgrad3(tmax), twrwswt(tmax), tlocalgrad3t(tmax)
real tgsop(tmax), tgop(4,tmax)

real*8 dnekclock

c Member order fixes the storage layout of /timer/; every unit that
c declares this block must list the members in the same order.
common /timer/ trzero, tcopy, tsolvem, tglsc3a, tglsc3b, tglsc3c,
+tglsc3d, tadd2s1, tadd2s2a, tadd2s2b, tadd2s2c, tlocalgrad3,
+twrwswt, tlocalgrad3t, tgsop, tgop, gopi
5 changes: 5 additions & 0 deletions src/TOTAL
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
c
c Aggregate include: pulls in the DXYZ, INPUT, MASS, PARALLEL and WZ
c declaration files in one statement group.
c NOTE(review): those files dimension arrays with parameters such as
c lx1 and lelt that none of them define, so the including unit
c presumably has to include SIZE before this file -- confirm.
c
include 'DXYZ'
include 'INPUT'
include 'MASS'
include 'PARALLEL'
include 'WZ'
7 changes: 7 additions & 0 deletions src/WZ
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@


c Gauss-Lobatto and Gauss points
c zgm1 has lx1 rows and 3 columns.
c NOTE(review): presumably one column per coordinate direction --
c confirm.
common /gauss/ zgm1(lx1,3)

c Weights
c One 1-D weight array per direction (sized lx1, ly1, lz1) plus the
c 3-D array w3m1 sized lx1 x ly1 x lz1.
common /wxyz/ wxm1(lx1), wym1(ly1), wzm1(lz1), w3m1(lx1,ly1,lz1)
41 changes: 41 additions & 0 deletions src/bg_aligned3.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Symbolic register names for PowerPC GNU as: rN and fN are defined as
# the plain number N so that the instructions in this file can write
# operands as r3, r9, ... instead of bare numbers.
# The fN names are defined for completeness; no instruction in this
# file uses a floating-point register.
.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4
.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9
.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14
.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19
.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24
.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29
.set r30,30; .set r31,31
.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4
.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
.set f30,30; .set f31,31

.file "bg_aligned3.s"

.globl bg_aligned3
.type bg_aligned3, @function
# Size in bytes: the routine is 12 instructions of 4 bytes each.
.size bg_aligned3, 48

.section ".text"
.align 2

#-----------------------------------------------------------------------
# bg_aligned3
#
# Tests whether the values in r3, r4 and r5 all have their low four
# bits clear (i.e. are multiples of 16) and stores the verdict as a
# 32-bit word at the address held in r6.
#
# In:    r3, r4, r5 = values to test
#        r6         = address that receives the result word
# Out:   word at 0(r6) = 1 if all three values are multiples of 16,
#                        0 otherwise
#        r3 = 0 on every path
# Clobb: r0, r9, cr0, cr7
#
# NOTE(review): r3..r6 are presumably the first four arguments and
# r3/r4/r5 presumably array addresses passed by reference from
# Fortran -- confirm against the caller.
#-----------------------------------------------------------------------
bg_aligned3:
# cr0 <- result of testing (r3 & 15); the value left in r0 is not used.
andi. r0,r3,15
# r9 <- low 4 bits of r4 (the upper 28 bits are cleared), then
# cr7 <- comparison of r9 with 0. cr7 is kept separate from cr0
# because cr0 is overwritten by the second andi. below.
clrlwi r9,r4,28
cmpwi cr7,r9,0
# r3 <- 0, and r0 <- 0 as the default "not aligned" result.
# li does not alter the condition register.
li r3,0
li r0,0
# First test (cr0): r3 had low bits set -> store 0.
bne- .L.das_label.58
# cr0 <- result of testing (r5 & 15); r9 is overwritten here, but
# the r4 result is already captured in cr7.
andi. r9,r5,15
# Second test (cr7): r4 had low bits set -> store 0.
bne- cr7,.L.das_label.58
# Third test (cr0): r5 had low bits set -> store 0.
bne- .L.das_label.58
# All three tests passed: result is 1.
li r0,1
.L.das_label.58:
# Common exit: write the result word and return.
stw r0,0(r6)
blr


.ident "GCC: (GNU) 3.2"
Loading

0 comments on commit 72b42d7

Please sign in to comment.