From 72b42d711627274117675b83cb60186fdbabc038 Mon Sep 17 00:00:00 2001 From: Abhik Sarkar Date: Fri, 18 Feb 2022 14:24:43 -0800 Subject: [PATCH] Creating nekbone repo from https://asc.llnl.gov/coral-2-benchmarks --- CHANGES | 41 + COPYRIGHT | 34 + README.md | 1 + readme.pdf | Bin 0 -> 135910 bytes src/DXYZ | 5 + src/INPUT | 19 + src/MASS | 4 + src/PARALLEL | 31 + src/README | 66 + src/TIMER | 19 + src/TOTAL | 5 + src/WZ | 7 + src/bg_aligned3.s | 41 + src/bg_mxm3.s | 406 + src/bg_mxm44.s | 497 + src/bg_mxm44_uneven.s | 82 + src/blas.f | 30886 ++++++++++++++++++++++++++++++ src/byte_mpi.f | 209 + src/cg.f | 335 + src/comm_mpi.f | 1212 ++ src/driver.f | 660 + src/driver_comm.f | 21 + src/jl/Makefile | 91 + src/jl/README | 69 + src/jl/c99.h | 16 + src/jl/cdep.py | 33 + src/jl/comm.c | 175 + src/jl/comm.h | 255 + src/jl/crs.h | 24 + src/jl/crs_test.c | 116 + src/jl/crystal.c | 141 + src/jl/crystal.h | 21 + src/jl/crystal_test.c | 88 + src/jl/fail.c | 53 + src/jl/fail.h | 52 + src/jl/fcrystal.c | 191 + src/jl/gen_poly_imp.c | 227 + src/jl/gs.c | 1503 ++ src/jl/gs.h | 141 + src/jl/gs_defs.h | 81 + src/jl/gs_local.c | 336 + src/jl/gs_local.h | 43 + src/jl/gs_test.c | 68 + src/jl/gs_test_old.c | 147 + src/jl/gs_unique_test.c | 72 + src/jl/makefile.cdep | 48 + src/jl/mem.h | 168 + src/jl/name.h | 44 + src/jl/odep_info.py | 50 + src/jl/rand_elt_test.c | 169 + src/jl/rand_elt_test.h | 18 + src/jl/rdtsc.h | 12 + src/jl/sarray_sort.c | 45 + src/jl/sarray_sort.h | 89 + src/jl/sarray_sort_test.c | 47 + src/jl/sarray_transfer.c | 197 + src/jl/sarray_transfer.h | 95 + src/jl/sarray_transfer_test.c | 93 + src/jl/sort.c | 31 + src/jl/sort.h | 76 + src/jl/sort_imp.h | 543 + src/jl/sort_test.c | 113 + src/jl/sort_test2.c | 74 + src/jl/spchol_test.c | 54 + src/jl/tensor.c | 82 + src/jl/tensor.h | 199 + src/jl/types.h | 79 + src/k10_mxm.c | 56 + src/makenek.inc | 324 + src/math.f | 1402 ++ src/mpi_dummy.f | 1053 + src/mpi_dummy.h | 61 + src/mxm_std.f | 4123 ++++ src/mxm_wrapper.f | 165 + src/omp.f | 128 + src/prox_dssum.f | 174 + src/prox_setup.f | 113 + src/semhat.f | 94 + src/speclib.f | 1176 ++ src/timers.c | 24 + test/example1/SIZE | 17 + test/example1/data.rea | 5 + test/example1/makefile.template | 140 + test/example1/makenek | 55 + test/example1/makenek-bgq | 55 + test/example1/makenek-cray-knl | 55 + test/example1/makenek-intel | 55 + test/example1/nekpmpi | 4 + 88 files changed, 50129 insertions(+) create mode 100644 CHANGES create mode 100644 COPYRIGHT create mode 100644 README.md create mode 100644 readme.pdf create mode 100644 src/DXYZ create mode 100644 src/INPUT create mode 100644 src/MASS create mode 100644 src/PARALLEL create mode 100644 src/README create mode 100644 src/TIMER create mode 100644 src/TOTAL create mode 100644 src/WZ create mode 100644 src/bg_aligned3.s create mode 100644 src/bg_mxm3.s create mode 100644 src/bg_mxm44.s create mode 100644 src/bg_mxm44_uneven.s create mode 100644 src/blas.f create mode 100644 src/byte_mpi.f create mode 100644 src/cg.f create mode 100644 src/comm_mpi.f create mode 100644 src/driver.f create mode 100644 src/driver_comm.f create mode 100644 src/jl/Makefile create mode 100644 src/jl/README create mode 100644 src/jl/c99.h create mode 100755 src/jl/cdep.py create mode 100644 src/jl/comm.c create mode 100644 src/jl/comm.h create mode 100644 src/jl/crs.h create mode 100644 src/jl/crs_test.c create mode 100644 src/jl/crystal.c create mode 100644 src/jl/crystal.h create mode 100644 src/jl/crystal_test.c create mode 100644 src/jl/fail.c create mode 100644 src/jl/fail.h create mode 100644 src/jl/fcrystal.c create mode 100644 src/jl/gen_poly_imp.c create mode 100644 src/jl/gs.c create mode 100644 src/jl/gs.h create mode 100644 src/jl/gs_defs.h create mode 100644 src/jl/gs_local.c create mode 100644 src/jl/gs_local.h create mode 100644 src/jl/gs_test.c create mode 100644 src/jl/gs_test_old.c create mode 100644 src/jl/gs_unique_test.c create mode 100644 src/jl/makefile.cdep create mode 100644 src/jl/mem.h create mode 100644 src/jl/name.h create mode 100755 src/jl/odep_info.py create mode 100644 src/jl/rand_elt_test.c create mode 100644 src/jl/rand_elt_test.h create mode 100644 src/jl/rdtsc.h create mode 100644 src/jl/sarray_sort.c create mode 100644 src/jl/sarray_sort.h create mode 100644 src/jl/sarray_sort_test.c create mode 100644 src/jl/sarray_transfer.c create mode 100644 src/jl/sarray_transfer.h create mode 100644 src/jl/sarray_transfer_test.c create mode 100644 src/jl/sort.c create mode 100644 src/jl/sort.h create mode 100644 src/jl/sort_imp.h create mode 100644 src/jl/sort_test.c create mode 100644 src/jl/sort_test2.c create mode 100644 src/jl/spchol_test.c create mode 100644 src/jl/tensor.c create mode 100644 src/jl/tensor.h create mode 100644 src/jl/types.h create mode 100644 src/k10_mxm.c create mode 100644 src/makenek.inc create mode 100644 src/math.f create mode 100644 src/mpi_dummy.f create mode 100644 src/mpi_dummy.h create mode 100644 src/mxm_std.f create mode 100644 src/mxm_wrapper.f create mode 100644 src/omp.f create mode 100644 src/prox_dssum.f create mode 100644 src/prox_setup.f create mode 100644 src/semhat.f create mode 100644 src/speclib.f create mode 100644 src/timers.c create mode 100644 test/example1/SIZE create mode 100644 test/example1/data.rea create mode 100644 test/example1/makefile.template create mode 100755 test/example1/makenek create mode 100755 test/example1/makenek-bgq create mode 100755 test/example1/makenek-cray-knl create mode 100755 test/example1/makenek-intel create mode 100755 test/example1/nekpmpi diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..28b5f6b --- /dev/null +++ b/CHANGES @@ -0,0 +1,41 @@ +*********************************************************** +* Changes in 2.0 * +*********************************************************** + -Subroutine gsync() has changed to nekgsync() to avoid + possible conflict on certain architectures + + -Executable is renamed 'nekbone' to replace 'nekproxy' and + other naming changes. + + -iel0 and ielN set in data.rea file are now used to control the + range of tests ran. Test range in size from iel0 + to ielN elements per process. (prevoiusly tests + were ran from 1 to lelt elements per process) The + maximum value of ielN is lelt. + + -nx0 and nxN set in data.rea file are now used to control the + range of polynomial orders. Ranging from nx0 to + nxN, where nxN<=lx1 (which is set in SIZE). Previously + tests only ran with nx1=lx1. The default is set to + reflect this, but nekbone now supports a range of + polynomial orders without recompiling the code. +*********************************************************** +* Changes in 2.1 * +*********************************************************** + -Fixed nx0 and nxN control of polynomial order. Default is + now to use lx1 until further notice. Variable + nx1 caused memory unstabilities and needs further + development. + -Fixed a memory copy bug in the jl/ array transfer code. + sarray_trasfer, used for the tuple transfer, should + be fixed now. + +*********************************************************** +* Changes in 2.3 * +*********************************************************** + - added OpenMP parallelism, MPITHREADS preprocessor macro + controls if MPI is called from one or multiple threads + - added timers controlled by TIMERS preprocessor macro + - fixed gather-scatter operation gsop() to always use pairwise + method + - switched to using system_clock routine in dummy mpi_wtime() diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..9f84af5 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,34 @@ + COPYRIGHT + +The following is a notice of limited availability of the code, and disclaimer +which must be included in the prologue of the code and in all source listings +of the code. + +Copyright Notice + + 2012 University of Chicago + +Permission is hereby granted to use, reproduce, prepare derivative works, and +to redistribute to others. This software was authored by: + +P. Fischer: (630) 252-6018; FAX: (630) 252-5986; email: fischer@mcs.anl.gov +Mathematics and Computer Science Division +Argonne National Laboratory, Argonne IL 60439 + + GOVERNMENT LICENSE + +Portions of this material resulted from work developed under a U.S. +Government Contract and are subject to the following license: the Government +is granted for itself and others acting on its behalf a paid-up, nonexclusive, +irrevocable worldwide license in this computer software to reproduce, prepare +derivative works, and perform publicly and display publicly. + + DISCLAIMER + +This computer code material was prepared, in part, as an account of work +sponsored by an agency of the United States Government. Neither the United +States, nor the University of Chicago, nor any of their employees, makes any +warranty express or implied, or assumes any legal liability or responsibility +for the accuracy, completeness, or usefulness of any information, apparatus, +product, or process disclosed, or represents that its use would not infringe +privately owned rights. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..fab3aad --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# nekbone_2_3_5 diff --git a/readme.pdf b/readme.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8c8aa70dce64d369b042400913963b32eea91d06 GIT binary patch literal 135910 zcma%?Q*bWavbAH|wr$(CZQEY4ZQHi<##phjVmn!}lmFYh&N=_ZzS&oER^N`U>hV;M zCRY-Zpl4=agCQTkN+^b5Ct@OUG_ir<<71RFcd&G`BI5X`qQWR)ZRck0Lc}OxXY6J! zW^U?eW-cHAMI#%)1P(S|=B&#h4%Dim0G67Catt%B4e zM2+X=n>?*{DO1Qj-LL)8@{8A%kjOaj+wS(uy1R4L>4izSGv|g9?^pPBn_$>f8ThhL zqL|dsy|oT1Gt#kGX^?zMcGxjnUxK3tD;b44&0V!ElZbF!!+QeBNa~^anwSbX;Ciu^ z+usdBvjf_B( z2xt_u{;WWl6OG~SS?*;dZm7)V(;tBw!IM44__Nt&TJ3O${h5gkQ>aVZK!L|X)R;mc zJk+?${^0@NcfP~5?m81jE4QcKF6=yNknr@cyo&CEX%6X%sb3Dk!xZ<+-R?G8`z)F3 zO}oXiS>Q~Di%M+`eZNkBZG-8-N~OwYXO1)~)QRNx*&w&zW1I1X=7En8nC>*{-onh) zUqa1I5-@p9GV)2h-&ryUA*T-xe-FC-@h0$vzFhaVRv|Gg?C@&3aRT@$AVUl5IHZ&H zKE5VrirYaz|D0_xxI4dZ;0)PoAH@$^7i32`i0(UA)Tf{8`+kEQG){Y)JDB~Ci2wZl zCH%kS{@2UK#tQR4CFcKZ3g-W+E3S3qUC}s^`sNxkQQ32<^D3WKR2+i1LeYr*>==q% zhz#tYYVGaxl-{1KrCDoMy)1emcn8&@L)2Svq_-PBo^0W7Z5Bl6hx%`BzsujBsaGQ` z(;WL61l5WGhT3Ji4gU6dK^iMLlT?M2<-h*6`$#+>)Oezgv|ZfHeU}4zil2aN2Q-42 zpDqezaE0yFYYwX=TCLRk{;196_{Nl3wK<&?awC|6Qq?)eW5DPjLCt);q74Sx7=$;8 z9Eu5o_R?D9hBYlo1b>-6xwdsw=bO2fdaxyIF7ZP{%wU7EZ2)mXTZmdQOqKP8Y z=2*T_|GE?3lA@I^8uFhx{R?U~P6;YUiwu3Sk{zgC=8c9H3<=H+qZ{09AY)aRCeW9b zOG4f5z7)=^6Rn3m1QwcDr{d-XgDQQOJqxnByzpN-I?1~v?1Bnr5jG(HSt&etN#Z4g7WDg2 zq{>C5kuUj@?$57X(-vc2XNua|u`7EA{W6drG?1YE=6Fi92 zTfDj)#-2reZNwJcA-(3CO0`8Vb!91A5?B$ry`@*IV{21Fyi_Y3mcEPLa&UaoP@iuU z0-Qh!&N{gVaDZuTBQhSzgtPRPvnGcmq)vg^GLzNrvFQ|*+f@5`OOnl`WP(s=HMa@Q z^-luDD?O^;zSMYSfxo2)LUSs7WYNEW8OQ+JL-dMO(c@pmuaJcknaft9O~fq%pT>Nt z`M9Z2@h{-DU_V6|Wsv1lty9;;8S2%^9tgsfR3_boDTsU)*;9X*vA6N6F;xNtQa4W5U zQBkCT^S1LhKDQc-cj32-@&Lf-JXXHbNa^oOp{qs1C`D>qkVI)lg9ezzxRn=?6)mT^ zqvQD|Yo6;VJ{jDX^w{#eR7r1+Sh<#q!^m>zq8B$ z1wSL7hH=WVCs&k_Ud^Ms{Tk^>Zj2;7t;dKLqW1O^E8$sWD5z4dl8!@3%QYX7SDL^B zeoY1vDPRhtD!KI~u0F$8U!953K;Q+R9b}b4O`!eo=CY}Z)Lvc_-54P*&GF?&L$)0m z4QHe{8P+CJhq&*!YH?4JRmBM#|EUDS(RTu9t2K(M{NPw{#bu-1@XhTpo=nqFA{Dg_ z?Ka}|Lwgwm-Bvo}3%{{!e{!K>Do!IyV|@7{H5`-C7?$EEB+5@3h_Opw#F80UkOyQN zw2q-=Adz0x42KbbW)^()R@q+wxgouwcIwuQaqdx=kTIK`ll^# zf0d_^IUSj)vKc&ZH%r1614GUphr{LYZZoK0MCT9B!1GE2lUd8KSyU~Yh#Pl zZqA!5xP(R)6%$_E#f9MIlqrI0n=Y_471DXFvG)ELTYKDXLEkJeE0J|##AE`7@}4-#uaB9(mYh4&#Aq}|E|`<) zGcT=uD@HU|S5+~cddXtZJdGnR=&XLxTm0e6-!}2Nf^RQ$Jow|b_DZnhm_gzIqqM5{ zlYAg1J;(kQ5?|(F)5%Fp7AoO-H@w{lx@q~Hzv$)nO=K0!@Yg^QIyy?jVIBmcn+&+c zcbY0_KG6t7dt4}$(rzUdLsRV7&t3m`5)u---GgB^OTvBi|Ky{Dhtbh zxGWad|Hfv`=t{fdcOV1$4e#QIQ@J5wqV~=eguAXecWel8VX(wVGVtMDO~+@x5y+2NAx{O z#M}J1Sf-93v%+uP1dMsF^%#1xEdDv|pEbPc=MnZiuoAiYqhWCf9V++JlNQghC%3T@uC-7M8+LzB|9NbHleNP7w%G`fi zXmWu_FoMLc2vjpbBvl#wT>>-gnN&D}3g;;alR%u8JwZ*)o`h5ajGK#ZK+Mgp)3EK7 z)DBrf2j|GmczQ=TQO{0Zc6z5vWlOG{-PJWEM`I}WD=j=tV;GUGwhUERVLSz+qPNiT zlDydiLwiYOJ^OixLihbPrT-ehz46sbgk@n?SRo zU3Lez7l&y4N-?PhQ}Xzi-@gZF63%ec)Q2$_?;AV8BDV=(P@Rh(!ZzIErH~tM36Mj% z#|ZP90=&IiZ>iHF8GA(@f<(a2L^RabZ7G{@Ts^ z1Ww2XgRitob1O~zTgj`2;$^n+CN2Gnokn)&$I}dgA+cy87Sl2YOXP6Kbn%v(Bw)^V zV~K+-N~Kst56qeXP$-djrAw6~O{S=UTN>gzrl1G2M^&`K;z9b!!rSGURDr-oTFbqB zIA;Q64t4D7(f6LP+8jpz-X5L@MzRH0jgezS!G$;y3sM;6KDhVF%QBb^UXH};hGndy z9nq!F9dC$t(y~4^KY?CBEW4qQm(3_jEZ4(F2BbQ10a?jWVp{R|O|_eFk@kxZ1if$t z;3WVdm6CRkqwbrsIoB;*ar?7Daw@Y>NKp*a(bI?0sD0i*(TND+CZ(4kmLvWeek6V0 zJ-d^jm7RBTm0kp|fNJjSe>{8IB@Dt{I8B?sDk8Ag^$nYSU2@jlpoJ#YlHD?*B5L5R zLKyvhYQTEEOXebyQMfnXP4&1dwu>n?8js`$zVA&ANwRxp4i}#N3NW;A{gZZ?`Sg(J z1RJgQ7OD&Q*buqpfG@#4Jwg#+4MHgp%uUYD6L$3(M;j?yPZU@4VF5btQyHFXi-~3B zYBmt#(hYpiEtLma3Ol-1iLn%b-*;+noUUM4ahK} zrXTYe-lDdr0CketQf3Vz*RlYCPM09(S!iW12Z!iGB-c2Lka_VM1m&H9FAlXNH9=-v zv*T(=RiAZA(*r+m%7fTyfMaM@13$B17z7?T?Ql(x@MDAg0+K_GT>)>yQ)#=j08`v^ zcTtd5>(1PXdiOIQhjxK{{)i4@%S-VF3>2vL(qa)|08{Bo7ZD84hfaG+LVOiuk!Pj5 zGGqqbQQO|)ST=0gZq*WgRviUuAvBrUlB#C2kDeyJQO8E%e0PX>8A#p18NmgF37KG^ z*>wrka)qso4t-w@=^n}?NFU2D>9VFVNPLSQMBc9^Afh2gL7^v9XyfliIPw~#2eGr- zEZ{r;ZTgK@-G7Q&eZSW)L0)!rcP-M0_=@%nTJ0<04EiobI{w1e5^8jg9*=Hef#Ajm z$Y}DK^Ky=!{vnY9%Hu;WZtkd4%Yh1;#W@;{b37LN7OxOyPA}g zr~#oYW7A|Jz!zz=6jP)z5a|x&E;SJjf-&o`BPzO|POvClo62|NwEUPn5!1?&T4k@o z@}zBImIjh5JwH%2A|9GIw6)8olu?-e?TKpLnm45K(@Xl*K~LL1O@h>X*uxcu$d=_2 zT6dqVfX=>gA5e?sK(^aNayWmbF33t6y{4FJ!U$?4vTj8}b|jpqvy@-j<35_iVWWwc z7pVyuwufBU9f)D-;mrSFrVFvU7$w<3ikt6yUqYQGrVC{Oz25YVPE)0vKrkfHWdLy~ zS1o2mnnfDA!@2OJoy<)S?|#iC_qsJYn0`=Vp802)qMX^iJYeTgn3H`~2oqmid}gy6 zv3YkS)v%+^HGnRT25_^GT9BMK0xn1^{Ltc6DB^#(OUt`-oS)o|`ceULCv?b_3 z(}UVwxcF7&_K#@MAwERf3<0V<@@#Di&ZD4=y#%l9&!*HeKvh#q`Ip^ zCY6U=PL;ELHbJL_mdn+!9{kwYV%SsR;CCKHktC)Y?eUQo!>4P~Ht8CMTHZ*i`mdc5 zw|Tz5_ZHOV5{q#=@9J=3wk#x#80K@by;5vU$5-xXG){#{B%=vKv84z_-+Y->^c4y| zcd-g*-7|Mu!2?^4q&*%g(guNRZ_qc?gKPHrT)mkJ`j8$tZ?!BJshUkt5Rb3j48f`x z`%Ieo1HSEQsQ4+m6$sA$gqfS~TH0x^U0DF?gg(hobP883JDs*#{0`uf6_sy)}pO(E?;}}Eu zN1DG=t#RT%LlrMWPvX3%ozn{FkXx}+YTCi^*!Ei^XC~?gqVGmI@UJT;mz$NXh9gbN zapBA9K8;o=#CT^^z%^cBU2oR!WQGe%C*5RK%?>Es7Oso597USD&oBjP=V}D6LWvwO zV{1`;M$hwa}PJ{-ENA>Ovrbrt*%3NAZn-ikW$ts2e$%h+qDq) z-N&04P!5>(SZ{o@av{PL)&-mTTBzDHwD^jcYE#t%)~Vb{E{6yiiQqw#>DZYOdM7S()xs!U z7g%&KQ=LQuSLLPzB z-97wh5?3ajYr<^?Dt$64$(_}>z;MI0TzkHH@mYa+9fIY!{IJ;TKasSt8Y&+9*vj#3 z)a4NN>TZ8pav^FC1c$nUVM`jzBw~C!F|v2a+J!|ZdF{7BBI-^PwW0bj49w7?jKBnF z2f$`c+&U6kn`&Z$W8~yJc@9ts#~eovMk;Xt1`M?ateUOnL95f*eZK} zM(Q7GvaRb=7iE`rdU|(AM+F&NT_k(G24dPvi~(DnMKAMECszw;oLZ1*yDx4UwJ;TD zsf5d8`by7SluvsPF*@J=1S=$V6Im$`%;aHNG6D6p$7l}1J~kzBvF}q{4rqNdCTQH) z0(t#F09~(B#PGLA+p$|(6bOsd#QZ{V%Fxz6)IO8pJN2%d9x82@Pkoo{{nOK31Mulx z?sGo^`GAZiKr}?WH3>51H(U1O$7`(q=KY{+df`et64!3nZ2)T7;p34(ix>(zQe=k6 zvtPxg`S<4}UJOXfuYT6R0H)VIl3kF;)!dx{lH7-ZrGjiyU>;gzEPRD1#Tm@KfXMD< z2Ci=Fa#G_jT3BPVReUg;r|Ald3aRw8{<#bV`PJxgt%YubXwLZ^p|nKJ*=#IHU$K;m zfiUD88QQ{A3x^U?F;4#XtPpufYF75pBBtg<8^LN7w zqd%P~e2igBy~CvlkxW#kk~gX_^Gv# zl|fZ$j>nOwz9C^z#m^(fW!3k9jBhc;OhY0qrORk^r zYCk{}YIldqRR?yG-7#>%2`}#X^P>x@HRM8{eUBV#<-3ESNUAs)dKfT1ss20o#aMR8 zjb0~pzqG9Kz4-4b7u}n`yqT6XnfqPA_8E)oXpLWH5TD|_E)d!dQLGyh=QA?WESgsRwgekx1%gx3K(d6H-OBykn;8`E|_Q7R}_1GK(48e?*D^5|IL*DVoz2MX7>MMPwxL< zPgbV?hCNqs<^N&N>+f>G1ALw(bxOxJuzvofkVEH1x9rlUG}&tcw5eo$*=D=n@p*=Z zfk!vNdM;0Rw7=b4m_&_*k#>RZcI+Yy>>%G6zdo)^Twf;~=IS{CF99D97V80h%qmX? zZ*|T(k1oDV3o3i@1e~*+3CK5g4;G{#_4e7!tq$8A97~wluOq(T3@_bTuaaZaC z)pm|`I|TZ6`EG~^9-@q8t{~l*tpurRK+U`Y>rnxX67HJTn=rOLIY5j;q!X?>F~COi z+nXVItvC%vJWxbLrK(Z@YCBT|*v z+{li{Lda8|_#1f97lr`HNwFQBlI{8LM;_wwYg~wXlQEQd%s!Ew{v4}r1ghS@RPD$E z=T{&V&YQThF2$l~l{YniiEWA(^33Nwz?i}G_lKmxm{gKEmRtsG`As#WB1kcj#>MC1 zj^l_&8L)yQ)jbK;DowEkQ3{R%_bOL)_PM>@7l1_yk+t}n$Ii4_RLnz}ke%H8W)gb& zgqEC{r*4KR^lK% z>>&jG>kzslKPg3+XRe`m0`H;dzIU{yj!Z4$*4Nxs@6yGIhbj1LRZrmxcIf(DSO{Xv zBb5FrnF^<~Y|t{}azGJ;w; zv?Jz3D4>;;Fi8*qj}wR^qk9#S+b_ZOwoXwyd_Rh-!|=lBLXNvTiss!yX#iyw7ViYR zvI3zCqbk@*C7T2C&0jp~oc9TaRPdLT+JOdh@JRT&?#FyHdKu@s{fOA-qoAH;R}5@! zwikT!X~l~V0i`%-Fqcfobr{BBfd`cqm^m1+rt8rlxIxNuqVmi${qeSEVqJKdN(p0A ztdV}Vm*QaD&~t>Wo|8+OHp~=3fu1LqTfiM4;_@KV_2;9cWpOeNNem< zx?B~M_g3BV$q_#GtL>E_UPd4U`@mq*r`&l%&dL*^duH{14 zG4F3?SU6Y%X2^RUF;+}7xu;Z6>MX{C?<`;1n;ON7w@q@yX}89efSDi|6soN>P;om2 zv+*{Rkpc@A9cw7Vl#J2)q|j+c-;~Tty_41$xHzF+XY+2M5tF2WYS{Fduc7(ATvx%9 zgK;r`Wte&A%EUIlB!H|p33Z^8Y5s(=pAoPTW()k^p|MbA|8&aOUJq;Yz$igAs@gQ> zOkJsS*)*6gI3b!M)?|f%;HokkZ5!~_Ea$;PRQJvxpLX6St4xS zsV4;FzUh*#WwF_j8+hIArBTxjPU*Wjtnp_=!p8_K#?aAK=Hko=S#pVf^49HW%EL>~ zj92Z1>)*cZ0`djb%ex0K=>q;~A6eF4*>#NApRVn5zkwl^TKorB{R&_&?Y!xA)`DfB zs8!_y(S>YXi=_`k+9*1_Q9h(POJh_NB1WoTmSN*(d ze_fpq_a8Zc$hWT1{d3e@tKP*>^G1asL1LVgBT$L;S5)E zQH^6q-=-*m4mh0VtEJVMVtr`DkYnv!b-C6%;?64PNE`1Vz|X^ST%QAQ`*8S-5vpwv z4NXyr71#Ckgi1i$s3S8y)ugS((Y#RvnUp;Suhf~IQeW;s4x9y8TZdLq2R+I^lf2vt zb-RL&MMgmJdx=4A7gW#GAiI8Ov$>2vglIq79MAa`NEg46@tHs}qjay9O2&?9Pk(G5 z#ej}2bs4Tt6t$cVIF*8KM;@keTBURS5FalS>)vLuoCuTn`&w2D7+Wh#GZ4n!#x0e` zfp&YInwRmK%2Oo={#k9`2BAeY0n-GLX~1bO>-w;cN)ansxV|wC6~aHdn9@mYssTM} z*h-ZFz>GNnrH^+Rl*Hd?`mn5Q$5GN<2ql7ak!I*uP$fvoY{2SLF{NQNKw)F-i4)nk zF?Xh(DBxqdDOo!=lShY+5-byL0KKJQ*oL8*I6AP5|%P{R1GVv(Tl)t8r3~_;3M;( zSD%E^?x=Wb>>JmTFv(zNMjoCV7n>s^$2caKyg$)MV;G-CD#5?MS+O2rC?&{Qlxf;4 z;2a*>jhjq;;vpeAq}@Gp&Y`f?*hwx$$je#nd(GH5LM`$Lq4@-U+P7A#eIl40-KMh4 z8e?!?vVD|CiPeATNb2RcJT#Hif6>^`!T$A;s^@bOh2^sQ{&<~B*t&z6_!iAWRTzC* ze_qS4_Tnv83Zx8h<4g+U@v7f0E&{jo#BpTaVXwAyuTI0u?&^y5 z^g)_hyoC&VDW*6)ho&PlwqaHqI&b4EnmCDi$yD9n$4YxyjEr8iJ07aZMrT$Dn@-6* zCTf&Sxo~pVxSRmVW`p68#6Me{Q~(SafC(aVTw-ruxW~RxH&X1%4QhM}VO+T+P_B^w zlGe)c3=N1R!uW%n71acu>^V4&8DQp&3_cM{y8!c$f9_u(FnKMzoev#w@<=kh6q5j= zVyOh?CwJG}jW=%H%jWk%_gz0|obD>fz#hKdV9<;@-i6PsFl1e2maAU0OKqfr1w&NuwsH!yiX6+`8I|D)8!rjOgY+bu@37Re;M!;VwFu-vSD)7zI%>sWc3dLRy z+0yS^972sJ!a-!vB7DQF1MeR2Imu28}r8?NVxR0pt!kGBtJOyn$YN^;B2br zQzZ+6nU~?S&>?JwUd{8NYpo;Cx38+q^5d-!PK?j==f?}&euA7hLDLtc-Ir5%7DN^> z-8)CWJI^PUg>Mt^JA1tI((E5FS7DoMBMG<(?;xysAgL#-;4IW9e$sTqgUGrc4IeY= z_Vlr3aW2?|Mh7I_?0iu{*^6y>NXZ|4q_FQ0oTyFw5WmH*th~n;2=-kD?G7(Uhd%m# z6Q+)l6>+)x5%q6|8H3k!=d|&;Nm#>}91uUmRk|*0x1)%eAE4@mB$fZ5!+%rXzv+;j zh53K!koAA#KmR}f#mf2L(BU=ilnY+RUE*fCpowbtRe3Mb4&rHQrCo-qGs={2bY93v zhPq!Q#&4*=Tmk|lbJim*I#~ZJU;uYryaBa z0mH9el)FQpS5LpT?@I!pP+i!;U}lp(LEW-66MOU};fOZXZ0qyYkD3)k+`et+@>r*_ zAIqPPG`MJJTi}3SAch5pn&ypBByeUo=(ulbzCkv z9XoiB;U329$PnlcMeM#KL}zN}a~eo_Ok2~2d4#a+#wvnSM_<{4y5cu^;V0&n1c=XO zUYs?kDZ|4UypCD75fq}-1{XqCoFJleQJh$cg?IeA4E;M6V{@K(_rBL?Vf|u1Qw5l+ zRyAo$z~r)=Ph6h-4JCTXNr7S6jT#BGN|Gb|z z?dk!+)`c<)6(_*HhufMMiLacrXn(X1j15C`jzHG)2^;;0Fb;1sf*(vY%Kh{dEJ zsNp^)Rj=f*)yX{)I#Y@MAVKW~BHvP6SC|yLP{JBh@n<*J%wbD|_)DoIDuju|v4$?> zG#rQ=3zOkW3gHDyLA0Xv7$#JEBs*cP=dRPqP7RE5&}rvjF!256&JvK14AaOei3Xie zl;;VXBV*F6s7Y?GCB>m`3RXty`VMAx4r(o=Cji0s2p-K^|NQ_9!WVV@0QmO93kz`oF~ zcjhTvWa!k=J0ns!9M0!w}xg9_s8%n+C!mEenA7aB3ES&!fBAJXH%0iMTNNZs%A6`Cdtcz%wW} zFYEnJxNWZ}-k=eIB$5V{ZKzl`@39)Qin2~nQOwC73b|mU*@@S9dR!Yr@#^6KHb&!WN0hXfD-D@0c|m4Mr;#bP&FvSkxunBZ)FHK`i2(9vdZDz&=9~ zTCCQ5j~k7A!aW2vxEVrncjVw7C0vm)3{h7M-7HTO0XPKlpxew+F>)c0b;jQqAQ|8z zRw>ncHf6BLBkSW3BWC&K9>$F@Hb!)+y4mi0-uTt1Xp&{Z*0{`Q57ybpP8<0N;DSeP zO8XY7->^)}Lh#oxUzzcqd=;$mzUmnGs5|mdjLxD+MVtkdL$xqlqU(?}5C>Eib55%w zk*c!ZIfISIh%nB`9I52jYM`y|Q3RJ{chcz)e@s8V#3-LWJfjc_rmEFO01U)d-pC!x z08RH)cGFsRZp%_JqGDb#(MnEk%ERjJjbr^{LnazDa$pY0o#aThKlyBl_#e$Ibut01 z?xbq^z2!)KrHI^{Tx7z9z8;^?m}O>>f7B+7+asO^ z=(7Nnd*f#L@zx$vwrtU-wIL75(?8BVEg&C?CZi7{)J^L?9lIyk?I@wIQBxZ zW~ITt+#^KCr{p#YH%)f^S0tY96Xms9 zO$M+Ew)DfMqLCS42vTNal%_#EGNE6-|)r2?;`TR^Z9Gzo5-UeWqNV-JK1)f$EpXsFqsfNpYL`qzH z+T3`5+TqxyZP-+U=Rd^V=Ezw$%`P|WHaLAt+9fCIf_kclZxa&obdtxJ@knV)>$>f* zI2G&$s|@<)BrvBdMKqX$*R?ruA7JLvx(y4QnQdW}$xjSojL2OBxTl~93pZ&R-3^E4 zfoJ(#gpyX1(-?$XwH$Wyk4gMxQg($cj<99JH%Z|6Uz|Ecj|9Oa;+k7MV>W8iP9ohd z73<1heDt{QIRjw7Q>$w6J|`zE(1*4rpK6fzMNAo8iLMGf4XQ~G>pGXGgQBl>J#)@F>Cu_RTHFW$yDOEg-nTt5oMWSUZ@=KE0rK_I?=9?bLxYywMF({lkbk?Pqeib|IV_X z$Ukfh1S*2{yO9IwX~aOtr_@!8$kC&jVE?gn#?D((sF;>(#k!5F?XhdxH~>f1vVJh- zyl&baGcf((xW?{0WA`FhU7kZlpmeu~m+$`;BDJ(Lq2DdXLn*c9=vJyAL@I?>DifOY z;Wp5mymN?o%T$8&U`t`ugSnWnxEAdYLzcE9psQU$jVLUvuzTuQZbA-U8PW_O-hUIl zN`j^2!}c%`)hZg7xD@=3+Vr|^^?VFk%;19WM$?KtC9Zt;Q*g+$M%(8Xm%bb~TdtrT7UF`VNz8|CX<;a<}x z+3cLmlK?BnzP~WD-eP#{<%37`WY+y-o=2k^&U=tB`4UkmEeHN3wc~LE<^%sWS?{FS zx0lVF5E=r@NsI3Mht8m7>`=>v_62^(1((e$ai+gy^Ddt^$fB$ZB*!^t&L=NkDx4vH zr-$Em%fnbBDI7xdX|FWYm2HaNTuVyudZ(|8TOP8p`$MPc4#c$S%_*6i4mR>-LzUsH#F;aJscVV5E9Q{ub1>Djs{QG$s zoB@ZlOJThQ-&BGw2?SITj^U3mpY3ku;aBx&MOpDBrcWR9JzDwwmeMyfG14az=SvEj5_0 z5Mld`L77GKhkS@IH~l+%(Nr4WX-Qe-baTtB-2@meV8d|8WCnM0xC z;zjtnc%?MX({Kq3Du*JgTTcch+!J+j~n(3U`2Ic$`*Y(=3 zJO3uEcU(anl-`006POHlA@MqkfeP31O%#*C91us9njNoIR3~g)*m9MM4I<)j`o33w zS6|dr=ll1VqwcbN{%arP_t5ie<9ezluAp}}V7>9WfK#ISH*G(=!h^KK9H)+p)k4?4 z-2>Kp-&XJTU#3-#uZJtYk0<8X@oSC7-&n@&*VSPU>xk=4je*R|p1ctpIwxIS8O0YAB)f*(vChmr z8bmxP;`V!DI{9XfZ+z`Ewpiw3lnoABobe#uuzg(5hNo|m?M_HwM69f%!9QPK9djHL zI0f-$<((i!7eZv|yLog8X*efc=@*+%^bg%c8)p*(t`$C?e_3-T+4-~|Ry^6A2 zdvMXpoJ6u3ozH@%e&KW+*lk6#c^sHu=Dg-FzClZJ4I`{tjf zZN6j8PcqkDI$-KgG(JO_4f&>WloR0BGfPH41A_||N()q;ml{~TgC>f)`BuJy#YIRI znAA@I8kPaBAI6oIh{nz#r%8iFS#eO&NjfYkS>o@F$z%82R&9tEvk`=*%^*Su;XcGv z@XgN#u`md~kisg3!2l7X#;Z(le}d>HQj9RN1V!#Kyc&eyIeA1fwsg)xP;e~OA~iNB z(~s1~{R>v2^=YP+2}L+)6gx)rE)dLa-%oMHjs#f)n>$}37)M!>)sVwW z0ywLAzXX+=FN5TNt`}MnXU#Y6JOok!K0&Nh1F;t1Rh>wk#w^M-pef3X&DCXbnFmcc z1OimNCc6`jy$m*yOCs@rfQpobgEOCdB%FGF<4{H4HQM%kD6aRrPP_0q`6)92K5TB} z@vU*Nv1ZmE_Wgc~9aSk#xZolPT}jv#$Q*8h+Y#u|GcKPmz8RJtWA_1Nc8VHml`f{3 z^?NU5vbYtM4o`7qhc0Mj^$kO;Y4=151=dageRg`8(m(-Q-&kx`o!8;2Y$hhAipwenTRB{#|9Pq=UB1T{#9(4 zAhNvM@XG42834a%?M_}3QE~y30a-k7HXn~Wp78^jU8;rPWGNG$?ldx?b(ISw&}+r% zIfJo(n`l@Rp^ee%%95M8ml>e`+ET~okr3~2#(|IVS@LgsTIu_3d&GVr8Sz^;23T!w z#Dfo>Msg*Vw_Rh|YP<=b5qeI_dhCjnCOUl2>IBskRB7YlRl~lf=U(Qt7%Qd;3ewQ% zDY)*)sCQKVP9l!v+8+b*(TBNszjEw-@Gi*rwD!^Hi@N~6Fj!738e02lt%!Hzk5Psd zW;GyV5EX|qh#QQ%6Eq32{F#PhO3Hm-`a!#2iOcQwk)LNj#?3+j@zb|y7umva=2N43 z!v)pDpI=M5Aa(i*1CrKQW+6y+{Av~jKW)ObXgq_7py-Q+0VbGua^$=jQ7=*}1Bo9r zLM3hxerE4UlLu^_QD>tm_`qoqX~>-$wI1=#zvlR#3!r_d8#Ix71kL;ew%LrR$~2w= znMh=jY|v-9a<4xkB*+O}UF|K6Fy}bDIX$G=7I0twuAiF)%{^ifF?h>C2pi&P-0a-U z#Z9DCpW%39Lqo!y6l~n_@v?)Nm!-V0ScqqksiC(3*qkMyJ6NNYWt{fZnVH`oNo>Q_(<#_=|5jg(K96%NxHh`rLIge2Z?ACWSgW-}2 zMQOiFeuoplG0UL*l|Y=hZwxFG*bZ$#NX4`krOBOsFVqyl((S@PTn~!L7V9>5kgBjS zTsB1F{55f`j2-WF(9_CpAKn`DXIh9zEX%oXh?vR2(d3D#S$)(>e26oXye=YJ4I4ML zjF*h88Lx38)5vqUgob`q`|u&ONyf}l|5Ux@sO&N+By)w2t$>m**Z?>GZvFRg6^VqL zRV<@8n+^7EnRf|W{Azv&l@;H7#9W8;)7vbjiNml7NbmAp@ z55M0+Hb`3)hO;Iny#qyHc7W&oD=WHVU9fhKZmB zCDS)RhkrcW;mpGbh0!v3VcnK1Pd4_ZxV4YPMamhTI#_t&mixL#QfMc3r=Rt3uP=ws zkc3>PCigy$&hnlMMGudJh&rf#sDGafAXmOsYl%pOF&s0h)OS2XV^uOmJ4GMmj5dzlSA~>IEuh!4qIm}joDO|aH>TFZ9CojaiW~wF&s+Kh(2m@f z+XhJxTo^Spj?^cK)~$GMk!XxvmW=0Zj(S1+{9VjY@l!UZila(qgfAqP3CK4ayPB4# zjS-p|7!P4B9!vC~j4A%8TQ655_*P6sim6Qw37ENwb@`*bu9lU!D$0ZCq2SJU#Hehu z(-eS!{|(hk(LaI6ax(+!=i2VnA7Q`WhbTmn#UO=Po4M4^n+I|44E0yQyZdJ!2rs*jW0%)bH^?PlyP}Jr$TywoVoSUAHeTb`9-9W4~zK`zP&rGPTYrbm%M9e(j?8RpmymYs)egm3!Ot@2!Viq%ctMYWzdbSjd?o&$=j3q zbqRRm14NlGLpTmboD{VgMs+meU(*;Q)U`(=?V`p=HnBL3lvZkpe{*$eJgu}Sj2Su6 z7o{*yP#-I|u2->EL#TsfhIN?he79QAs87q#Z$cvsQdZgMV+J)(2z!^%rOdT$A%i(| zMOV71f&FFj$>fKJT{LXh>qA3@V%iN(@a7fqWp_;Qs;bxJOac*_pPY@Q)>)eU*oieV z#KU?Rj2$|ZUvhBfR^LsLaRVg{^VyCuP?0)CJN;tUo+Z~u9h&cANWL7%bHY@4q1J@? z0HLV8^vzI5c3cfHa_L$l(Qg)GmD~6QcrCR>EnqZk=c5I9ErrqMbyu%#6-kA_91<9i zGA#t{4zn$Zwj|Ti{7^Qv>|MhnSUtzA!8#>u!7W-XOQ|O9u)WHoW8umB?R>cCLlBZt z+?z-9i0hYGA9BBg!^4?WR}L$29=MbCew0QTpwL5G7QKo9ty7e@X2-I<&~A%K3;*5I zUn7x;{N6vKm5=}%T=g|X zh>F=?U2|8&QwQ<&IjeyJ#||y?a+XcxVU^b_a_&o-jjdf5_rl%JWDw=39IBcrpogXvjpRTg*$&5%R@#0us3F~W`LlQn%!>R2h z=g_RUT1EJrI7~!m8)pf_OF@gPPOWOE2+WKC;7|+w{=87f zldrE8VD*PNUhM58G_Mx#-f1LHvEViU-)35Bo0Om@50~;T`r%WqDQ9b+4C4RC*jt80 z@rG~10)li&EG;Rp)Cx!mD4l|I$F6jPfPj>wv~(j~yL3tmEFIE~v~+hp{Rzt&cgWwRj353YUnt%pp9mT{{5^_`7oHt!JWDETz`kP|Pf@8naKiMY zt5VZJ2}doO;Awwj`-4tXsrH~`w_&)wOOV5xHmTI$v_tyQf)drnmJ8jax=f6&aa(Wd zdr$c5cNMYoajIvq^q_CYFDa(2=yR@1sM9*Cd?sg}PF#P2>iAF#k9~f`n_AuC>uGm% z^C@PK6upxeBS?EhYcH(M{{MIr|9KAoya_%CztI2bM)Ln#nEG$y|J#k^hy4HPM((vJ zma`~2Gu3e)$xc+2wTlf7_P)f>ll%2H<^^6}iqR14-AU|F2Tw8mEO8;${pX-GS@U2I znTbkfJ9zdzmf!XrXLn!0Zt+VJVtraKdV`1Z@D|Cm%Xp&xfvg> zYVVe_vWy>lmd$@Pc+>Pm!*r45ZTAD*4+F=$sbZTju~vJlhvdTjU563w$Cde;0f*q| znuz(T$HL`@a^nJzTRHWD$BXH^p2xli!TkXr>g9)>r1TKil%BJF6};g`%IW=iU++K5 zjK$tUQ`hsxGi_T|I+qTAbDy0gdATYzSqq}8Sm?(%?Kp5LmAHd9`Ha5*1ngM5CN+1!|K zD-P3p`Cz=a-1C!0|K`FXMa*Xq=-S`5Dby;TW~x`R%UMzF8td6T>@NFUi?FPR{*D&R!B47JiP@(G( zVpS;P!Y&KZ4CDI9bwGFBR^V}e(G+&%yZm%w3BuXg>cvO`EcmC^Y(xx*DIN1#^=~sh zSY?z7U*8n^5)4CVcSB{;f}_28!8q3nlEWSkD>WMz$37EOUQRa|?o23{*Slh;+{06h3o_cOcAX@)Ec-?{Sg;OkS+}GO~BE z&3KdBc6Mj`Yw>v|HesGwy?D?GI(@U`v6N``^R$`%VvCkvY4G>vD?`CN57`^Iddk;P6fULW4Uo~yL#NZr zFBFs_S*uqmXxdP&1=e)<)_@j2&xB_A_ z!UQj|kg@tP%Q%smF3FA|yu?d|RNcBiOH@X7btqS=|GIUtYdL9f01Lr8lV_%OPB?w-WIO%6`r{D(d|E zd17y79oi1FFRxcaw&0a*5YnSZ|pwPT90Y+U)wO z=Dp(XTB!AAvzv3J|wMrjw;kw^1sqY{%jXMNw-n{v98oR{&N$&nvt#0#9- z6k3LEr{L$?jCks~Gmo5MM@R2$;LMtI&3Ko__ozuch5OYj48jupDsLYjqT7;*M_RrZ zwBBAMWH2H-?s+ejwY#b3Crkg;RvL zeDA_bEBFgVhD=Fx*>b0Q&AOlNxLfn{BKJf~RF_V#FGuE{+nNf0s-<=nJr>t8J5Zhp z_NZzoJ5IZcd&4DsGBZ?4-^fv2i~G3$#@^`Jk=1cS=011T(et~G4$DfXGc<(vORNL3v>VU720Xb= z+bouJN49S6FAfjJVmj|Gi|V}ToT;yYS6npm`A$&(F-oYul);?L!EV$Y>3pE+2>V2E z!fBelHtxR|7ddWYld5JDC47V6k@x0*Jeg*)hqbuutK@Mxs@30^d{tTgZ`}910QDn+|XBaj`rEDXv{3YXREg2{c`Px(!!cYbrQb>Cmqjui!V#=e2uOt3-0UkWvO;^0dG2x znReW11`2v_&fT}XXs&RS4Ng5foI8_jtfcEKvO8`}ik@F#=uzTBmOfQ%dC%D|o z5dZdZuzwA9yBl=YI@5bEf;bYXkhsaHnPTjjXfY3(xDVH-m5*?3)ibuu>u$UCU$8Mw zC*Ku!6{*uJ`cC$E@J!E3A8tGW*P@udJKQKboxE8Ej?qNddvCfnng8DbuE`s-G_I|^ zEC1Wyxotji7Gb|{!Yr;@dRU`p77ypa^EgXQOeIyDwyFKDb+s71?lXuM9x*g~fka1$ znrC?|^b&lIbJs(((u^JE)tc6COn(Hc>u%`l36Vq)oS-%O7LxjGnXR9RSnbzgR!th) z=I47f%IM+gPg`Mtc%o)C|@b=O;Q7gsBdv-7NbljmZ;pFcjENyooM9oX3x~0z5m7jezL*r_eU}D>GMmM0`O3#*N zT1Pi{6J>Rp&sH}!agNgYmd-q>uH6#Op`kU8mn9O5xN{$0T^0%-2Zrs?r-f4@JjA#G zM{OjH?w+FF)mo^VRm;Ed07kFvNtnMZZ=+`chP|iv>Y{=0CE3+XE!&=L!ky+n95}uG z;!UD%pU}Y|_Y#;P3nyTPY{P8pXj7ey2FL{_N)VpLR(L%o%nI|Bi8WC@@bScKqSd~QPO|9%s^PR(DtH;TD#uUMK!@B7{B8WFj$wMr! zgbZmox5O6Pqd(0iYusf}(K9c4CBV%9KIjMONv(R&ceTb5U>wirrk9>;D2 zPe}t&h0Qs?{SHsj?n~u&t=wnDjoPiC8^s=D>wBfs7QKma_)&(|l_7**;6UQK@B)?0{+SEY2(TQlfO z3g-EZ!Gg%Ua;IsWpj=~Re;0;%n72A2OuCH8r7p}-rfr)0UE|`HpsS92i;SZcFrkId zO8ADc+-^R$W+!$nw2g(eje|ASkaH#m)s%K{jZHjjht7wsXA^t>!hf-CgR&-o@SL2= z>YLrY0{G$JnMR$}A`I?Ar-0ac$k4jWFe-(L$KB~OvOT1T1$#wv=pV~c#qo`;F1xO( z@FMDw_W8`kyz=GYu>ATtOKBC(%g7o^MskE!t=t_YL(S6!VtpyQg22V z^I#c0Q@k8rg~=(d8X^Z+OE%F0tI#+=hZSm@XLXL3A0K6 zh5b%=Q+v_oU)K0;nWYCP^Nm*peN6opU3^O(U9mZN0|g`2YR7UnjP6z|g3ggzIJIMQ zMqoERn-MDbaZC$$sUcd_1}LJ z)RWC0A(U!mr`vBAf2(SJX!Tqrh!4D)LGq|nK3)@PnkkIfwJ{P$sOvZ=4Ls;E+UE1O zY&OGg_G_xRv8u_dY4!-J*-xv2Qc8r}_XXo- zGgr4p`3T<_cAE=X;j*F)CS$>%XXLsw4shF=_)BWTwe;3(&PajuH~Su2<0d?2gBx?f z7*T4^tX)p@?_RiQVOL`zYBpW*T*^zKzJBv5bR?rj?>i$954+tzWt8H%m7U~_BY^B1vu5Zj zuk-x9npqII>5Y4O%xRi~r$i~gUw)1ZHn5I#VoG=+9q6vMVv!N%z*Ng^+Fh11Q5o1B zZtnOG#xBwPt@S%8HPkt+{sZMpv;=lGm>mz7Zscn%_EQYaa{T|3XH9fR4!e|Ed#P4d zcK7eWcR-e1R0Z+>(%AvHuensQn$5sdu1n#V^C9h6ww9#V=)v#m-hIH>X7HYQ;Dpqt|I;rZh2gcpj|vKWUaQ z#ntuyq#5_WG%NYP(hT{()6BQaT3AZmqeV zzY}9}c9HdPiCv+zwL?>|b@;ShV?i*hP22C@`)JeS!3ZU>+wQ(7uKafLq6OU@?natd^TIT=!VUb{1QMJn=)<87{PZyuVCMmwAoU8Ph>qBo_5P{dI#zn?HBfY1 z<>Un;Ycd$JU*RAbEgaI7V4DW()s7m1m3@Py4r-_(NsaPeb8?I+HJ2Wzo%UAx_IdH&c|NW5 zjhE;~WY@};Z-tTa=zJ>PO6>;CL1g3KGGH=Oo1XKtnIQW;eGOq)FN<B|3R1EaM|qw<1kwX5PWei5w%sk`HT(69~c18I9! z2DjLcJg=7B@nAbZEYcdxJC8Ik-@=R(wp{36AU|;SThi*Byw7r5DoAMKU$BsnRmar3 zi-(5)btj#k_}hW!eUlO&HxXTgSJP13P11e*x_MmR%a>h?->h((R{6Q*u^J{=dE}N> z`-^cDI8}4pC3fojr}nLtDk5b{SpG;bWhz_tBk<0wGn06eVbU$=uJ(qdJP@aDIHNH3Wp3D zF@M&FLNM~Ttr1XZTx+KKM@rysNATWpRe}O2<>@2`wWnkgZ-?w7xkPC9CX=av^IGoH zc>3u?bR0aNdhx9rq~|4_SFS@OX=AZm_mm8#$_(BK)`>7waKgFPtNc;B;aAz9YGj~9 zns;XVi$}THQ;k0y!gmokY*fh*T-MV588}2Lq^W_w0u&IaYiEO3iRyN967AvB@9jnt zVp_LX`SV@0qbE8WhBD~qh4o6Z+|%r0h=+b0gVxVI61k_Ozu)x4g8~L;d6mSKITh5c4}>yEbGGSz6jDU~cfFqW06=EXebTSR5bYy0nfEqU(J z5Y{keWx3{nJ+CAQm@Qf5MFTU`PMNNij zSJ*jqh@b%PN2)t}U9IxrCjX`LbIP zYY0ZJ2tpm#(#jnsIgko4I^f^I$R#leHQWT51;%fX-p5XIQsBBWoOW)BgCWnHI8>+B zG?1zULU|a(@BU7e4hU0TKM&RejT$jMP)I7mLc1mNDJ3|YrC?pcYe$I@KOE<9DygkK zS3(vYNBFDbc7(VNIu?zTrxt6kF3*YZiu3v~Azcb9vY&=yx5hkOJ2$DERpS&=BVr`r zAV0oCxJQ|Ln?0+MJYJZBygHFy#3X9Wdb<8_Y4#5P|HPa;y992f!j+Za7P3fM{Va8k zwDw0q91~G?(2V;NYuv$_ci~Kbr5j7--zmSeV$UHm^BvxL)KGW5Hb6;0d%n5Nv*^1Y z2S@oFjrD!)+6(H54_OZw*%O8vRi{?K=eBJFCs?nqOrIMjCqeXbwT)d@G<<5Rj}_fAUZ#mMY%N z$NfDQ(;zY5Xz?VH)TYS}ves%J)pQq$>*he3bZ)~UIgqODL};Kn=>d=ye%+-!CUVZJ z%CrF1GW|N~SsG30z9q_6HoBM#DJX7%t-}=tz@n62GQt1=BzndnKPG01toT`>J7WTb zl;PH_lD2JN^Xu`A(B>PKF0W2QztK^wpRJpO8qND;WAvD%VQJciwQXrXsw%p-pJJke zC$*d~*$4ERQy#osm=j^q6)Bl7KCPh-$I3+{*f;03L*aW5RBOX#$o{$S=oC;$Fe(?_ zcPUlf+S@zxVPV#~Tqg7cqaUV8^Ddq1F~VYDx7Ov4vCNkdx861ulB-_|P`e0AFhJdp z*sb#04Gy|2pF(ec4Z|OkxR94`yWn_6J)z^pI$}qtII`nWjMKV%&Y%}36fGKYq+q_>lcri=vpv1wfiG{aH~M@eh9>p ztNmFulZbh5kAoc^cevpQt|#xV{J!=okg}pprPcoEt_78YQT7C$?!&GU|G~U57NP7F9j(A7l)KZ5BKe#)xt)a!R#=|XUatxFqp)%7 z{tyRL6!yvGOvJ!uYTOT@08Euyoe(U2zzo6)*znV>vt^GS5tqS@6mHBZ(v6_7IZIOt z?bZ=x)81u8hA|lC&Ozb1y+gE4J_k`X>Qk&DA-SR15g}gRfih=sI*7LtBr?FDJ<%TV zF3I!YTer@KLYV4v6q{(_0uI)#nJ&gPK*$nGVSMbm;zYP*?_=80Advh8MJ0P{54w4x@EbDyRxol``)?xJ0 zPEB|t6RJyD@|x09R;jY%XbKe?s0y8HuVqNGFr73j6s+HC2*(l<{xSon*2pMk;qfy; z%GflOyX6^VpQ9bhU2}UI-Q6FIK^TEkMt5$<} zrz)<}p+nF?X#8Rsk?d_m#{T6GvZmbAl)IV`gq1lQLls4=+x7AK+F<}LnyelEjq>gH zX97*PhomMHeDjDxbDJmVai__&J>U6)dxWj>GTmiW@oxzV@08lG%oGPQ zA_31DcSpd4XN=RRk`zMXbTMxOutH2eBkjryGML>5v2xgG+N|6gORWP|6V}yWH-K|_ zOtilMAjmV0vh!Uhg8nq^c0U!s!nAD3O^Tfe5$6mEG|^T*8``ewxit*j2g|@ifVo-e z?UPvmbPH|3O9B8j@E{1_z_Uv~@2ETw#cl+`IW$-*&nvEb%sa9$4kKK~%{Fj)F}`xw^ij@^qlkfyxMv{n51 z-L8*=qPITcb+a+TS?nlmF<~EMTsl$%h&>%T`FD#3-CC!wlj|!xEpnMln2FS~)R3S-84AF7dcj<~~~`fjJ&G(TSGoMvw|L z$dJJtNms`FDc{=BB@Z!S!-^cTUNQh1i6bv-_KenV9%8As4H<~Q2O{a#rbU7sB+*f?v~CREmS_-6sT$q~n)WR1}Jt!~-v=V)W<$?{m zsaDCGhWjNLwNL{9v*+PSBY_7}Xpq$V{XrA?hkksQ+M<1!dnk95r5|6oCuJ`Y+G_R^ z8XDKL+pFDfGO}?{2w)n=aB_0whn zDBxBwwU-&@)Skm2@i#y`uHWwOtZe`xD58}FT%3a7?N?cVtMA6%j6S&dE{bw5s}ho^ zG(i$qmANr0xeu-_E;MKkNj;s)E&i*WD@#UI<#VD&SK(vL}-nUS9zk83<5zXg^fbCyns^>?) zHn@HMLb5#~q0{(t3_UcZcD-ww2SUgLWA+V>2cTQ5v^5Y+jONxF7`Fw!CDK>?Jm+b+ zxMUrUnOv#XNS1F7F~9EjMn}yD^fy1m=jrp0y@F)aP=$yLv;7ynS$?kTtEsoEN)L8J zufKT}fzt?*E2~C8o&4M=)nCkP(j-1&C0Cj+???f~rG7NMF^VKb;WVBm%C6k1>zrfu z$T)2Jx3=1j4-HSv`O(jMCO}vlc@yNkGC{J7Z~%$lpKP+iGyoDYq=-8E0m@v*Cpe=&SdQk0b*Gqw;V%4G^Qa8u&G=ebz|f&ao`2o7P-9 z!Ptu&a8HWSo+;hJ@j!?8`4_UwWuq)}oI^d)T3=ZvD^v;Ga<#uii&g7&HBpN^x50$* zQn9S`HAf|Zv_lZZveL;)DmCpm|5UlLr-39!-G6c$wt{!9Lp`MbS~I9VmrMOJ`n8nI zk8-#XVnEd0%<_q!s7O9f<=?*g*;}F@^$_;hh-s!Wh9~^r@~}c>9PfqPAK{@)pZl)c z>lo4odp&r7H_KT-g8L!@`%(Z^2s4|1UHKM3c{74n(qynL@&$V9?=|KCHM#vb#{7r= zca;oWYherXbx7AXmWDxf^7YI=B^BcY+57&a71inCDX0La%)(FpNotn6r@BMfKpe=; zF@#r+mn&9o^K#{?1nB1J4FnDQ&|AGzuz3#XC)}uiXSAT&YKIPxsOACjj6 z-&cZbM!o#+v};qer>+qVqaJ-fogDQS@s)N-0f(kyJbE+x4S#qg^s+7mV3zD!?ShFX z3eQ#)alfk8Vf{>!18Sb5wNLSMO&R@jhsg*Q)jB+)su%gTf!PU80r&vkQQhvF05*>0 z8hT|SaDc!ET;WO(xZGA3j?f_jU$PI$_TDw;8fqE*bh)7y8#F~@MTgV#RWuEyN)o0{ zhpRb(B%a^yxen16@fl%n6Kc>_Rvn!oF_Nj~qY^tJe`fntJd%%h7_$%;~5^#)ijU98{hp=b1FHe3vp9X-eUG^p5tQabQD_G|HQpT&Qlj6WT zliX*Vz%@&%j}(~$M=mg`B-E=$4Mt`<)xQQVn)hcaz;n_sQ-1?=)>u@mXgKGD^cHaw z{K9#ub`9J7=W>}nh1#$HVS(5$^a8Ft?A2l?Ia;vW0udPaOBLC%+h`8Kz^XB2jvn}X zI#st&n;5e21-;^h8HWtWV8KK&N{Rs!-~I*@3leBAy2{51xNC|dTwY*+rmt0dfP7(y zPb8^&a4GQF#>T=gT703{QSjXF>wnCpF1p<9-7sI?*Xs3pWPh1RF*SxT;2&{4!mqgy z#`Sv-oS6KTd01GS!wL|&(~;OifVhI!C)a;4B}g#?#Knoq_RzS}w3K5+(i{ewh>tB` z$ipm^{!!rWL!i$mUyfy@jA#9yf{(Y8`hgr(qOOz6F;Z{Nq0~r-{lP@};We(jh-9^2 z?K3W1S3d7{VNU@Q8ct9}loqy*tn9i82HkNHo=9w+9pKeSk9hYm5lBM;gz&b-RKum$ zBXDr-1VQ8JM(H1sAcnoK-mHMxoy%Z5-S=MI(arf)5LSs_Mr*Y>E+CZMw(NQvtwLb&D2%Qm4#Fm!2@oRi@_+C*W8o34}wGKJEWS0p`Zj zrt?}+j|K+sa+x=l1!On!8mI|u%X?<;Kb$Yn*Wuxr{t_~=G#B6if&gh`8Uu`juWpe{ z<6kV$I$p#8oa=*cgV#tfGi&6m6uvBTHkkBs_dS7Hznl(p2S9n0P063Tu|dMM;z+=) zR+VX6i`0qGrLbc(07{zSi>i`Ep`fI#@_ZtZ-iG56c1bg8v++IB5PH6|(7zfkm|vMy zM)&wbUXa+Gx|f%_ihfnCKoKq8B>ePRJ6v68{}O(cXHbn({!|KgUj*qI7n<#@&9Oxs zUOCqKx$q|tU%JAco4XtONY$>Gb4tNBKJrfeE&gZAAfu5(-r#or+u2RezVylgtB~iR zC}6al9f~V2DU!_KEeoJm+Xj5gk3d&At7i*%N$1Ex4g)`P(NAF|pWM0k+^renm7iId zLp#6SWjiZs<6uaA9xIh&GAdC5a$qv%5M)P)3vfPg-tfuieZ%0)vHmUNgiP(RKHIJ@ z1G=>Go^m8OdM`8?A~z$b4t5Z;(we1gV#@edYVSe|^ct*$*Ds5t#`jYTNYS%cf}eK( zK%bE5wTf)2`Ti|$j$a_84Y zUJ~h-;S*+!?c#wVavwiIy$O^aC|^bGY62xxztdTUr*;7|oun5g_+@jD?DW0qn2<@L zyO#q$T;nMq$Nt!Ts0IZPhKGxj`e|Jl7D$=m=2QJ({nib9$Tf{1-J!(%>DHpk_|$1I ziX_;E$^^{)MT0F%km%=fGqs9ekz#2LQ*sFr%HfPyL*I#z$K+Cr_%w%*to@s<^! z@0~SuOz}ihSB#H*>vm1{%SMdbWH6YpuAQWI5`fN>3@{himJyoc9>?I|EDA%BmXu$2 z?(L)fj0b;TlEL!1qZzAFwQja)0Y9qd4{C}&H}FhGOSK1hf(iMg)Z+&X;4;y ziw}VY2@!X{fi$YZi5wB3o}*o0CXz>qI?wWz5piP{*4q!XR%0tPI==RJU+S7nzQX1> zd^gy}K9(Ix^fO_YBC9w*_)Cp$TXb&*$)kvP4|Ed&#>zl}59+rU=|&xsKl$09f-TNE zX)aS+i?0bb@oVNkZF@MV{ESh<38b(|w_S|~#4Ig-lg~FDKvcg;p@9Gt*J(-{0c>V5 z-%a8NLj_0T#h^qkB*q8^=YVA#|I81v3a&(MH(b@OfOX7qdTWkUfdeQ;-ba}Q@FCc} zi7f_`z31m{a__zPyd{|g;5qX1QU>TmXiiFB3u|%1gv%BV4}@{lO2R9kg15v#LG{QqUycO) zZeJd327rDZXC+%vF*@HBnF76>@~72sUG*|j91K~lDOn*(^hj^|{8mifMD!!(3XmlE zFEigkD6S!^#{eJ{GkjRxaIDaA6XSsLTvfdw{~HAFGuE4D=NohACIfk=k@4&=_Rh@c zUCIG))~uL=k^&q|YP)mPT@pMJ2!@!@56Yja-IEVzIwEv=2d|180u zkcwq_QS;{}N*n=6d2RGzy}7_ulfLL%-pJD~k)Oy9mM(l`KQ2^(b_WezWmGSoj_eZ* zI6&Y#MIl36joBW_oY(w?=ZSuR0XS0;5Dk!sTjc3Mj{`d}t^C(svt7`=no{rke$%;< zcA(k&id>@_Bw~#7I-f;mA@F#R->&hJ%QRKkB+;uWS&(`jXXe_#Qu$x`EqnHhTBn`k zlIklHcQH?JD#-CU4&eXI<7m#u-~IR_npGF+BUqg2ij%zIlU4Du)*OA0W7;T$qS=pP zfYvIE13B!h?V3!EKeL_WaA>;3XJX+68J_!1){-gI=^7G|oze{qdDa{Fen|*MQ;*OG z1|p3)VfZOe;0aB`Z0Ns2{fwCh3UR9=r<&e-y05lx@qU8C9l!c7P&Jahbh>5(IxH|G z{t|#HI;T(50N7()+mq!4rd12s@Py}d&)0^{b;!s0$C_=aq5tssFWd~D+G|?#-)OSUSqq(_kQ%D*Q>M$ z65imW_4?~mTC`sg3Ct3}tr?y12^THzC3d^{xnr}80kiPxZlix87~>WIMo)SF2MfPp zD}MIfc{-WH>Uc|MD;Zi2BVNMZ|UYkJwGX@85@ zw3hy^q*BmU_fM5jn+PD&uf59k0OX;kjfe797^Uh5xl~U=1~{2_YT31@*6&ZvMNa&9 ztlNyr(w0PZL_U;j5GOdp_GjfSp8^#A$h_?kKpr-lRZK|D*|w>3t*zgByqCn7IOTJg zn#Kv|f#D_ds<`!yYF$u1<*1&zW8U3(-PxK1^vU4PDm#K}uZ%5#WdtYq2t)E$NsU>4 z{`?G8ctQ*j+Iqmuo?Cp|x1U`ZZZTemAPAXR*S?CrcMJO9p}_=Y&87QP&8W4j*+q$J z;df9W;+(j~Kosl=a(Xvnd3j{S{1KPAt3_-r6ax$hulNxghl(C8?2s3Z#5dg)Faq_y zIO6~q`P+myG+gU@q<@;GC?tmkT(0Y`i_&?qRjt~F#x4eu4=+k9#$HXS>`7!^_mjd* zu0`u%2lLz3W*5Fr-2&5E0B677&c7 zFr2Qv*GFbvJ0xj-@yKklB8xuoHy+Pnc0h-t^0vj2(*x*Jw6WMv#>xhIev)_!H z%x!-Drwq4h_8K<$y{UOBDrl2At$fCSv$%21usc?51c>Oz{Bj$emajI>N%!hhmqIFR z5=1ng7-u$%tM&=!O;;ElzL3V{An0{t|MW8!imU6M&11IlDUBCs_f#k(uw)&$k-MpFg*H; zA))Y0#r>JzJNmCwW8`83V^HB_UzkSVjR=`6pKt?Z)G!A5SVFzelR43wsro*aS*u^yb>d78cStkW8LeOr`GSKWtR$Z!vIF8Yq6lT>`w>Sa3 z^qjUKFhm!?Y*uW2iBQ0AnT=y!2?ucI?I_Q`R$SGVyjrg|erU~N&t})^@zY?Q53fzB z@BsZLO8uH2=<&U8gIX^q1NI1kPySK2UpB4zEja-2_ff_CBuc0K-+7&`pu(-=ZD*>6C8J4$f(w0n!6sF?BCY`7$4<+ZkYSe5#sNp2Fj15>sFoqq)EJt4?0!$ z!oRpWfln&z0ROLw!ng%0(`*(;I#4BQ@B{D7@C;B&pWViwiZ;5kMPHm(Tvw#t7hP;;@JqD4fh!Y8mdqh0qqTZKcQHFV5S>WJ6jX z%bo&#Z0oa~`i04^A4Tzeq}kFsY!8-8jT?=Wu|~wrsg!^=E@G$w0opj&-5yt;??X8o z(A{SLr;VWzc3oM>b|tF1|5+wM84u`aMm@-DT)BVR*cHhMEV8-rb0u+T7q;dxp+S&T z)J4fPWuVv4)KYM<>#k%R4GfQmb;J8MSP zyphZ%0G(B~nKl*rp_y~c>sa;>xdhk0Iws+%Al9?3&dt|ueLZ1ThNx7cJG{_cDelLWNL6pK<$fs5XiX=El5}v&+bSg@G{)S7`bE>Q87(a{iY<|Dese zz0L7(b$Vp7V~S&$|Ju?JH1S(zwThXVU45Tm`WKokVGG=ReWJvzjk${FFXHX1``T=& z0+dM;$B5i&Gcy9`3ts+%ULmJ+-?@X05TNEDV;c;Q95>v0>K|ZrZY$m?s@%BM01LOf zFhAlm^r&An_$+$acZ257`L)4%kw z#mzu^P$z(Hr^YD0>=3|hlY8h~VgGR4^1Us==G8B>2Vo1QmDY?{&R^}R6`y!<1vXG( z92Vrzc7+bi>EY}1lO|PBpSGHQ*)|@_m5CE@$oK)ty$_BuOT9NpB2H2>f>HXgPLAeUc&I_?{^l`cBy%IjpO7o(mnN8931PJi@jSCvXHQ@Y?A zCzV4DX1H1Vc=7e7mYSO9qt7w#mC*V9@Egzm?trnmf$&0MSu3D zQ-jy3+g1L;T19+N(EMpE)_=p6wz5OJu`DpIaEgR2ZBZMG0?p>^ckh|4uH|3!sPq=g{@?lJPBXU`Z(ey7 zX7;S&FYHcyT(I*Zuv?CkN;Qa)C?R&{3*oroKuEUXwKCt3yvvirV9WC%bChy`HGLZ;1qoGT%l3!1XT-cLg4}|rtrd93Y${BVBxRaCZf_^ zIx5__4FpUGD>f8pwT;bQb`0BMH6aREW=|y+{`&MYl~Yi5po_=F7S}RbeAJ$U)ncJM zgVv5o`%uaN3@5@cIMaWl;gT5=NO`1gc6w(UFo!=#0P`BVi7?osA!_O`+Q|~opeD^! z?~7WWTczNMA6fd=XY^2@@#N&o3(7g^*=dh>KmUa54c&$Lr@#%>cYJMGhZk3mPKYtJ zewo5G65IfF?i+p?<^H5YUH`)Ih{haKa`x`q-lViL8?9+lz^{3u?b)0G9V>D#G#2*> zn=DpIWJ;x0Vxm)%fFA}f+_{Wbq-@vaDaFfhiP9##=l#rz;Te0KT&kC9r4FMt`(NuV zO!xZgY39<+``!H&THVhwt`o6Pnod3K-7@G5!E=V zR+X53%V0DbL(#_td+~Zim-`V6H$|bL+b5&m=BFUE@Nm1<(4lVNeCd0CawUfx++OOE zRth;>RXbZ^S{k3vX}8}0DoEE_bR}_fXu2Q!8_!4D6RN3C;jTax68g+mz3npf6-J^s zi+Nf}Pg5vR3SsG63<+^x8rcD*NkZXhaG~roT==YLn4N!Xn4n3|PxLUGm-oS)O`F2y zqM^Y3>1v;O^UJZG!L;xYQP0@=_x+`q?Jw0XbS{@6`>T>?UlmY@`jyvcRZzftXNzU8 z?e%)jXI%znuJWFX$lANOSWo?ZrwcjX5B^>iWl1$1I5U3 z)ZR`?L~X|MM>D(sO{x{h%VY^<`u!tN6Y>{ROxIawXo-4q&%#<|Od{22G^d!vUc>m# zO@kA`#il2HXcZt+&*U=njNJfc;T!rjn5*s9b_dW(!sxo z<`5iFjqp*m_9_?w)B&lN)fk_62pAw2oKm+6r{r>@{un3o%NDm6+t@O|>bw2b3K;w8 z7nMQ`Tn^v6-m*W20)f!+mZ#(s1h=s6985Y-DT|bY1x)>zrvXEn_t?~*v_YmUf%-mn?bbO zdNk~Z6^r@eTGNh*Oh6D@&q?0#Z6hYy);G(Hi!fj&Y&IFQ<-v9U%;HRD#a`$ z(ahoA$KwgJoE+0#0x%P0mZdR1(NdirOUXcLPGr@nNx~OVW%uGXzl1P>{Z|ADDp1^% ztVOA*HCnf)duODuUG`$(RPu^(a$*ANAmf7~;)-!?V!H-t&ki>?h^K8LmM^S-?jmxEuMDm1p+%?A&C5B{A~4%mb)p=_+D0IB|)e4u@S%72G$F%jw@_ zN9AW6^_v!ud(L28QuB%wWsc_n67RR|tf5Vz+aVT{ytrkhX84;h?L>pbsf^`y%hru#q-OrLxnO1x2w=0hWtI-cPyDmHZtVc) z!1487#b*Vkua0DRyoFNOpkXbV3so+&#R8)^0S1O8+qlj(C&qkAOi6x|gKu(Ab&>nU zrq@vOf3ft{ zaZz>O*Iz_ZK-!@tC8ax*4r!3?p}QNT8w8|Nx?$)>a*&qp4wXh)P~S7p@BMG!Gk50B zx##Y^_TFo)i`;1WEdAmV-4p8H%S(4x(LncM>>&OgbvR zQ#ohi5a^O>{3MLPw1qe*c7q&DxwPQeug;>t+YR!>P(XKXvorSzi^&hMSmSk#MhMII zzSg%iZ-VbbPP*>WxXU-YXt5I89s{^Vru1nE9F_V()?fo#J+c#7}9hcYmxa9BrE>g{uUOmECPTRh3 zmHO7k`u%@~N1pzdS4li51>rhOPi;E z75z;-UM8P89$|N>Cp~@n2%VAyPe02?31QlXAdd)Eq@%C*LCr(2B`GMtoJ%5?L!{ez!S5MX6llWmTZCY)bN)y7BKRo(B>!Afaqd&C%@zlF?+UQcCkrBH2 zR2dAlRFJbOX_j;DU<k@K6m3H6pJsof8p{PqF<$zA~OaOXe!75$4 z2H>n|F6g8$ADJqO#rp}}0JMRJ zbst@?5x@Rp4!d~y-oM`BKfCrdj;h9~?{FZ;@YZKw6kG6crzimCv$yg+GN5NFdE56# z6YM1+)vvzTgfHB}oR(dW^%j7{wqpN|5+6VBU`hVyL$f&%h?JQ4ixk*&s*HtSUsAy-xSUMeshLFt0dr0 ztQB5fgcgqAQ90-cUVkUW>{ot+5>A%BaAG43 zDtWAkY&Ew-?Nkuo&GgaAc72+8m%Cqoi@eQinIJ5hkDI77(ygO*w*Rip_gqBKK!7)F zM(MBSWR}=z|2*!fr5FFc=2@fi?T7q8O(L(hC8g0@JR|T8U!yzUEj&zv*=6P*oU{FQ z2e>lU1J|6dVWzng?T4Yfx(jdD$G0WC)dBDdA?Pr~wdXgs*D!i2Geix2bP;KbX#&L3%38qU3wuA_6d zPB~|69Z169InM?Yjb$yU$7c@-yrHIuFavjuPrdqmS*M zJo1^r5PXHy&qaWeVrUc4VR3=oUKUL)&T4J?9!oGpk~jctSN^?fhMT8?#Ur5hvqh4< z9C7+MpL~>+@uzlc_GuEsi$Rmh!R%Ic$2cjTQ~+j`F64cGxHW%9nf3Z?6W5r+8CAHz z%f`Yas)ikHwoM^7u++dm3aUIBP~|O>998JTg<{xED8vb22|DPnSd_oKoiAlu^Tw=* zi2L+-b+FfJAP`;OxK?Ac&4^a{@Wd2c*x#vQP~|^8FEcoy*ur`A%>-<>*VSNsf>Yb? zU8*CaU0b!4)#H!5N}iIJiE#)-Kn}6k@RUG3pr(R z!4UqFtW%A#T+v!JNdjIQj8`vT^Ml@cZeVC7hI}^sgI*@b-p1-^-efm>vl$kb)eX(m z!|WHxQPjqIf^b40my?0x(SY#!fQAe_=Gs)@gMZ!uwoZj#(__oxVzj7glJz@5Ezf62 z@YfijS(#2#O1MuRjSelo)KNXxQJMR$m|3$+UviQ>^@Db~b)&Up{u}*khZ|QTs%W^* zh#(Hj$Eo;hB$oD&M9`-><%)pxY2z^wKn~Fmy&7@x1kC8&nK&@o(xJaa<-4YbuAduOY&%O@fiJZzy-(c8jXfu z<&a|oHEhdo4==Zs)6PfKu+Kh~GAVJnulW?2OQ9c5Q=!#iMm$})egQOR$kf`4ncp;( z{@{AXmyG|u{k~`28>1?o>1LuhuUgbER5S^nP9gv)q<`y{v*piI>sz{WBr#{58@{>kdQp zmYCC7%_Ii$LD=sEp_+Q|SW^O}X1hwQY5y!ne0oXR!oN9j|0;6JDUEJJ4s5wMd8Wo1 z8T%kQ7-~+Gx0P{WsrO9BmCIFGCRi2*Fl;-S8wET&be~Os*IO~`!aJuhksShQ+O4VMW$o-SIxF=nh9wCe@Xt?VjiNftK+t; zq@zNBA5lF8JK>?Y4evt|{<8Jg^^3HP??I6rI{D@fFU7oP+U$erLAp^R9Ga*lb;A^9dA||G;e0ubj_d?99LD zS*Q3Ei`Dp*VxXbBDIREahFD4_dIT~6bIq!?vSyJNYkai_%`hwNk8iw+xix<|vS{6` z{oxYzse>hZt*Lp6yf8wk-aBNh{h8GQnES*cQh9ck-deAKit!6~njOQs>njrzHlq)o zuXJv2whgFDdD6NoS>cKIM9)k-8Lno8kt7eis(Dy<2~?g$*i(odFXySf?d|Qi{UP#3 zV+)Jq6*Vff$)`>gi~$yyIUt=55w!-dQF3bZN*^W|B6=^1YGX&%J5ews=@k!r(<^F2 zNhP)|si+R}de%>5b(r!{bUQbks$;ZGO|X}k9gDvDVT5u*m9igv_0Jv?tnbpj<~=2e zoS+)4(zlNiB>& zh}rC^ELkQJt@eq#~*!sy{IunYYl(0OQMfIx($Wb{>$VwrSZ11^Zwdf1GwD3!d zF@IXX=Cg8k=H+aJTsFS$IRT90h|5=GM#43KIJgk=y#+y8I4&#%)Bx^W8r$bAvmAH9ANGJ``&`W1#F{A`^ia0wkwYeZYd4wM>MbtpF`CX!*ku z#xep*!ZlX48-11HC=)RehmUn*WJDuC*p`P4`0A|lchZ0zT)UJp4g9cx4lvM$0O&*a zS$i%kUKbfYcgzsoIN&w?p3*|repJ=}Ez-ng5K~96PM{8qOO)SXFI6F`ry0A5y()12 zY4QN|-@oTW-sAGoGp1|lC68lwFrxVu=^tDJCKwxZ+aY{_ZRiwo z8wE~V2WLSSF>_PCl;BR)lK-5bQUP18Y2UZykF7p7^*ciLBBOdG7j)=L|yPEYIcd0s8FtxGA6o zSZUC*Puj+yDw4&o7f%K2MC19TyBe^)Wp@4cm~P*k@!F0?;VS!4r%5WYKf=gP+buoS zL|di*w9b}{v&okaZi++O)m? z*}6tlf*?m=6ohND-C6hCpR{hofO01be@-*G^kwl~B}#s)ogOGlO-rMisf zz?^{b)4EG%nmFof+>XQ@FASCowt@z1Y_Yf?!-FO@0sr%F!|g zf!pQT>~tN6-z{@bDUsR})d|817|sLn06(itg-==z$Pmz*YQSZ-HS;S21oFU^pKriA zUB6%XG8u5%7+KQH+Jk}X!A-&Mx~LsaUCVwzmN}#oZMjeem@`{&&e=&;IBp2c6i|M0 z1KZ;$f&eGBoIP7(f7~hKL;ivt)rI}4@CT!nd4*9Q_i4EhPF0B58Q?O_`SkJ7XjFnBpY)7MVTMIO=;Yr%t>Vkr}5}gn^|4 zm|Cu*EshYdnBZ1a)x>eda<*lUcmr(3CkDFvirr_;7&==`@rc7a=&Pe@O}`!uWM;>aIOTLLs|0>~PoWpF>ohgQDNZ zl`D7KBwn4mAFuRg6Hl>iYRG4Wd@BUokf)WkovlW@mGy7UQ{L2KCUnj&=KsG9uYN`> z0FF*^5)aaU+c2u9eY=&^*J!pQ>pzYTHK2^x_`q zaZL5mA*zK$axmvSX}2R+s@BC5{k{^s*6QP2rE)=5x6HtQn)b!vzp@?6;xF zlW-{B*0jV80P>gq@Lv2s7>(!e(8YxbKr8%tCh`$TkC~s1$@H}ko!TmMhWApzW?#Km zD?#YOO8yWDfu1eF*S=i9Z~U@NXf+W7+6P^A2o~6i-kT7(op*7yXe;d<8$5|E%QcaHn5h3Ao@KhG}W9?;9! z^v66Cq(SiQe&H2!NdDlr-pnGbH{%{w!Jc1GW%ntLMq_JGZ~;|v)*0OD!sHh6pyX8z4CcD zSkRHy_?0z2FZH=Jv8Rr*X=x=&Bt9vFPeF~Yi@}Naq^yEgNw~?3<>Q;xU076eBom&U z!rlLx=N$%DezTi7co5bn_};ZAxTU-F52=4L#BEHME%i72jJT;mn;=6UBo!97UzW7s z_T=3*3kETO;oJ-lOtQ0J&}K!N@^LBl;1qnvk;(YX#~50XOba%xM0NczQwxjMZeGyW zSfFst1Yux&b(fl%fm2Hhlhqe=9Zg>^YRiJGcW66+qt09~AD`;H{#b2On>mrNWhhs{ z)*myU!47U{3yYxzPY}$$eLyu(0vgOw+pqY57pgOH2X5BJl>ynWX}O=cXN4k$oG7wR zx|aAh0V6(i**=j5(5ZL-`w7siAd@?*#(<-)Y@CHDU*d3^ z>-MuPCaPxsZ+=NF-|xI;2zw|6zgA>k5GLsFgcFU z84<<%Jck?xPa0lbArDU*+3z#X#)=$=`UPfEXinLygt6 z1l~$p&+0@2gcMR|@&w&Di-it`(z&2NqNZG*m_Vbr;fPaW8z*N)w>r^Uq|IG0_0W2`=) z$lQ7I))`LnwnbY`m`?p8M%cALl}fb0)lI9Ts7qUND4kas-2N(2T?S&`*)-={pdkR_44K^$jc>R z8o+!du6YZ`jt0-1+%KCIP%co%o6I&SO9pJT|DCC~VFnXH-={J@CGz5pG)E6{_}$(e zGmz+V%*Ue~Nxi%0k}H}E==fX-#(pv-q|VJVzJ|?WtI|eVWk+tnNNuZ(q0JG%y;Vb( zd_n*)-~YDzfG`Yb#wg=Ptd{E`g2qEKmc|D?^)i6wE>y>;*Kh3ZBE?)R;QoevnUJA_ zZ7 zFTYTE^=Q}}(NAnS8=+R>{68VltG{d7@&Ugc&aj)yFldxsTNh|z@WgO07-$tq^3PS% z-AM@+%b~C(Vs6X0jj%<|s~*LCo*&F(2ei~`t<;Q?ziyL#Q-j((Cw7l zJ+My!{h8>Ug`#9zjr~MXT=ZF~DSASK2&zHS6V%R3>T(c;g}&`vYlF**)?cBIkcXGf zf`~mgAN(r2JZN9(>bp@c@BP5k^@%W2=)AoXFq_CiS^z*Ewdz842pnDwABKPhf@NI; zP~V*DywATM*eAGlc>1y`+xeDQ5|)Xa{u?Opx9lE6F0u$=_<^O)ySlv!I^+Y`E=Zvh zTTH^>{`1&x2<6JJk3Mxbc=4F(sEP?`BHjoFNVFW|!-oPeGr)Y41dwolRXYRT(|lnw z0rgK$B76mQ*j%x(`|6us7&A>aNvb&aiMEcPR(x-0d${S~$p(|`_Md*t@*246-t!V4 zchaQGO5A@$wDW6%bjfr7Ib4hB+}cssMXv_u&q8|^DuTdIIryy-1q4t294*N)J0Wd? z;N-V6d!fVzLtC@X21F>9)msh_9EtH5mvN zZxKF49O*>(z=w!U3laFa9;sHul8}khCKWw|Cr?ww;z1t!KeWn z=6IN0NdO~A7U*OJ${hgV850Lns$g=*dOI9K)&;6x6Fz;PoN^GQ2A0PL#)trdTHH~2 zv1p;VQBVfGL90F(DBku2in;O849V6^3y@n`(ax^$qF*jen3EBJ;^UaX6g(;M398j@ zEvQ#=dsz9y*(vu+E{}h{sEaV*^d1>W>TF=fvTW>Y13TA$BH|lxdD41hGwMNT(>FWX z=c|K=8s+@}_B_mTso~z=m=oFfMQWd3mg@4l?!6#T8~`$`N>P>t2fk z0FNlFue`nkSP|E3cEk(rCMEKy;;W(7kS)2D*jjQs-;2mxmtk=<*?BjkwP-(zY{f3< z_1bSyz=%ep?^i&QbDPZC#eV>HG@Hz-8ghj(-$JU1gXq3j-`8pZJDPPBuw|3}t7Vz4 ztOMJ(8%ddsX4_*h=jf8@sG7DarSsCX13zl;(AO~;s-fEA%M;-0xjRVZ!F3eLYUFG| zEnA2ML;9Z0`tp->d9*|G;#()3>I4B!MIs<+nFE?*eI>Of|Pzi25JGV z+px}{O5M$X6%j=(w*e%U!e2vX$L{<&{rHN#CW39B0e1XUs% zZePi*6F%f`o7nsnHc9mPdCmO3HwFf*C+&N;++TdythMCSdU8Ro?M4zhMpFMn5$GFk zp4?CUe5h}je@QElb0fiumaY6h%)f2BMWJ|pO^+7HlD&&b=y?az$81sO5Nk_9B4E2d|(55?lOn{a|&NX(j4t`jRiT(;$wwl zecZ*>gpi^DinNtGWS~n%&|VKQ&~1XAgaEl=4~{{q0+^06pmr97A!6VCUtSPFvKU_> zANhByK!%XBc%98|&F}nBmNpydnm7~);jHYPtr{Q(=m~5unJ9sJcK7!Vu6Yj9qMBRY zMp@~vFA2^ozyV^NWXgsB?1B!U;|0a+ps&Cl32e5tuz((9$1wDqzMcV!*9Z1UXG(Sx*ruxtP?SXA@650uQY3@1L=QN5gE86h0Fi%b zq6MO|##BkP=$-}%;$q|xCPOfx9bEU{e4X^glDG-xpD+9lS@+6?jbsPN{4s&K%^>cwtkQDgdu|^hK&EI0&a@OF)Fx@e$!1Ub{4f z&-AvOYg0L4L41`fU)6xH1}~fL5{7?Q-DWg+_buBrwl- zpV`~yr5cq#vG6`R?Va(Eu+4g#=?->4nN9xGTri>N&{E-@(S$T_hm&NdLWlC)n=G+! zg=P50bg$N2Rk1y4nY5L*Z7}6#(A2T!`xV{$;`<^Q#)*(vMun9g(ecG&3*s4PAqF*fzJ)A%acf(wR8;OsdLu0FBWl9aX0(`L4GlGYMbO&kYR$DKc`7~9nzvd zuy~kT40fwX@$<{dGRkP&wbS=bWH%{8GiVe52eMjWJGBD}Ja^zzd`Va~g$fF8G^aq$ zvVJxAkt+&6@>8lPMr(cP#&CFQAEPt}KjYWaegv^;m;7(JFKCy9Exk1>kIKK>vH+62 z*f-=_pbx%+sVjuPq4VK&eqJn9t+5oQPSWUwLwQi@BzHJ}#*+vI#^fSZF++qR7O3^1kNe|cI ze6+6Bb<@`?bJPitdZQqe%-v? zrQW=Pj0y-lKWZ(EH`r&7lugzlM(Uu2OZXnNWt-4}onL%#CZF{*VRx}7FFoTSosu+9 zh&?3H6V(!+FFk<1u8@w(0s3k(>D5zUlBT~-!#&a?pJ3xzC=uz_1ujThzG(SDxh;X) zDw-hqli1~o^{-}Nkqj$u0eaH$)qJ#I3Lw5?n0kFby{ewgYQ#0>!2nDOWOPt0E(Fb^ zfmQciGLZz)u59XN>OYl*=Znt1C@_Ivwb1FGw0vO4g<3qmE?B4oo8n3TY= zYs3-I=OHNZJwRz`^fl)jnxH8$-|)f#;o$b}%{B%1s$c83XBM8t<3g5yxRiR8H!Cdh z@({p={}jFM4LCiNUf!;7P;)3t+ybdu8GAG9f4w$Y?P&8(i~FP!*JZx$9s&-Qg7uaF z4q%Oxz095gpiXWN3nwB_8~Zm$wqAiqc068f2(;jF&m!B};{;8P{r-3-Qh1OJgt~*F z2rMg#8Zfrb1|=sni-Y=XyWRs|1eeZtU*GpCvy!1Hp!8PCY+mbItu6@u$Yfouh^^C} zdWC{T>~@!M1o|PYaO3VeUGURojs07gP0HBC&p6rOCY-TnP%uZi%zBl^Ew(6>%?g`pCL0+&1i*my*t zssrNuGHh9ei*vxrhPQwY*xr0=3Hjz{UK+0_C0+N`Pzf`TA%zET8XXp25(kfBH948a zl%!yyf+dUtwIn8LHSMP-3rV=|JbBP2CUp;OzE3G7wD%TByW5yNg2)6aeHbny+%nRi>DcrMCZdOQr;v=3Vp<03sjX+Dulfx^~@k~^#`R1hpIUw3`*EMsA z267t5g^%ar5hlDnS2ANpaRm*it0rWg{J5Si>F*sRm*u!O?NEyLrn>9wIMpw~fA7KAbL#v<9e9+mK4c)~o+-rmOKsXH(!h>^=fh(;YO|n{Zi% zs3wH%@~Y;mLjMzGeL{$`bl5=PYTs@lbu}XBOeO?OIU1V5|5bs}5!5kGn}-)XIY0?r zK}dcUF*=gYhi52~AbR#-bc7f)6~ENGxc*KoVfCl0{()&XCe|joneg?dfI8DhP{shM zeDTR3EyDgzX)_p%4)5hPLHEaJ^UtsREI>|6l?_ES&|hpX$!m^wPNeESEUj1W&MyqR z(0b`>XY2qdhhJ~Vu2S`{lwX)i!ic+UyI)CC&K$DXvEK4U)1-1tCQ^-1Df7%bp({R6^Q%0{si3y5tF`%mV!nYr~1PA zE!+Hlw*w)X;9~vYvuu|ZJ8w*N1z8BIPam%hsF~7^7wJII%?%7+!bA8^o}PzB5V4q8 zR5wSkqhjs0ATLU)ou0&cPJYO*8#{rM1k}C+b}4~&;lqH&#**g~jku>wj2Xd@BY?yS z6X3@9SlMS_S9H4xf%HR zx!=*2#>%h$F!}}bNotF!LQcZ9aJagA2maH`ie4KKduaKK|`!sH9HfxqpeO z&3n%1sOxH=Ugk*hsE-9e8Ek}tPl*P2FFmUT3C+N5z@dAni^g60 zKW(>|9Fo>EF{R;0%35nPBLYNi2I@GPrvH(Jsz60bRbH#d)$^``+I+rurnwq|r@@GLKT zw@LNd0)NjYlFWnj&9m&%7^_90d{^({xkq*e5?=M`G@mut36Z zmKsw|?Huu#KWqA6qX2(JF0o#=?!aR;^YQmd3RfT>A5RmM^lvpRNa|FY*O8<{+5 ze*b$&gS&~ATXsZQCV*Rp&%<;lu6HY60ZL9lxHScpGPcEJ;IT!#1;?i@Hm4@Tz_hXv zH?i$pO9G?^3-q`GUE2Esob- zWWa~8UBcdIg-@1v3YQbc;J)rLV6oygEPm>MmYL4^Of`G8`Nf4J)C#cl*J9GH=KQg# z&w7Zh(?h4cHZWLxZim7jM%SW9>@F8~PljsU5q)BZxuF8RLY4v>$2rbEej`MzPQPjmv#fY~7qUN=`7mSY4kN36 ziWf+`9~Cw&@$p|hcJW&cqQ0aw_0)ImngOi)&49jx-bo7}3{G=OUT^uC;0DC)D*+SV zPSHD=dC@di(fwI>=jYn{7&brME!kiLdFN)S@J?-~`tvaG`b~7A-H+3`xKx=zKEtuI zzdU9@iquAo!|Bk~AqHjKCR(K$CIYg(wZfsM3)fBTlecP3FLVXRuOyxe@8r2zzM6M4 zSPS=C&%)}mx}3Wa^(eMMV{5bBmUW#d+@1M-y?OxVT`965fe2d-Ke{i7c~_Yux`FOr zCQNxXTwDKC;DwMymT*+F!yC>L^H!b?XlBvt^p0)358wjQv*#h>MADxbljxABWfr}T zZQ=*R)M9A%c$2E+->QMx*fxd!z>h{joT{Kn4tfCHK+5jL)=~M(xiBjecby`s_vjH9 zR+aHCrO}GL^g`$--e>$L_7c#qc<$Xa)X*XEmFu>F-%ZB7ChxgVr6@|T#tDH>!L5eF zu+i|le$`8|EMc+Y=Vo(zb!oltmez}){aB|bg3Q%-G9trv-2c3OqO;@; zypE-09n`8V>R0VR{#dx@D|RZNBPmU=-SxA6^MC|q<|mNyiS5HC)oK+CUtI|)6F=*8 z19ndaJu>j;l(6w(KdeQAD|~J(b7#nD=|uj6ochvw)+_X6zAD5dJau89Q=V^Im)9hm z`YiiPl7;O>8gf{wfLY)U?7^UpvJdk6rTP>xkD+XFa!r+P4}9EJy0w#C37SUN>T3m5 zF!7d6Cxz)&R>1x2+s!V|`UQgt{VlgqcsA8^uTC_+@P?9*W2&O+MFFgtNi;c_6Nb9> zUL@|*dXc9-Hlnzc;d|razY9 z88^1JcC&|iLyF^`lF~u+&>TV3$(RgIezNUXL%dcK)RoPH8z4H5IeSzO*DGy$! z6k-q7Uh-zid>9vt+Vv`(ug^&p$jLakn1wVy_ctDIFKJPO&WW8H*o$5i`l|PYC!Y$a z=k7WsvWk6z&09cMPL#KK>ON2cAE>U||j@rzUMDktfl)q3EOt2mcqam_CTWuLc< z;aR^ZVHzAQ`{DK9)fx|fQr02*9ET2zI&M9v{Vu$&?gBhaf=A{=_aE!;pV$@OD&jFi z=lbs>Zr?Xg=xwijf7&Rsw=Uu2us0jOhs5MTSbXwpIy6a8g%sV4s4@={WgAl8$kKhkmJyUYp{gD{X?iJ)g2`Hu;&DghcFfM{#ClT+OdGw)R($VjRb0-gzORA3nml@*QO^VkK2Hy+5|kTW80A za`K%gn!OjuL2nPn1k1NfVJx{mFbyGNyhYpAA5F`EFTIuVi0sXmY!6v_>uBkA7%PP= z5*ewcc4V06Z8#%SfT4z_rvWLx%Lk8j#Xip=Z<3yxqKq=2Nx|DjUP8;#XClp zO!5_eFcQpwmuoZI9n?19t(qqOiXQPpSmn|KpH7{Enw`Kl%7+#ahLgg=c+t&i>viKo3f9PMonEGbL6fe5} zBPi1C-Fn<9?0eOPnhln|iTY84Tm%{K`u*0mZhINy!B$yJk>gG zMYzW?ux37iNNRnA0SZ|ahY(!8hh#b>s>ain^gkoI@#hu8x~Xzz`HtWDrGK_n_h6%%U%ge;6l^=P*_$2b7Y}G3g2@}-M zALoPWSJ7*tZ){c0v0<-Y?9Fg;eVk4moOk4`~+RW2UKL zZ?Y#H}SI1^%+x)lN@-`^3x&Gt{fUEtX#H zkUd;K>eFWUI@1Kt71-Tj}xcP{8?Oc&-TpHFdo#3iLv0fEG3*bsk=8GEJLJx zQKe#ZOjQ5aYfb5;ig{$JOclzK4`F2@f_p;LDB`FkGeRUNqWh*f-Ln+~Gi$n2$_miq zs^0AM$^iY15f9hB7Z3Zmj^L|V!o_WM%@y^T;$2SfTC0d>zG)au>IHyPn9Pxsfh8Tn z%(=D8{31p7w`mM!H9DY%_^2oRoLcDbA;Uc z^c0{rzqVvblV@}=Mf_gPn;yTeeV_6xNz~qTFNbm$m4xN(VFa-myFu};f*e%E=f|WE zI1=lNozR3sG0#nk_7_zFad{io$Xw;hfu(2TxMx=#NoW)@uL_!WM6n;Z=UftaH2sCno)9tH7Kjq>>tnLWp}92B65uB&3_Uel5N;Ua8D|HWp?I-h6d& z`BzPYk&OxU@Vw;m1GTfrYilVvaTPijPHAuCT$N#aQ5mLFRj2GOKTSXGKwINq7YsL> zN!njq6J4pS0eToDVe)ow(Jc=zzlS&2l%E`5p4qV4c+{iMzE?j6Nv@2%(Uc;U`=v0J zx{UbzDJ`pRfM|jfx7tA`&)|k9JC@PZ(00?4o9;w}m zbPM6Q_)j~MriNpg3Xy6)>5s0A$g`%2F-AiRK4ZFWI8 zPMvFZCvGB=g)~G_U~sVTXhNF-l~r7JT$k`Yrv<)c(?G7E=}EGsqk_uHveZJFXfp`h z8kFX{7^SMV&v^K>?cKBvNY?cqqKb6(gOJzHYp;L5h#JcB&Wp!fjtZeq zA(f1uAoI#QoSboBU68xN;`u5?SRFh^Yzw?NB+pm)&2b9C9Qm0`1JJtXdmf2Xd`_T+>*`^NGN!il+i$_V8$~oyL{PX3b zkm6^9f*GAs_8*ydPMb67F>1C2Bot*;1Zq(GB0{n^T`c^r!XQ`Me!OjLoOXxgmanm` zPL-^M9C0YfyBq{KF`o3#sU+`d7CTUvKrwP%PvoKUegb}Ni$%=F)Q7Kcr8#?g6#c{1 zScZT4%E=!UclQ6F%xEJox>j=}_JmLj%DXxbVrUq|!x^P(zW}qX`(5*RDh9lT*V3Po zICW?e;Ec^t)VceG??0COpcmDsTl3WUH639GwyqPx;CHcL+5f8lhZ3MHIF$C zhpaZg3dM}rVX3=(6ZI=KdU493AT4$!G+RVBCjq(`Mo?p`lCa-K!_?Yd1@x0jiI?fR z?vZM4x;8ZT9Z&t@)neZFWRz)_hQrI-2b{On+u5^Pad~fNSOh%ByK6b+q=%nUMrDUu zsDnFHoMxJ2BdtJ}q`2-Z~7uEz6s~*!0(-c>9T?38pnME&0S^Zm()QX7n+df5-N=2%@A5PaDpYHM6 z`6#@|sMQ^~c3^^$}~vQx6{`pHbRLZ8=~q4*s=Y`zk{oXF8AXc`D}9B~|b2 zP4K0;{tX##10I^2^~vsJ0EwkR*^)q4{I=eY{BTl|iUGFTgh__eF&IuZtD2C##1;_` zw3Rg5`3;IvMp6hv6Oy!ZQe;Eqjh_40(&;q^TUeRFoN#+x_X4vRX=uto1XpY5SneEZ zu#~%y01^7%at|Heo0JEYV87^ELg-Hp#e<{7fPFW1{w`3V9Evd5a(-|tw<9gnxxagc^Fh^_$)<%I3Q1D- zf0;e(c%NqWF%in3>GD17|C*?VV4bHCF?gk_>yS#fWr*Ib=##!H`ubPD?m#G|f94Lx z`(54ZGbp`gjT)XS$y0=jAIg;(TpTsOP3dR!5Vje6ak`~s$Q1HUIlPcriD(VA=ohPZ zLxGSp6G0It^Os8@I)&l1F!Fdcwq?@IdR^TVZAkHr5yR#$~p1R<<7cZ{(pB8m)s2E0<%aMMlD#^b*x$yYdW zasDbDKh{m=e(r#0#5%hYJQjI z)D4^Y@m~0Zk5XuTym}6XfB8}zyj&F6@B@ENBkMR{@ro+JH`f1i)t7&>xvO)xhDPd# z`9f;ZRJKR-4>Rua_7?16mm#2{;X@e?bKImv6%JQpZ;|NvAdANmm{b7HaYe;I_;prm$ul@K}IK}w%HL)M+JI(>F zA(?H5$A$hsVAl^)q9$EKxn35F8NE?jk;OtuSACN0CrbG|CZg(PMI#}cr6_MvLysDz zo~rnPlmxr}MTqq>rcGRTFD5I1k6qUcXOyk#6mTN}ysu4%-jqaNyp1IC&Atc-{Rk`W zQHMB%_;ygxmv9=Dt(EXN06cBPN4U76-~n;kqi|KJPMy)|h&<>pK8E?1)i~`A9lp;- zZYMlZS%xbkK)TCKPBDf8fV*#=*@Ps|KjLWlD z6~YC^&MN|vl(eZ-IZFRWQ}i-x0{(Ro(*yGAIm@{q;JmU?q#$i|;a8W%c+cB?yv#zcc+t`LnAt?GVNjJP81Mm)k_4 z8TgQ#w?Cy%Whw^*f|aG-(ieYrPbV#^)B6#Qw&>M3&@$@wQ{QL1Zdl;Mo>>|Ay#vol zazK%>Czp6oTg(IZTdWe1*aSt6RU-6%+Ulh5JR8G}>hdIiSPR9%gjU9B5^n|x&Z@iT z23>wKiDWOVbXhUfp?#3}WKY;nY^Jn5yp!z^IL22dOZ?$g2ywD`F>cGbZLApIl>INg zB0|-S<~z78ceb&jv{U}ZuqE{Fq~n#S z&+)I81;W^Ra0;hP)GYOy{_o#y;NQ9egD-nL-%XD+R1bhzJx=zwdz@_8I!-o6A5Qjp zF;2E~)P7=?Qu8Zf`zrVOmjJvy=o+Ptc+i7a-c}0Q<==44A72j{Wt+XAK&@EHYGf&$ zF->%R^$Yhscv_y{#;9Exu_M0dt%w+-+r$n>fk?|L^cYR{O?Z0t!Xq1~2DM~r_g8*V z`h2W^Ma(on;fnbVt)41Pv!H{7BECMnV$`uha8-QDMc$>W>DdsiJB82xIQJI@zGH5R z$_$kR-o<=Kr`CkL|Kk0@&QP+(y_T~w@I0WFUb&~_M=b~Q2RU(ia@iD#EzF`6I=0tG z%W_P|f)&{bR{l96XYS_v+hOU+5)0xfBggdytw;4`guLm88r0vbuXWUO!L-(@=U@q{ z86f|0_Rha$4l3Z*cE^C1CtDcxQh1*F!Ed>6Z&zELNYN7XtI0^u!{#oWP{K!1)ISDxBo6Uvhgw%+ zheUh1b=;yRvPymM^}Nme!{2O@GknWYn?K>4+;@+!GG^2KNPv_%^bfn*4|v3zbb(n;y*&=y?#dVbmet|nCxk2iufMInpv2VzMEspAw>Tmwp=raHx*p#dS`|H9|KQv!v zp)u4$4Fbs3#qINR1Ul}{)NMh}FJC12tM-k8i(Pnqfbgdm>*}?|c2QYp_`$bT3>slAPsvYT3&KzO9R zeIKzhTRnUyo-J=wAlpx$DoK=#h|3j4gRwb`tdYA#uzEF&l0%Pdm3xMoKp!TpsnJ#M zvzI+g>f-q-wVA%`+tnRQ*sl5-sF=`ZL^1g6+WNcRQL2))z%*h z(qFZn1}@9%hzBW1w(&|Tp}9HVdlb@Wzdc!M+nRb6f!NJU8Ay1cYnt2IxT*i$gTN+l zR?cBrP@liBAAd_4twBz~#_v3?!c`M+_Oljo=BXL~R7Ec)4uU4>o-k5tyYrIlQUBY4 z*82BF8>A9cQQ1_b^?@3toHI(2%QF%Fym1LfGz7biYT7!|N(~+$w>Nc zt9WBT^=8n5x|pNztLJ!oZ-h~H&Gl1@7ZfaFQz9vh3Q){I`OBDHsKm<0RK`I0+px!g zT`(H~vrrSQ7q_L*V@cN_NfI@(aO=3d0n8}$Ms(yX-bd3`i6Tl-5g!J0ZQ?-@ua;0& zJP0zkF|NXBl6u?xhwEW{oo(tfIio#sysFyE^3L+mtZQrtV;fDrx_{(2obRwSYT?y% zpQy#mNKmvxAF{ZG-soYNRm~-Lghr;5%V2wTk55q^)m07D;Mo=d%=q^-n)2|p>ZsVNd8*o+u5(i6J z-ph3?J3|-Q!B*M3rS#7kSZ$Ha!eOa{O6gf;ElD_#0j5EOjI6PP3?F5@ww&kDll6|s zNi#BMcIMIHRKzb0OR-fXjT#@Cr5cnGpyl#mQ;}hIc2OJUp%i{A=!Iw1WZ`3ZmPU1nQALTDWc6k` zXnuFXc=qJ}*297%D8fs?X84>XS&9}>k`mH=L54k9R+LeZ*mThWd^3ueL~y1kcQA6V zNfP<>YXla%N5?P`vtFDf`_gv?6sWboKb%Lmx5n!V)Aq=sTW}F0D+wIpu_Rk_KK(4h zy&+?^CcRv;_ud$FqhnvR5+&W*I_DPFt`Je5ch2?nc#7jB63ZJi%X#(8oKh8{dcF!q z6!q37iyP-nv*;V-cKvo@6d%h*i#ZoPzpS45#D(Nhmv$%WL-_KB-GL*cn|N-=P(*Ea zU^oJ13MwM$4!dXAxBn=tewZILJfEjcn_8~ZhcD`L8Hu3m>Vj33R_ou46Gt1SPBNQg z&2yts)6;@zYBU35=14ZMmwg0d!>glT)7#>sIaYxx87^0|?zb$W$2y}oGhTC}an{Cy zD4Ny>ru%4iR>)YNZ3)|0-_4A~M4$B|t6ImHw1a%~LktUVSRJhnd{J$U4`Eu%-WdSp zMNjy2t0%2K6ThF{9-j1iy`Io|V}2qk8VOjxw^D0W5j>_MoI+M$&oJgtcX;%^j_)Sf z=){3JXLz`KMsj0$vLd}cVf{j7a=-6B*ytGSvw69F0X&DFvri6M?R4v6k!W95G51=h z#FoK+-ZIg&d9}XY-Csz*jhXjtyu4L-b$CyfYQ1;7_cx9gG`f)95OZVZ^gF+=wsuBx zZac{A#in!X*|vQUHgJ1S>}4BpV}B}k;(AwI3_f$!veM4zYC5{z`6w-)a$JWI9%P%X zmRIz;`{QM@Y+`e7(G<#spgvQ8kDqwa)z2c7CpyL>3Ezn+=_J)b9@>WT8a5Vagx`d!Dq@?Xv| zVGrg;_TYn^@vY5#v30s??4QE=!J>E#t5f$OC_LN6j?Y#Ts2c z&(Gl1s=)`ftw!+u?91EoIPRmFR*8s|f#IC&{SsVr_U^76g!7#mBB@CSz{7A@HXXQ_ zssom^@zKqVg;1HZ`xy1MFmD)j{hgAyT94hDOR<-7iU#|btj*l{Y9*6{6TVo*{ZaQlw8QVbM57(! z{Z6LgoEdw@%?)S%-k$z^ctk$6# zEs2n?J0PNe@5U?rVCt4lA) z{!_ts-X#U9cTY~fdem&rYOg;=W)*NJg%2;+Xsl-IBVxx6Xe$cNn?Nrzppr@C^S<7~ zbP0OfjTmRng)(}h+~Wo=Sj;}R&2u#y?V2d)Lj`8~3~6rX&^F>CJ)OW?E2I}CS!lNC zaHGBn{o@P{!?r7K&$@*oWfUAaVee?}A+_P>#!hssx5%dM>e02M1a3Ha#32(4;wdvS zNN+FQU`kt(OLk|R%*a0i2~TWz&z>l#7sj6ILF1|;J?vkR*^{VQ4Tf=Vjd=aDCiB@| z-AFGTFZJ_x^K-RLAXGuaOQSxIQ;W?ETj8dr_kj`Fa|ZeL+0F$E(w|G;xjg`{(ZgaE zS}Gtr)=|}k)~D8Cu*5Rf0Xg(%hemUs&&1C#y&7}SUSB8Lco7kl9}#QpX%>mxmFL$J zy-%D>y#4pjyBCxn6fPhNCFlZj1FRK$IcV9-oY^Qtt{Gp+2Xe;7yYPavfA!YY;GfFh zZCBSnbCJsde2iR?KG^2H-5EV$cThgaSfI`E!zwJ#i$B`aXXBS9^Mxhi;- zQ}&OMbt#5Ky`E3kGroyGbgWtTJCJ*t$(8TzB~9YsT$?Gq_Ij6wSBulKuMpgf9o9sz zr|vFnYPaszvrr|=E=>q^U#};gLt1W8Xuc%esqyq}v=I|0;~|WQ>8ftB(mk<5@Xy!B zv4$J99sJ2SfqRZVsNk40%=V%55t{+^d$}cg&RQ4C;HaX&a@EnmZ_&*9cOEYSm)AWX z_75p7Ti_90?!;yGJ4k#r*C?_j8Ts6mK9Z(J*cT}28CynWhj#kHRj%*lF%UW z^*{WjSX@p@^G(_I!4I2;4+o6v(U8R6g_!Rw4iR><7>(`anFm=m-nlXG!+lb~cu)5Lrv%37?VTP((JDmwHhTQW6 z@AGIr-c$gsyhQ8DhA)iSJC~39{zNrJ@M5C<{yCqL>XRKx!1j5cNS(^%>-6#AsokP@ zo5|$!Q~KfFHsvrx-ge@W)<$3PZe9ARZKsbKtnG)-%R3{x(ftzoJn;jNMKRbju{Cyb zb~G`tf&3lW8CpWJFyPbU|7XC&#?H$4f2Az{N6O7jCv9SD=4_77!ul&xq!YEUb~bUu zrxUd{a5fP(F|so@;pK(=Ki726>gHLCA!c**wLK*|uU}uCIPzKDM%zd*FsfIt##Gmj z4Rd(GhH_~YU0}kT+OY!}{|BwBQI&BXid!;78YIAI%Sh?nChgMxJW@%ni0yw=OZ zM?Z0K)3P=9wsn8mz7uT)+~Vo5du>N&@KRKQ?zy%uOm)D$^#@&)6M!Ps_1cjf@3z&y zf8<(k)YYtL@OvI}$o>aM&`egm{RbSwL(Vrb-mmwqm&)XoxGIEYYqKBHS~Ea?uXbl! z+sCh%RDH*6weY8%QBlTVY2Zy_Y-=z%1v}$$-qG(#$d63Jii`tV8K9q?1p|Za9(8Va z0G;-QPX{kJJ1H=`y^|N9Mb<()B*}BXZno|has%6VQnF4NIPpQ#FK;xSFuuZrhy%wS zPhumrEnrz^sQ3Fvrj{7&l$Y2Wi6abx2K1@r~ttQ>8q2MXS>8;5*>!QSc+A$Al&)|Ui2iep<#UK1d;~{g^dKUt>5k^D4 zJN8yTgI3i!* zz$p*NlqeMD^ha);FNsRB*G}RGZ)L|pCge7tuz6(4>OIO4sebMO74)3g7#)2 z=`0_K*F)+w>YG+LCdqc6HB4O+0m@Rqm|N)h%pRTO0q#u{dzYAO#GTNV<^W9ojGsWk zvZ`I~OdDuE=*{tJ736>$w<*OGD4|nhX`xd(!*Uj9V5$=Oa1EtfradClqVA`$intH; zj*MJxRegpD1goJE{#6bEB9RqK{G>agr8mlzkz#i4iCzZWLg(QJ} zDG4ykw8w~3zE984?81HOuVAFNm8uCRQumXL<5QQ(z)%pkC{OGKu`u?rF+#+AAms~_ z0_w3IzpV1ulsa)o6ue^xNTedw&hq2*QtGMlAP)DhfyG*&Y!*bbK700$st{8=;xx^0mLkWMj7oPj8s1K&fn-$5K+X9g)(Z4uP1ko)M~oaymmO_d?|`r=S>r8hvrn#CroexU&X!A+RWhN(ggLCUJ68$>@`WA>f_YP6)MER7U}Tv&r0WoBOH|fcpDZUbTYKAuO@H9R7?j>S+qr zf>n8I2PY5S^vy#XgE##N)kO1>AfRtzZ+$ei*4G=P90}0{{e-2)HD2KL#o5eR0xQL<&vJRcM<#_iR^}DpXR)7H47#I*%gIuHHMY?@UI^D6q|*N z7s~8G6l_vRiF>oDb>GBjwtsh{dE(pxZP{V4PPIYvGP`g{Lcj$%%asa0-p&$OmEk7E zs?(o$gVOXx_n?5%DFM#+;v*0X#X-wESHNE2Nyk_#<>|o5D`0AQ4R0F`zzD53M(j&||%qGs3$_OBc=iv&uzMfE(C@vE<(03xP5QS~=ImNf}k7AN% zM_e~e3tLo->bS7u6xb?5?L{GuPgd&jkld<*RkX^OtWXBv4xQ-eG69r9*;S>wFj9N^ zq^G)2e=c=B{j1BQJ@jYE?%hv^5M3#2uQnhd&{|%DEChE_p5J9!rTWi|sHw#lcv!Xr zjFNK;R$z-+IYg}b)f(U47VG{xLYcjlEp3Y}Vg5Yk50E{!GM#x;Y=8Vx-MV>M`oPjgGSXJNNN}v0qh^N==6@ z!@{1SUxk?v*XusZvuGe+aMdlUHz^EVqvw&;s9%$-s?R2GQX6%}Snw2bjXMO0q%!Pc z{+GbeS_Ddox5AL3i8~!0O<#myDSuQqJ;Ih08OLk>q+-L&FA>GAe&2P6Mh=KxppOr`a1_K(Qo*Opx#<&acu zj9T?!G|Ysa-}6e9wXkTeI6{4ZRRb97KOojLnay!113Ev!bO5a?VO-3-y`}av%*kFnFiv26p)V@#e4eOa`_U9KG+9vX ztNbQGlmKG;(D-6nmQWSw^U0iebM2b}OuxLS(R1uPqFxsPJYa8{uIBV$5r7wNd3Mma z$ubcNKLVGD0D49rXI2ne9dlNMB%mvVWWcvanYJ-c9vm6q$w3?-5Gk~bXJ^xYPP#!z z#(bxjX`2dQJA+w@!CWYgjpw{cY!n|!D?8TxKEvgV@DbsK0ZTWIjRz(z`j7MK)xeJyj8GS_&ti z$Qt5PLpI)~lPT0h8Es#1_wwH`7JLb&IPW_N&v*};)zwK>K_YE{ znOIC5>6R$wPf=%^;BmTr5h|Q;tXi352R^4+QgnJH;wSQb{?o@4{@u{y?zjUJ+<0I} z13;#r0oCP57P!+-bDx=`tI2QY)=V7%v>7UEMFtxkBEY=Ov7c-!VfGBWf>VD10^5ms z2?^ppEvWw>rW&3cwlJ<#Wp0J$g&>Oj`K3o8Il6dJWBYkuIT=$OW=5~kk&s`#Oz$EV zC?=YMs6e#aTEw81k)L1wFM&C$O>DPJV6txrcQvIh0ym^KQN1`F=t%Wk+9A?8L*9{w1l?6QRfRX+z=Q%PLY#fS-e%^T>f zufZ}|2#`RUrVR+G)!NBIaf~^w^Mf-BCNmzglcBYn%}jp6H}nARL8)mjzDs5*9q+4I zlmu1FpK*2qFAt#^sa=oH<=Mrk#GHNYa#;oGgN4zea9G}7R~o@651(KlXhbd`JexXh{FH=O$OpJ5t!c4oFq#(7VV) zh}oDLud_rc@w;b~CaP#M&!CO=c*4X>Zcm6JQIuTCyDFC98xH6**`nns_bZ}RM&yRT z$;#JD$1#yqzbi zUBIPQWPg`#5A87{=&Wi*w#8rv3_<`;ms1nDyp(9#g2_-5qctTrsBX8^+luCUXgczJ z63$d3Eu0Q@+uT-`vBPOWyea-GM_9a=|O%T0^5J%(sv z8!FjC8M5vKY4o~0tYfO#S?cAYsrzfY^$z!eb^xOo$OR7AtiF}kN)spus-$D2!9u*) z%bVSvk9_>%jw~?^%FN@;$7*JH!o26jnosU}%6g}JNeOwQi*N~rACvtnq1vAzXWH;IEf;bYZ+FIm%i4K%3Z&)R?)_4_9 z%i0X#SmZk%)EItjKwoop#Vi1=>^cnx4y;UOfuMvrm_o-MzrxK)AB*mE@^HJadgRC# z9jrd8t8&MeUSOG1OhDlWjd}(5^(xyGq&Tu(m1GDhF}5lQ#1C1&#%Q1_`EK=DlIvCS z#Q89;;|3Ih^IRYlpAFxxhB)GoWO|K)avNrTYYR?mCplC|cJ)6ZMi+yYvJRB{@CX8G zTA}gok(hsGJBS6!_PsG1{!7W<;R=Yex*nSq5Kp)yrqCA#IWPlrvGfy#0ypK1sf33 zIS1YM8wfU|GpuGfVpj6QXl#g4M?j6I?MVJT(vbbi1A3fG|gUO&IGp{y+S`D0|-DoYK}9lIb5JYrh^dgPa6 zSQ_q`ArVfJ=P4{|u%vxWcVg<`U6%J?xMTIF4tROW6g5o;z@l&*%hND?mjELX)Z36e!Q@1)6c0rK2#`l1R{mqLc_ z-UoB{x$AD&xV>?C<1&Ao-GJ!w{i3zba4yn;M9*uxR!;x1*{A1MQ6DXDRp2~C2Jw#z zxB20+}DdTr-^{YL_J}3B5W}R8q$kXa1yk)FBWkH@f}m9(2e6csf;r|hsKh# z-=HfYC(}9nL?vR;o@@IgvR@4I>=k_Uz-Mn6jK4b(8yI&?0)TI}OMe~&)K^%Z1fJ`B z*iX2TFzrrNvRMdR3}fECB8@+lX@!f{_6HIe{^(JG<`8{I`4PD(-#Mf+Fh~O4n_no= z@IWTNMn~^AbTf|)n6Yb*ev#QJkwZ7oA)nyF>VyC) zrFMo2#2as6($N`Hea_XU0Owi^WL*~SvQM!Ne&$DB3Bwm?8O|}TcNBW1M(F;_a_w8b z09RqGED6U6;kM#I=Th|l+x)#qM)2B=Kr|HzIaXKmDY6*q7)R-gTA0^cm-s!Vl^_n{ zh5(`TsG~zS`wjmxWIF1a%>GVIX|5}Kx&^=e0HniHg=Vh$m8r0^Cx z7;zZhxzF4&gSF#s+1}Q@G5vx5cTd+gT;NREeQ))HRC&ip%=k%>!cztARdoBeBzSoS z|1?Zx{hPVonp`9WZ)U(h@hX*Vd)arA3_{Vk4J-MN7O3iP-@sRBL+uP|WUnIey0GHv zuOl0nT+t(-2Jgr)D3&j~1#f8`XA`Bpy}@Z4;TXv&BVzZxc{J1RA^9POrNqpqF6N8k z!tOg1D(v&k54}3JtV+}?I*}t-RynGmD~@{0kLCGfAX7cL+sYK)!WHiEE3S0sgg0>W zW!i3dxzVo6uovLJ-i_K`C?^09f=RS^Yxs<}X_IJdW$(&}9>Qtt-67z<&_HAF5QJjD z{g9WY!;$2Z_sT#nX*5I?DsdwBx8&}5fnOsii6HmOUv9a*@a>x)QG<8vl5ZNwSGUpo zliUZp6G1%sJvq(jgtnycNa%@!xw~?a?y#@fNlrOd=%2zF2zpX7xVfUpfI_t)b~mF% z&4;OVHwP2oJL{AfbW9($kMLO!=^8229&pZjV6u=#>CL(uPrP%_$Ue^3!WIqsOT?u< zE85%cVQBcC0@*z|9MAAVu+mh-_}JtgD+1lSP^^a`)rER2$MuD9r$IXWr6rwPiC7pL zqWGD)3<`;h8q+dYTr*9f8|F~u^9$+^ELPqyNV2rOn7&9_Fb{N=2PD1{c3A$^8s(sv zp*HwWfP*bwu!zR#`=Rp4zPrjneg@}pz|SUHxtxOmR>Y29!TLjQE3gJbu~p_)e^5_v zETY;cl$h|~rFycfgW@zCvcJ*AFx7Y9E;2A30A|>szfk>3Kf5SkUFPlFVcM7)E`psI zC&inx#1~^7cyD4Y=7`NtyjZ3tpu3JLThD4eNHygC&GS#hcQV2=F4sY=KE1lnb1tTd z*Oza{F5Pdc_?OH1fX8E<7alrhQPsHkX8aZ{6FCL|G?$!O#c@oXQlrykeu~DX!`#zNS>@JWw$8`-2?T7 zz!~Fz?Hqd8RYMb4z=JH)c$zN)7o+#*ym_U5VUgd@TVhIF>Z%42gx8(_?k@gQ{MzWI z>L&ZM`6spWx^qX-?_;t3yT$AB^R?dYht}=uMN9Rz{AD`@-n+N^tfKgaNRv|eJ=pk< zV(*rVUk0~z*6^;a`Lm6q8|MIv!718 zh^1}n05SW2GsUqFBnC*Z2s2(L?sbQZkw70JB~`e7w6;k~5@x$hTdX!`O~|_ z|G+1%q$p0QZCtyEEr)y~yn^9pitgx}zEzZcMYig=%xu3m9eGSV?NCqi8~ zr_rJ%8j>6XIz=CmIi92`Fca^^-jh@+ZInHgO8Mb`D z{AmlnN`#!dnwaN{D}z&^_9&t|fJ_ z)fmNLUp~bJldQ*YxGXyi!4910@sY3TXK4k>fvkGTd4Oc8G}+7ahao+QO*o(l(R|Ur zct&C?J6@pXVrT@W2@*g;oXQFzNaZVx*OExqn^Xj7u9SRi=|P9z@%3 zNGZb!+?eL|$&HdvXY{$a+sv}TYN18jJE|xeX;ysDs>^wyj_NiUM>j)mEz^U{eBR7( z6*dCEs>5UWDb$Ppk3-ni7ohbVsp0m5sdS&48g(M_9JNZ_@I{yS^?JU;hjqH^zioh< zlk;|7P%!niw;DbMNx3%XhG}%Q(#{>%4Zh>wIzI4Z)BpDUmz((?_wgS$!$8l({J*g{ z*8h&hvHm~B;+C|v>~LBUz~0FZ>-l!Op33T$4{u?aK|TM>qlRn{TjFIr_P1DbL=aP@ zWUYQZW$&4|jghE{wJ7F|2`w+)vSLqrJMFz0kdd|{J?ee`JWLjj>UC_rO(~=au6{Wa zyXN9)6qn`=PJ0rOHFt6|N*YE|z@Ulg5p2E9!HF@(G*A9``L=(2QN)mK!44VItZto6 z7Ba_gYOtKKS3D=ud%AggJU)HHjglEEPI&$KPzbhK}j$N$k2RD$CvnZw|n#l8)dAG4f znb2(S)!$)bjve;v|7OWNp_GXg^7Dv_gdub%c(GfU=qHB~W_$(Dz4&YQ%pHB6MM32> zvf?unE-W{8{$;iZ+s32)tx+(?3!*XtekItvv02lt`jmGvIsNVPo=Vi$nCf0q>c_+R z^|bOq79T&KaSqT^YIh-k(e4)-nIKN6CawAWOvs=Z?y=`CYx+YAhs4YmJ<0yp71GUq zN^K%G6z?3y$IC|0zA0!%Hnu0Ncpc=B!oHrChImCNeFQaPNYpVsg*Q+B0D^pTIfQsY zZe9`qtr$_S36WR0$3Hj8t>+e$k4Wzjq-~w&=9TK*b=e)6WfRt%!y|E;&L3f0ZP8(n zam+ueO=fY7BO!?UvDLbb?Lwf*;p4=bn-@-x>gL^d4U^&RE;Ww|7ZATUUZ&J+EXnS< z>O|#TbZyd&q1A7uC%2hu>{0D%|KY_*v~geYyT@RsHR?O_;0Vrw>dE1N0xIdD0d0EH z+a1*;_2GsvJ}*2Gl^pqvJ)IyakD88yj-~_wkcA#w8*}#tDr~#W0;8T<3W@Z5<`0yCFL$2bsCiDqv(&QhR1e+q z$KRa>Cx=tKxBWI8$&h)}a67U*Z{y7fL841<)MWDloVGme{VvhOT^tykDHcd8W=M0E zDtULC+brsVK_uW6WQoQU>NlAT5h|o=284r-%C8bDn;@8B zhy)Tj@DSITyt8gy(=KtX7gY~l1-aP#x|`em!fGeR>>-NipqJkQ6g_8#r01n8q9Gz; zjJ23^3AQVWPF<@dV3Cx2f4X`9yca&2#0Lzzb34N4ert2Mf;v*kX2eyL)OYKiBX5V# zj3{agqrH9qd{qlNC8og01~DEwx|pem)MRGqG&5uBP6?t+4ZwYfKRS^7@RKd(l%QMk zXxmv#KED0p69{FRQ%p6yTK~Ybwp$}X+c`)9mFDHZwC5rVNnonho8p8U1NWIh*@QyL z?97&Ra0}Pb!cYFHx=l2dilztW;tZ>lJIuO+tR>vh7#D@*O#a`PfS?4?P%Ar2;R! zJMUN*MQ}SZ1az(j$BP#T8Q0W1dwGwWa}eIT!aMJn%kj1sI(o@v*g(P>gbzyGZ}q${ z>)D1Tw#&O(V?{eVyKH=gQaJZNlt3-V>Lg62llNSjf8|fb1Cl;fm{owemMN;k>UhYR zhZB$$js4sKsc?p=C=%*q(xTaL0D$rM{xV$ ztrR>R?R}`>s76n#SK#arEI6o28>3)B8bgmxNEWsgERlEpX~~<&hg4lBP#DzxqXu%g z4Mq0H1)gQB?&nYzCG@ZA0b=kHj$NL^u-$y0Kw8Xnh6=m-y^^;PPFW!v#+&-#DC2M+ zNLHh1&we9!h|ZRhC2Az-W&_>SZbauhy$!~RgDPu=y8z~+8}fL(&tXP}fhDSxAry|$ zN1K+^Gk&>U?7MG_0{wiW$7WSM(w@4J9!=m%EdH`6&#s1hV<0b-?I=}Gzy|QwWUW0H zE7A53#v)!ccF~T!pYYz+AVxAmkz$!C&9)A#qJ{0z(t8g7&V6FjCXL*Q1!MjZ*n4q# z2-~K^3DPorQ8@N?GNI!ow(LIT?1%*0WY#>5sg1Mg3ff*Hqh662h<}!X29?dMfaRKI zK$_D4*DOo|zHqZG>@%dGqOD^?r35qF`SWrSG6!;46$K{sVg^NT0Y3RsvotP@x_pp2 zGY!j_%&irfnY*bzGXCBbHTU`?E;3ObwI`{$A1ov1NZUA3@avY0W{ZSJrGUS8k08-c za^-lH^=wP4E73i;>KuwCifwILQ{(OsE+SRcLY#J#KX?(Vt)klzjbjP~K+@Xn!Rw!@ zyC2H!<#w6OxoULl`@|K;Q|%WQa=c(Ar8-%~PY0=Qdr`ME@KyEGc_)+zp**8eE?Y*U zi>*vl;n{H6R#GaN9m{@69Hmix{Up(|3TmM_7Se>4iq;S$NRmM1DVyKs>Sapyx@zUgzK$o{2wrAhb5(N==xNY`z2JN?G(QY^ys3o9 z=R1haE3#fIaC@VKV63b*NU{5aRY)BkFI+-LcUlu)b}gyp)VL)yLVoLr7fU(D5OBu= zi9)ilD!8%O*q0Hvg*UtK?=OE#FxK#nk-4vSuv4_UWyAtAx9;Jzq?r=N{ACMJ;A-&` z7OM!e%QX~9X?1gF-`Rmn3v^iDd));^*|u%W-j>NZ@Y=hvzT3vZSj4ohkxJ(h4JAIq zQWyt7&1v)M$tVmZ$~yk4Q|2}0dgUdtn@B6!LaY%b;nf#gp1ki9x-3nz95)v|eypGh zWO}w)K|vTt1#i!78_aG*Hce0m@Pouk@$_?4B^pq@VOSBtr!$0iErZto3vi0V!DW+e z^`OVNRs_=qaenMCD@3XQ3*gnM;C{lTl(t*SAVhv|7^3WM?_j2TinLL z^_L?_Mc28w^eue*H&Kz1naowJ_+Q&WA(!1Nn=OGJe*YCiTlcReEOaaL41%0nk1_OI zdA3sK{AQr0#EH&ZP7?k@6Ix_i8z!R204PE1SlrTF0==3NR^yVC27J)P9333ZPKN+N zTxOXfdO`d6`lh1RZ7deQZ{-c{ofut_e<$A%+S4q3IM;f7BrJUBJ@1bK42gsg#h^Xn zjLHUi`OUHxJY>qzal|*DT`%kWunO|94uJ;B>5wG4$-$In)~jKTpYNK7hu{mNZ2sX; zT#U)1{x}6!*)ey@pHNbsy~`e4UOz}LAvu(UZqoO*bv&t z99=&qU{Y894|{S&lq5_jX`7-+{Uh&UGwx!^F8(ZwqA|-@3F6(=7-}`p zZr!N|TBWx>yEnSlJ>Xo6(I0}M=f8&LHz}adnJ`XWT>At-I!(;2-Z=!`5oY?W*+UoN8r!roe9G(q=}Ej%0;gLJ4~S~Saj zRevJD3bPGCA}-Ei>rW~2_OD?+-w(c(H&V(U6k7wUB)$vck4bfc5(bbqO1d*~& z0Z!>EE0yK-f4q+5&YZ1=l9(G$=aWKzgOI>`|tx zoN?)I1I2M$OCACI;FH-ApSoPzWM!{XlGl_ImpFp4-ifZJ5F(cnz(oMBuEzl)p(JnV zj^t;YSF~oOmDU>1Za(>kLI5e$wKx7`Olbj%f0n0gfl9&V^brCd7c}ZG@p4)lIL^L@ zK<0+FaaSzav=cKh4h@^vKcXtfcvA&S&^)WbY_UpEXYIA3tSE{u)7;{*0y`P;#jLa{ zg%7@NQnM+A-ixy&6h9`7{6oDbzFZX)Qa+hi0W3iZpP-zJxOV z96mXY6_>3^FIVbjrQDQEyXGe8HiL0VlM_Lg86_QgKrW>{?g4jRCTmD{9~vBSnjA0B zj)pO0q8Z`dQ49Fi`z{LvB_`mmmc>6`P~CJ@;6H_Sx!NsWn#8#lM04nK-nqO}dG>Hi zFLL{w*LY-*(DHiF>54!Oeg<_vt@ZhV&0>dC!w1A4vcXQGxerSj33il-?6~BdO*!#uu6M%HdKQ#DdvRDgv(9eiI=`y`WGShG);FcZ{7Z6I zE{XYIo{k58^nq4_H0bi8CQwn+;xA}6Ldb7-f&leG%nmORUkw0|*swyaqw06$dX7%fFz^6m-Mf)F?1O1j!c!k^rI zA1K5YDGb_nrVRP`0D}YEMi8dN4P3f7EHppVeZ2zm@8glyBgp91{L@3~3&zJ)AHArs zll<>5JwKC~A8TyJ<8~{Y!wZCa`}3PFdtr*jvpdD^W0$NR7L7jZaSo~>I6g|fmyr73 z6a~Ma2uGw$z&O-DySN5D_-j&0Fp!#ExF#(Aq*GSH(_J&GU-`|x@*wKpLDVS|#o5CKp@SXUUYThr@{q~6#$y z^Qm#Z)xeu`Mx3OOkuq9njGQx4v=vv}-SJ#9M8OQ+>{juHj~F>^{s&7nDLJKo!L9Q! zDV>l*fB~C>CB{%QE_y051ojV8l+-ZCx2CZl1bxn_+|Q9DCJR!t)i0i;JziOE_G=2% zR9CT@_S~eE`_x)YZpp{(XL}N-SM@PWBt=7=G?WykLU~SgI4VvlXDJQ8vQJtp$_0z_ zS{tF6+md4K^mQXc5Bm!(YzQ9vzj3txh3lEwnVJ3Hh_;@5YvLLLFr= zBjMYxRIccf%s1JN6@o?QYC*ypRnN-81_;1U0ZHjgQ}*@lFf(=Y(*39=PTKEVHZLhH zE#29A_-ZgB+F|%i{r2M*t(kx zYD>6@!cJE6H7mwUuIoH_2+9TXbsrKfZ(p+24-;D_XMyD99x~W#dhm63f#}4lZo|&m zuTxc|=j+l5-2ugy^&|*Id#!F`nukdZ6PwSND-+mD#SM8L^gAeJc6|+$mE~^=J9Exm zafk`vgElk%Rsop?xMMqm7i`}n3{;zji6s&qO~crxTh(Re(?gmyer3KI@CZdtcSvAu zGJR$Iw<|3LI!JPA59B%VrNv9QavRA1hSA$yzdf;sA$?M)SXBfJlr-}6U zfPZGK>duiBCew^8SSkf8k9Y;@FV_ZPdW-q(^4S_h7YpahE^m*~3D=jdhl9Rb-)A-O zrZBek7LbA+`pE6*dTW&HrxEGLLH$2j^q3fHY&q)e7mK%uF=sScd?F z;r?uBBLL}iCmuSkZDna-Q>~Q5b!)Np>Qv-vB)chEjiMwL4W622aUP8 z86ky_-|=rR!FbQClMw9mdnVwEjosO*AJ-;4+`6_tVnZfuhZx!b;aVNNfl}>7>^q1T zy<|sj5^oZxKUWSv)b76my<-nQOkTlS=m`zeAEHtEYW~Wpk8ptzz?*Vr1K^YQY)3;U z>2mUE*J+ZI?K2=YL*g5dz%i0if>T%}+j60DYICAI4@poqx>(ZF_gfn5w?;4wmK)xmUUl@q@E~HFf zNLm95P}~X{G$59n%Mi8qFrKgoOk_Mf-6;BapV3zP~ z9Or#}BQq8`jv6nxN0X&JjQGL9@kKAnf~o-@;s62j7Nu^n+Of@(wsDL(~;VKlPsH(^vy6QBpB+hLL3e8$?XYxae*4O zY(zwNnSc&>j+*PFAtj?r?*OD>@a}F<>l$@z^9AbxcqbKLGgl?Q_UaUq>@ZRN$C#X0 z&8odQ?ZXbBQLuLHs1tRJgpVrv``V5yxH}qFI5}&$iKL}Co+t&A2F$1^Vppgo00^aI z(F~zcre2^vdcv{CXL%Zt#B`Gsu|f4a(06mSJEzI0pH4y8RtjMt2{#Bq`(xo%T{+@? z#p)gUR&448XfBx3a1+ZH*cNELqF+r@4@E%6rKjrr{o!3@at{PEA4W2#&xmp{)$S;4 zjoRqO7Ve`>F;7s-o%Vv|WXEqSAwZtQnc*flb7->cf!tg~UO4|iKu)AJ(`4=;SjurG zWkwiCWrD3b)G#KYN}G{DUO0_ZMdK`Gt}wcyA`{l4!-b}odn2`}9J5+PyG+|xCHOmE z9Wb)Bo505e2B?YVX}N+xxr*u6AYbB29T2gZ2gc8-b*_qldL8{$U6w zDoPJR*{!EgpBvk&{gZ^;hE6BL>RT_(7mbkcz_33*|4K2jH$rF@5-ufBcc|1i%#(;g z3Zkuficf0IQ4#>Ko8PB%FYYgvl;A+6PdrmP8N_uuhE9!KmLYuhazGx`*I*XEzMch? z@20!ST!yjh0e+gVk%*|mXG`vHLvDpI*qFBoYt7gUn151+|L?kzF>5d2-0s9g*}AZU zRd9%%5Fyc4t-YyF$d)(bA8=>Rpm;SA0pXJ1i|NBEWKl36Exc2>qbw>f$qrwttR=K( zoaq#P#6P>c+(36=piB$3M_dg0ZuB0gPlN~XMGP5iiY{vnom-Pc(xqXlur=uOS`b$!N zeR`W!)i`HI79&G1Vc81pucxdK(b?G&WcC^T=@ki+DnyJShj#NWZTXCZfCqbG_O>#@ zv#c8j#8qt+Yp!TQg5$$!6%BA`krfcs)VI%lr!@%Q4i*8%9%m7*`)j#h=ll#R6-JgP_i*X_|)N)0c+R9(tXyML>$zv=TU}ztXEQTal zOeHT_ziC|SxC=@>0-?9EjA+ z^7tUg5@Xyz?2at2g4ov=)6Wdwzh-atJgKNs;Xd>Vr9ZHdJqKmcVwFX^4SXZ3hY=}X zW+Io|?jeSecz!YyW72``fMnLb+mDm^N{5z1lfwTPa4x3YBiypqlz4N%I`Ig*D{(E> zXxXJ<>FGFE8-za-C0`M#i(0G z10{+(OBhTt%iQP55-si=aG139C17=Oh4)sTU8xIzf~Xn$v;NL_kygsdjZ9r;r^EZ2 zP^FwU)h%};&iADLhe222IxX_?hwI0{m)=YG{g$M9CYx}nJ;^UCKgz57tAV@xfUNVv z{c{Q6#Yd>QYd2T9@{qooxKL^!HToqne>BGR4W^tB=bZ!5YKXGv{A1o#& zxlA@*pdx20NwNkpI~#VFk$e4i7Z^;@$cCN*GP_TM6UW)8m~1h>SzT}|thaC7?$5fR z+$5X$=3msNMcbW(W=}X}SxAYh9FHg6>GW4Tvn^Jyhy(%)jk9~W^$&-z!& z8V7#LxC@W+Y|+1EmKHpI*kuHWjf&Tr4B{Icvf-SoY^N}g3AjBJI#bEIJmX)H-Rfo( z?2VS3)CLR6>xEK>*Q3uHgdG+#|Gw%vFE+|!-ps)GhGP@6H};vGYI+t7iv>m^4%^YF zV@4q9@|v`%cw@K|FX`7{-ZfQPs#@_Es_#sX9%0R$_rb8mSgOGx8u?y zsG5A;alditKK zkx}Y(i#cK=8c4an9 z2S*JfuS*m3979PcUh^1z2#vC!tr%t*# zp~bbA>4;yBYtY0&*n|Ra^lf`!CVpi1J9q~kw$isNg(frtdvVZcw2*(HJ3$!@6)b50 z0}EUzC`Sy_&@$b(fj&N}y$+YTkM!c;i?{e(rfV`qdl^H;hqPQjq5Y$!em0ycPLUi! z%rS9;3vrYyvzX-}If1#TH~sFd$Vh6VMxc(f7J<(PM9P@3t@@viYl<_gv%l}}==kJ~ zjP;Q7Z|FlUjA&NzL>UW{^46Kh2YI7Vm(DhK&EerrbFNq;7pz2)OXSbm16zNZII~B~ zY!-(kK(FyT%ZFJ9nlUIHdrEtCPx{6MhFxCN3%)1_pTc|NkkbwVO9QojVwf#dnpaM5 zVaS71lOm+q0((!gD1hV1oF9*6yGGcR(tdeGDC;k57Jnr?lJL~!(n|60Sa6E0*_qfv z8=O0{_T~21N7anDamN4tU7pE0PYTOUZRW@HT<~~=>O)wisHH`}O_CIVl7>~s>%~+x zXGD;*=v_rxw9JOTs^a>s2Lri~wyfTI*`{NS**z(D!@!Z=K~*X__H37QTl!uaV)y|H zRO1K_$(eSv(a5zGa;{Qs{9Sp<)-MhP3bKfJ;ksYGj3B%%$kn%SA|Z?9NXZZXgk7y7 zSs?Lio+WKBQ%PTl7Pf;$`V8EC!y!rHef!E$b&dL}%PWXf2r&=P`noM19cW_zhnGpN z`{yeTv_qxhlRExrWam%>xw0ds2%otupnyW)cb0fh<;O}O<aF>L6v559@tw5h*1pcfNgE2#JnYU=VX4 zpXKdRyMj0@JuU{cKdb`VX%BK(5}!9lfvGK77=C$zt|dEkDW@!ZuVUoNda_cz#N)l6 zvOIL3d_}{v<=Sx(mIkfJ?oOCm1S*x@rNj+MfeNh`?n3!6rXW&b-ZWRGny#m#%UPED zpvP3L0R9PV(+H^u|AG#Ofef^CCoW~*o>s8?dTI#{?lWH$4O|`ZMk)Kvet5 zBvW4Gui!O8bG5cr5k}om1UlYAzt@8V3Iqj9|UBSe16%B9u{H^u^`83hmM+Hito!h5#BzHipDO58M5B%?~~I-C$hz zsj}gCFl(ppv>2KPUV@L#R4Jf?G^Lke)I2tl-ysFEvpVEnp#Y@E@%)?ura77LBReZ5*bObMc)Z!^fv4F98@jM^0a)R0LEgt&amhx@U&Y#!Q zJbXot_QA+*`^uPUsnyUdn7|qJNgUP^7wxPuOr8LJ1ak>JrU)K4P6U%{Ihei>KMUGW z5B38EAt(hLqg33?RQ(kNsY`_(A)cwdFWH3VIquf~-LIG{I+t_WAkhX63yw}_Gu@ywXULmj;Sc}*neaK#m*?fck}~SW&!r&g zyR&l3Fh#PMZ|&yfDvdz*tWi-V>b%{?)$9`^<307h3$G*V3S#@g%${$7HvEE^JK~ybx%9o~ zs*V2QvZ_6A(fzk-^1{%NaW8RfkCk87_!8aN%lwXCZBZZ~EnIS=8r_d7v3YFj#v?dNAd_AaT_K)F)NEPa= zr0P>v5L(|WfvNSalQwp#$G7(#L>6Njbh&g>!i=#>8vn)tf(NrsJB)@`6OjsBVMW4F z-wPM-u>#BnVhvb8OyJZmdFSZn`(}@>*tLl{E4BEb`v_Yk{^fnJ!eCZ2^)It0s$!aoy!l14WSYo$7#p*C4|&93ShG++LP|Rf6inah=vG1b`BeUW#-@gWyD6WZDFpxBw3?&$eHp5!`|;s z6?hy+@OLK`D~Zhd2-WF(2Suw-y;~V$MOL>oj{c!-xApvkr!HIQmrhQ!{0S@G6uhIG z(P*^_4x#?K0dMS|C~0Kj0F7{ehe;O*qWy@Fzo6lFsPF$J8~%?R_&+XRX72xW`Eqgn zw`|D$f07OLv|aQ^P-dG37HBJa7?S|d$ImGI zZ7AAZLU8)XO7!YxETmF%IW$Ln9pF?F>}f_GA)lEytPjgxY~tCKe2U91S~_+Q`=|nh zRCH!^==Sy}XdGjx;$V(Wxt>4&{5hQ><6!IgfSEn{B?-k%z|EKgK)hy*YyLA=lAiWZ z{A;v)+`D|-xF8rOMztyN4w)(L#n*fik4>N8CM-AAbzH#`L*dL*P!AhC8!o~&)?JQx zBKY?xbRXZ_<>{YzL&-cIv(%c2)08JB-Sgmv7w?r1WOQx1YRPDYv(n5sde5Q7s9;!qWDOm~VLw<_GW8ldKH zI$FNmF>RxQ$?Y&q`}aF7Y$a*+X{WeDbZR1dILs7c3v+L@wu)}LtY4bE3#~?%gM0PF z^WjVMwVD1TbiL$g?&bs6t}|2ZW%J(Nt&hSeo4PP?L7rD4)u{`tI+6L9W)rr_Ms(B` zhGazDD1{*lMivCW!XReiT&|f(U|jk|3=+w@^vcV~tgr^F92TRBF7GH6NCyz(N1y?$ z592|I?n=1zW^s6|j)lLmBK*AI-!I!6MnrZVk)EDFSrb;3ecvrhTgc{OKSRtQuOU|- zxNOa#e3R=F(e{A-F$-bPRHxzg@yd-#d2^HGO`nGiX>9GMa_&=vwEWG#tId%L;jWY5 zn7>+4_?o?pzBC*DeamTAO1!r|9jH8kZ~!{Cfz?sC+R~PK%!Cw6zDUl>Q5C_wlM`On zIq9}#3%BQ2rgq#GDq`j0ly@L%#hOx=aLIFLK*mY+|ltgXY%~Sjm8|BPxWAQP{h+{#};3^4=5%udM$gkC-#N(B~959e@>>QJQb-v zML^{RRYDeoVO|f6xrjVOw>~1-mRl&$1WEy7iv?V71NTThDpeu{rxVPG8iInoD--nK z5>80DUfrzXq-1w_+QtFqTiAI=)Y(WI5$No5ZEgq8T@1O%@F^2%ub&jfegFc_Cfyr` z4xgWjvW}{KN2fzt)rM-cyf$eD{EVa;HZ^qT%NfPVc^V;YYtX(xu`LVFyRI*)kU4%--H>T!?~4|H0`Mr9`~8PeuwFa#s0ioXFHA7ql5>u2warjKYZcdn*ZLG`Ux zkG8|{WMb7%7~-#j5Vxua7N<*CUU?ZG-V~AU?JMC1j5j26cpTjagbv2bw)lPnD@fzU%b2{MGwY9dGVg+K-zlsDG|_V2^;)kw`XsU$ z)aH*UaJ=VIHYJxM>`E;_?ae>&0C7RCy$JGu%>&u%}>gyq!*_B$c|2goW9Y>DJ#e zYr~8J4Ub@3E-s-fL2^|@E{?KLs0Zi5?>?Q4v`B!I_#!1g|#l{R-rvf7`JRJE_66Rgjdy<4YquN)b03b;mI_tbqkC{y^wEt6I@g-f>c)?Av_KM zzdWw5!yKVD!C2mcTR-{U-yavx^o*LA%wtK#xti3;-kSu}K@bplmradEgi35B;iAU; zaVp9IHR0l*aVhowt}%9~P!3Q#v$&ru;U-&W}lOfcePjg@Pm=5pR=4G=;tdLHJPwO``*Y*)yPNZyzC=r{@CbD zN1FYn-5*C}iMo)K>$fpxoY$!J`ofkP%@4zxR3>#iW=_AQvAk7<*}feP!fyxnX=^*V z0Rt^jDv%o92Cumi3)x}O?E)kjRX5BDF39mckz}AZ8AX7ebz&~SNr|kEnm38%OWh-^ z@YJ~6PZ%@wkpLp5$`?wV8Ak(a0k)eq-v)^;lb>P5lnJaMZ=)nM(uxg>N-eJ(T)j`~FZX@{HCWxybP~%Um@DR7k2POz zz_GoS1*LKyPu7mVIv>h<18oQoRUIps7!i1@O;-{8GMn7u+v!@uZj6Wl51+zq zdPF@+2`Gq)7B(T$)eFY!U^>9jv*0H5k7>D8S9? zc@f(}2bzYdZXgS|L2yJTYHh4@V}(_bC;(EHU8$Xalot)_5HttRM&ld#5H7HJv7GK$ zY9M4@(C1u9$vKzzpWG!z>?FWEE}pwMa2Fw8czd!;Fkmu(rT~gVP=Qbcbzxo?Mb|bD zsVv!)q(6pCfS{9#KiolbO;BZExGN)zUbd3m4)BR=5IPM!qiwGuF<*ukwEkN^jCK@S^F(@QhOfJ?6Kiz3J$$dK+QnGAc7i}Xrn<#U4LJz zDNP{zOGW>i2}?o<*~)j{KPjrJ`aTerc(=c-y@_iOxK10TeIRX!lYhaiXZn|# z^e(-)Z!(I-FEDvqUIB9v3S6eA@CA}37Nqea^e1(+6GQLorZ#b}OP**nkr?BuLuG^^ zd*qh?Q)$Nplk9Sod9`B--OZ5z0!zQnS*0X<4q6|2_9hZl$8w{qxvxcI7o4ionzw2K#uGR@ z!!Si+2?nq>MAw{$JGoF%s0flLGen5&toD)YchTp3!90HtQOYK3`lzCt3X7=EwbN$~ z1v74-SkZ3O(d_kJS!+Agzkg0do>rc_CL3D{kCm57F3}Ec8tZ7O?+ar?H~1})fn*Ia zxiNcAWDvBHiy?6OMIuB$9yP+%n(2EB!oBABuIWSAx&erqJqG z3D5LW`2~-U1_08|gr1Aga>FfeJ{UO1j_JSsQsBio3|mA^s$h8DhG?T|R)L%mF)|X> z0;G3A8eCn6ii@t?k4}({=yRXYP z<<%z$77W)bVNS-+IuTWV^6);ay5SCJ{)ZGU)m~F9t$4UGwVg~3$X+CzZwsZY3Gaf5%#i$F81r?WPl_ZupDOQx={8hGH@ z`{sJ{4%lnmClR=7Pd=uaLq5_l77L$_(i&m*w9{amW<}}QHN)6$175pawWvh0^F+4& zekN+(dx~-?V$wD5sn;My!IM^8euSYK+zI0%*K;E58hr6S0i-4nVU~Q5C1m+YH)Bi2 z>5Z~kisd?!?i3;$Lrs_|Tn^!y*0O;LyG{nFn{X6h0JAWOk;mUFH+2)PukDd9ISap? zB&LxNA*Q8zpt0kJb^5L%fvpYkgXy2=Ycq3O3RH30@Q&2fcg3fLRrAs7xCS5fT9z>* zX;SFFE};suDaR)9I+3>mydDEu@&^AqYR>f^ap^xI6gww7_y6~jr;%PUE9{y zbxb ziB;XLil6}pj}29>Ox$kVWhj8N>`fNNIf)}Q=>3Df2!z8QD}4ZuXwb8l@?xc6;jpRQ zRw)Gj@X7eiPUXN21Y7Zti`Joq$YBNVs-~XX=gZv zGps|%t~WTkA)3W|T7SSr@ZzBhswS9nt5+m!i#zSL91a!C^}bJ8$?!4p zsZ)LoE`pIvVscuN5Ao9oRsv_*e*BeUwdCgVFS-&F__ok#B8@cWw0bB(=(!<(v*scS zwqJTR{vT4GI`M<&@P?+h80#Vysd#P%fcipxP4wVfW2mwm#53BC|7s)n<2K#qNU5vttYQt`)8f7#Vp-DB`)UpVe71p>((2f)d{OUX?IIh( zTCRuDPf+@UUK;5AQty;V=sfV2#rn-6p1{+G?kLKLngKrNyL;lUY)JbbrVAmAApZiQ zFmeT2i%G4yl2j=P5Zxg#y$mDO!LS-f{7PzR_uhlO9Us0o>Q~mRrnFXdiMrm|rOIu+ z!@dy3v=ldp+U>?u=`Q?RlZ+MwTf@92SXy}#4wcz5k{nRIlWydgst6$p^iYc8@cMqL z7&I`I>@vaGE^?Iv(@GXFo_ERP^d(S5%y4uE(X2G&EK26gP6+82!w6?yPKS9j%~qoH z66Ba=J>sww91mdJv6c80(v@|n>vRiUJOfxYQr)Z$je?5mCt8u?aOUeeB6*Q0zV<`ka;}C6gNYq%hGBl; zhkeIBE*)>u!B8^ zu(D1{GCd^?5IA;|?7G98g&f7#g66WM4Mnc3VrP8e{lA!=@1HG$#{BpdHy=Pfn-kCx)C&H2o9h;he1@Dqp*v!#8(Wwa!F*snTG z8B%iCCZTaIn{%}MXYlPdib%+9in|Up?V7mpt?hKPRHPqrZ?$-rf}u5=e(_}~>&%7z z<-d07u*#6%caxMl(E>wFnG$p42MS7 z1jnCx3UPM}QHVTqTE&=CHT)v2PpiN>km-RZ_S_srmc-y-FB#6G{NxDw0yD>(uruwW z1LAw9tXxQ!5-bTU0eanz8FsbpZ-y=fr+p=cPeO0b_*$?{W`GcWGR@@0>OvxiU`u{^kRQNrsA z?9bu6=P%cFX-QUx2?=bo@>oN0W1E}?{|UMGP@kGVYy4(llm$FjIr^7$#5JxDQg^P~ z;p@o3N;t{-(8lc9mKPQr@D$+V$Pr5WgmA5;8A;m*+ z*&=^8h77V-O45t%W(_#{c>arw;9h~F(he%Vq+S+%OOP96dwUB#n~NV@g^0M^jY8xw zn&FCuS{d)n(jd2RAL5Q~Wgn1j$>oWDaj!lk%7cHtzmb}!Afga-F21G&pq}xEwKydI z!8D2fUjK~PO}jbhu84`Z$H;wvOpF^GlGVLgKDt31_GpU86cN{*DL8DA#rf*r*!F6j z>#5uq;yCp!gp+1kmv@j3z6;mnr9e zgTLx3b+34v~XkY2C2^LC?4*ZX{IW1;?k59Q}?UocnH(%2LFyBtO-OGycZ6beXp_z9bu)9({3j)$UcK{tL^fMB z1%JcQz(bSd-1YnA7+a<(!TsO7E&Ok1nEMhT>IUxet^ND+7~Uj6IaYEBZl9+EQaYD~ zA(4(0%ZDK(68ci({~1@+m}$G@;A zjHbOEEbC(KK)e-(ym9_M~1YoShLn;jOn%`(LhYQM3!pi4)A#u{CNF} zts`qmwsl8waXKPZ#IzW#0%42j(CO$aW0$(DpuX+v>u(&Uhd%yIA1elj0>**DLQ8ja zM~g5LWY8GijWEWgckr-Ot~5GU#8_PP_4`QZH+oHx+P&l3ucj#>e?Q-0jMYInDre7L14fEpIIBeXx6w+6xoF9SC4PXbiZVc8L@iK=VM#)?qb7x@+fiBeIZuyZf%QC z2DFSYOHcy8-=SQczhUdtdG)(`dsjRjWOIn{0)w$m0!2kKn!D7Xqf_1In(}tVxtCztDt(TOW?_J&ffH#6jbfxDu9B_i1@@4tHT0yH zBtU0JSN?=+H~(s&#H#$MqU_zcM4wKNkZW$vkvYf@-Dv0#^_$`ek(JEA< z-QA`D*n`N&H@H^MogfLml=b5T!Gtbm-Ld~F4SvUg{wsefAeaN!Wkw1GCb zj|OV04QF**GJ7IPU_o~RzjpR`ZB{AUaRGjS7^_mgbc4ZvC^<$a|??>H+C3hbh%UYn(rzxn0}geolmuqS$YCOe7M9YcTc zkz^$UEJjH^P6? zq=jV0s38&$5y5AgT=xbkBv=!mEGeZN!Geryxy%dgD<3F(?D7^v*abySbdH0k@9~Y? z9{Pgvy`1R$1HHgz`$O@>NU|5Q=w!NNH)f`Nl-7CGUYQdUuV*USh9ZEe%hFmKz-3ru z5MrZICK9<~Ln*ivmK1na7e4ZWT^@{o6A>R`>lz)@4Ir4!IpFhF1_*?$Zy+{@j?cKo zIiC6%rYf(~MCJj5UY;6(1a25~c*f9?Ibx(l%lz$C+GzH4>8Qlns>gf0KMAg-nxD%8 zDLlP9P{y!2Y%pHhhnYjkUK2qx$Z+h_^vF)gC2Y`ZYvg2CiNhM(Y0OH$^N1chaC*`E zW_2#mz2WtL&N0px(vD)0^d{Z%Z`O6~G6| zjEUZCaEH;Xz8^Zi^QXhs|A86iO4SL{J;>h7-^zz z3mheIs=v6W+W19o<2WtTm~#@xSJ^=sd}pdS8El8D+ZOm$^m}Md)r9p}rtZ{5oQEh| zJX9r__n#2PmIs0DBJ0-Jky!4>c(2hy34*qZM+CJ{fkZAg{g<#3%rX8g186^z^|X>% zCYkK9$LJQ(#11lX3%NZGh|Yyrfd)0>(*~PbY1rK-g}o+N9v)z>u4y zs2=2`apJsX@qJv@jJP_rdXmwq&AMOg!8dHS+}g9JVEhL8fbt=^Ndmh7=$-h0!1DUX zKoTn>X2P2y+x`bx^h0!b`BbZQ%LmLEeF2d>9WW>C$1JGnM|SM40sZNG#nIVp=KWRO z3ldh5*K%9nk)6&}M|9+b`0otpD-XcZJr2j{(8LGVc0jTTdYP)Wmeb6}HMx!PNk405 z)mu!ng2{+rQg9f#a8z_GQ36$4mJ}6LeOFNP_Y=RAQR-xC9x;+NW*=&^)}J1DI7*QI z)v`f8ElHRrlG~W*$R?M1sBA|L<_M?cdTDWTTzA*{Wf6jDT=agP3*jD$L|#OP<8P?h-BUMD%A#AZ!_d9bg!;kuTksn`w zWfhA#z{}yZPU;tyW22p;yNV}pac+1v2S>f)vo@&9V^_bg8)0M0l7A2<3NwCWrq}{H zG+lRkGngqPYBbe7{KS?!SMjNmYP@(e7=Rdsdx@}~hDBdiMyO=ytG!?(TSd82)G|mZPOzZBtwyLn2#`Y4;MP8d>_>)k;~I|g(+l%Qx2KiMk1MX0Fw_kOZO%9@p?SB zeCC$_F5F70^YBDc5H|^`5ef?Mu()u$v9%n3%{gV?ZYLK28sx#dKEFcmzM9v3Ckqit z2!2rG|AWhsd8lUN=T1N;N58hnjeAy(^@9(TqK@53wn&tw@$p0^@cQSeOT83MRP)xz zJNJ7Jqrh`H%Iz9-7h3`@rwb95s`b0;!VE&GGQa7L^Uy_-#*z8h$}f>rLyxoW+&FfUJ1@b=Y=}JvLJ)^HFF!MG-Dl=d3(`Xl4cI7 zw=eeH(fn=YfYBB5nwiqQ>_P?o?h-CLyS}G+Zq8fup#tdILwfNEir@W_FAs5{T?M%O z#95FD_474oitmzC|90MlfnHg6nKOAd=Oz2o=rB&eV2NV58d8YO+4UAcyi;O~D%CRW zNy^B|?0pw1h-{S{YtR^1p+mrJXJc=Z__`B*-1DDPOae6;Q%b-BnZ%g$E{%Rm8vjeU zcD8&+i%T{w6jd|ol-Ce4*4_KnQfeitI}y6o;GMnbZ8*NHD4;x}$T=Z_I+fGJA-|vp zmXgeWhDHzJ+gdv(b46e~7wOr6pCcRb#Ya3^VfF}3B;fG+VlRIy*CsU?tLzRA77)I^ z>vtz_(AP6>26iN16nUb_{gC6X&<3TqVI?8R1WVZS(h3kqydfM+*FT_{>*ADBruS63 zfU>V!a_XY!Pvc>M^{Di;Md~&dQ+TmAXCCmGb7E)&ax%@Ni2a&uP{mruU!8e_4NRL) zW$77bhZb>wU6I0r?8T*oJtpiz_UlOnuI?=4hjEtXaq5g> z-tz>>ymM_FLKdv6!#j^bv59U`aa?D#COWVRVK$0s0WO_$gA2J#FDr% zn_}wLcJvR*FRQ5>Mtf4lsAt~3)KT?q-anIX=43q$t+0l1to1xe4oriz!5SR1n2&8K ziKT3@8?KCoBa|Q{2WI2hwk3av8FnQ*dCGA807mN|{N&e)+PtM7F)oGpMxS?{-N~cG zp%2V9gYgIdB(}RpN13)v9#1WjF9`~9@2pPY{dsIDcdcLoWnY0mg$=L^z6q>Lv`Yxu zdM-&yFH1{0Z#nI(MsF`Oc${dt%26xI>sFE1>8Pp=!PHGyz;bF-9f+3OGoG}LO}6k7 z4i`p?+ePz4)jQm=k!ZWeTan-y9C8bzIGWsa#pah#97{Jj*KeR!Zl6QlD|C8PZx|SZ zI%$;?lu1)jfOqVL+%`3-VonV~-z?FI8(Vncakvz; z*EHXR<2oGbzOb#wWFm}*Z(YY!E3CHQb6Ph~Mo5ELh59%PH>Fo)rCzwPrB!86|78~7 z4(eAn1ZS#Z?%h@>Ph;|l^N%dzjhc*ZGFC^H$W0^hRSgeWmwk~x@dFv&Y};&Nm~n2p z2=dT&UP4QTZ&8srJjpY576X}FRORMdt6cE5IZk&Rr<8^&_w-rGMK%(9;#9fi5d!iK zvd~4e-5eiaL^E?uFu>hwL$}xOC8|c$kHBu2e9Tp@PViSVDrfxa61~^EELw-qR!)eH zP{Zmy#M>Ih}7dODt=??rCEIAGC5 z$wNm6;fsSqsCbFF07HhzGy0^d+Q}5Td~c+ z618<8zh}Vur6@CXz1fW$shr5(yH+a>qwpUL4CEvxteWPtE(q+AuUd1K&yjiJ&VcnOoRv^NQk%G0zlI&Wb!>_^ z+>{$354XfOS&(cOLiwHFd+xe3ph%=*vin7_9ucZTv1JA3f_H|;_B3X$VpPQbx{(@F zg{_wTWv^9Z+sx1BnUgzvQj;V@(SYe*%;MuED7SuLq`f>@f_($9jeg098Ej@O z7q9>)d9@q3in^U%(TbNJ&oz}A(Yqs-(5A9P8_C+ciLrNMnRy4LN4K>3$ii9ZXDd=x z&g{@YaThE;S&jg!SirmWd7@~F%91W=I5^a__zRA?;|Bb1rR{$a6E_#v|3^$b|BaY< z{!fT$Sy$EtzYRt3Pirbs0~eC&2FebS=8+MVQ%=f5{j_clxVSUTCb_t5jA`lp;bQEV z960zTV|q$%3KA|7ORU7#1X#Nx*lyAF6&C#}jaBezO+Gd_IbaUI{ zxZx$#%^S2GtD%Qz@rs+*-Y~TN`7Vg;w1LN=chH17-M&9FApyqWS$QWN5B%5{daaNc znsJPh?MU5#>6|e8jM3)!OsyAz{u>y-!wMy4Fla4EdvW~@ii6qG?7aapn_rhZK z#WI&)U_GWVw{JyYu#=+@>%k!+5?zkkKCj3*kOfVQ;j$tniMYESXk{eG4`UYo;Mg*l z5rfFY?qctWx8rk-Ks1^hYwW({c&W-y=K_=g0aAM4SW(+OSVXa88j@L|Pd{K)al_{f zc!o*x_JDlvlJU$kEGDCeJ^8v4#?g#W3q&e#Ur11ub0J~n-rg3{h`;EXF40uy*s)9= zL36t%&hIxRFWwmcbR9wcvv^ee2^&V9c_;5o2w}PHdfgQ$chb(jI8e$=jL!@aH((6_ z0O0uocOH-T0RUgHI6o&xXVIZ9GzLkV#;XKsYYdFxf4vUl6+v+2bf}i(;LLuK8hw;D z_T@~VM5x$~w@Ij8q6r>2>4C4TCMe~JIS{9raoArOv<1kTHQOLt3Xe}{`&pNBI~++(3S;BL| z^cg6SA1(s}G2D)jK>_?x`F2S_pH7i_-S(a6o`fzjSntO=!LwYZt?d5%jxc$#>j;_eEgoP-C#8e#`+YkwkJ zs7Jrt4x%hPg4h)%XC-XAg_6U#-jJlTw+rhH6f0LoK7R$u2!$1Zkso^9<=xBpvYVwc z=C^>W!_@&qOZEzR&@?Pu(l1So6xYH%;i{wX{E6^0z?f3kg>fGRX)3JU-~o*3s$01f z{lLdbHK@yxgZtoFhS)---k>?D>gGy>6H3CvEm}VjA|th*uRG$(aLpn7NlPFFcF2A8 z3#+uG1UXCIq_UHs33LsLZ3y3TO5S+Au8!cO_7IkTXfx#i zDi|=0Mj0)b|JEb!%2A%1SwaEPR1_ufJcHVCYUBruPc+bOzbblu1sdv8O2oye7IdGD zUl16O`HpoZ;J{b~tK8$hEs%xu6Fw^6&THkZy`wP(S0E-a>q>+!YSWxR*0Iu;KL>`IDQl!yT*UOlsWhvK8*d2l)C#qFfUHIkO`V=LaW@@#4 zK)WKc_jEgwLll59OW}C6d&^gB+(Ph8m>A2)jSP8W?|6Pas}4wHn}_QxwL?9 z1C3QeliB=AR`{G?z1 zS?h~SJI}o>2CKC|ju2vpHT31?#HJ3GLZ{!^j@KAF$S*Cz3s*j;P7R$>;KqcdKGrFH zm>0MZYL7rb&K-CY9=+sX{ zV>UsYj&Z7eIi668WHq`rLO;-ssY$hXe_UKx7Yra#Ls2u$|5!eMU#-Bewrp65(^fsGhvpO73Zkh`BaaebfP$Z!JzH0 zzt)gjj&vdP)5F1JS{6Y;nwfu>s=!l|{{Qjz6<}2@-QR?O96(w^rKJQ8UD6=kA>G|w zQc6oB-Q5V%pdwNN(g@NbE!`>Lx6eVm-0QvX_x}Ii_uPl)u=nhlJ+o%bnzeqj*6h8T zH0u{znyfkT*-Es+u*0_-Ftid9C8A*+qiHjfoR-xea#|C>^#(PGbbtdQ8Pvs>?;{o! z`g~p~C=VS$rqH;XA3Bdi*+gqaJuUHx@$7xQTnhJGZ(wPH&kN3tpw1`*2fa2V`rVvj z!#Hwm1FIXIIAlh_ss%954?V;=Xm29XVJ|$T_oQZG=2+Bm4#v!N4K~brQ0Te&P>3|% zlU=(HA$+;dS5>Tgv7!C5h_|COIUHP%IBbgsp7r~R1~wv<1aHSXx`yno@~aqTYRRq& z&zs1f>5@y4les?KPdLFRCPQpEL=U}K79~7Gz)2D(B15}H<>tTe49<#Ns!QpS41(FR z)b|&@j8Q%3rH!m(l7v()!!G86TuVXmYqnEHRTB8U zzpdAKvTsn}l+tTS!uU}D2EM6_e0kH7Wl+$pccUtYtr5;uO#SfjT>O2CVCGnA%iGT$ z*i%@oSBm<*;#vsH7@V-T(6V?hXjV>+yLe}Cl`QL9+EMLT_2SS3axF|%xm8>-A66`g zf(yjw$K1q87z^^~n_6^x;A*ZEfthGIhdmr?e&O@9A297Ij1%<8g*YMEyCkfpAUqm_ zLLRsx&dr3*j?G6#LBm^*dbWr^`KrGr4C(0#I-+J3Pf_2k7zt3BDObt!VyKJaW@ONDkiUAChzr}Kz8JT)_h!+x4~ z>emXU`2ku7Wx=~k<^l7TXt$l`QmDhrVz=^Xtv@|}XcxTXqmsx!S=!D=W6Crlb1w0b zv`0L~!+rnNJdvRM+~97+J&kqm&8-2Gy`h9XD@C*VwyrtTu1qdjZTo(`f!9aYEJ>2I zdg0VYuU=&_?8|$Mx(wcGqV29m#+nh2y34@;x(I!x#l0Po#w;jTw#aqwMl8dyB2Gs# z+>Em;XTSn|k*`Y#ileMV$iod2-I2HN=}F`?r>Z)lq|ba$mBVj7LD*Sbh&RvR>^&k_ zO8Jm6sW9(mc3R2d6v!};gAyCJBPe0_VeP%?jFH^5MGTQ5(yQ^U<=Xa2aygs(WqhHL zJ?p9Q-9>y|UX$@N4TC~hgWrboZYbJuxPQjrY{yU6OiyubcG1S>mu|}4dovTwX8a&B znOocve@c5Xj1PT=yMqq=ZKm(MH8g3#!{HX;zIIOujbXXV;SGBd&7KOO5$mK(4}r+{ z;z`{u%l;n;V0r0xbo%cvWpuuC3!K^9YXR4Nv^UJdrbtoFMVsqV*>36oh%%uhYote;x+ z@z7OH-}X)-HF!DUd-5HFzw||qwP$EtK&_lvm;^(qa1J`~Wd0qTPb3KGJ;YzOgt&UJ z1(d$Qg!!#HFE1H%^dXBqnYwo*tQlN0-D+=P=^_pG{~jeK4Hg;7CYg@m==G8s#i1hR zd`E&kpSJSpxhSu5Dow15Riq;h$#f8R2WPIu>{sMe{O}DrCkDmdw7LMLACzo!t}j01 zp<2D69L9gQ6xy{xC5k%|&A5BlBx$lKRZWk`^Em^RX#%_k=Ofjl7u(elU?If7z;rxT zZqg<77cyLIb>Tej<&IH!-Q=)ws6yLZODoQ=3Hcd|oC|p;iJzsg(>(j`u!&N;tQvvN zi6G*TI=MNvt&M%#oXw|K@Psq&aa6E|NLSMS z1MD)x-9`&LAylFkEck>!85NEto25k#CqOckRZ!fp*Gp~Y)1qC`bVAR!N?i6s5mThIh`U^Tm|1bZ#Yc$jPUPMOl^7jc z+rNnkJZG_r_s}6H)}`XFD@PidP}tiQvlo|r+?Y~KQn2k>couc+HuFuY@G)%we+FOm zyq{2^w~oEM)ZN5GhrvKm%=&gMuA~Re=~Z8cdk5%S!2P7h+YZHvqgX{Or9$f#`W{-BQ z?iivP%<_-+=v>Rk+F0*oD*V1gN)_Lw#m(G>13quMpyzQ8<6_-9!L&WPJo>^UKQG#( z$(R%e=|ffW;}YFd=em6_M~oPro2TVxKe`UakupwN_fo1j+uht;DR24kIZj9qw)vqv zmOZueGO201^RTI&`}^Jb^b9XYO6-@yCo?f?t;EE>>HA3cDVJj(@_4$$QvusOsA38U z`Q2^N(aZZUyw0}IgrqfsNJe%Lcu{h~6;F@9oe9Z8wtHynK(>3x5Q5{{qYC!v0u)<_ zns}QAz8D~HejCEL@66bNtjy>(v~$ey#5xyGp-)#5*xq6J#6|Flt0yWMmZAT{cLpLl z4v6VPA~skpcYP9;DSW9g>cX-+`#*AX@b4Y%pJfDPq@J(Lju(_L$D)$7*C>36;&xGf z);z&Eq7;B;qPS0`_~=E7DxZa%gVfqiGqG`WR!v9bx0(?KA{gheW6KN)Ju)5xT!9hRTb3;nlQhageZJK8;Z{z)6xHe$M;i|3n?C|(Fr$NZHuMi z+r-#?!2tJQKj(Y4%v2x8<-JaK?^bEz8nE9t_JapL(~*}^s??3;A2SkUg&e;_Z{tI$ zB7}|c)_Mo$_n<;^bmY?bPF9rlKQcmiYoo!l(VHVhOJRwx~VgRtN!+U2H)$%rq=>djEI<| zURs>2aC?q%S_O$#+2rV{i(c)BQ8zFZeZRujpB-D+s4lJ|rlP!Ky?B=;7=}-5s}5sc zuHM{&bB9-%mV;-y_%{60o}aw&Z8BL zH>(<)1)ZKn3Wc|x3<uASR z>Iu5s)|x*&W0Bn?Doe)6t*f|4ViPDUVH64l)$kBmKDPO-jC zO^M(*#wedQ$~s_N`h+fW?@5{rX|S4a$L^T@6T9vkx@8&67cZXoTNz>3aXTu7Nu&&X zip30{c9PajDyyTiCgoALf*m}b_oI7Mxa#4Si_qe)I?Z8X9y(^EY+St7#~9q~$*UDX zel#_CgD%X{a%`^@?U4o>f*MhmPWg78%h!O;MGJC{fIykt7>v9HSbW=^A=(F0oXmW~ z)KBjytK;vY*&%T}qe?2hhr(xgFkN+~L5eY2LE;CGj<{eodmjE0#+RouqDrfULWRU< zaZbD#XHdnzXoEXiY? zk$%>R8xKWoP<&>Vn}IrXN8Pz z7@Cs7@|Z2fPkkzdW_Rk^LAo~dqTvdt%NgNaqMA2HMZeRdL{IrDss+cy*kIE~8AVm` zek2b=jf>^NER&Bwo?z_?cz$EMJ~Z%z=+J*31HR>Kl+{B3qe9qgD@WPTYaA<}f@=>W z2E!&e`Y}IkMiKY>A%?7I*QYr?X-Gy)lnX*P{c{~=^F4o18%My-48xS8dVY>{M~!II z@>?+;RD!qWnDuBbHoyG|VM07fx@)rAjmeWTMgw&PY>Muqn=&-%EUZ&;zw{Hy8`YuO z=8xjxVTzU7<%XiD?zZs5;ybEI^^a@P&YsjDm2KU^>UCkRYDe;s%gwTD949kP>10E@ zSDYLe_a42<<3}K(a3p&{vNf!KOE3z`HwUZaBZ88;1>uzj7#hqd)b8d)Y;!`|_!Htt zuF!jJg?a*stw*{++hm}S zJP;1MtH#ngxgBdS-n>C13&V?N#5SigGpF%G0H5?TQcb07dTq36Rily3TIVrcQ~cwE zvACfQL~AGHN{T4#w|!yQoF{W*uVadMM52p}*VSnR1k~-Y7$16CdT)3sNXnC~jyd;p zJ}QXcI8q(wf2Q)Ls`WfiiCVS_4DZNe86m&rR1kP6mle3N$iv$P8)%{2Pf6@DeckxlDMnS_vvIVid|GZeQoK#%u8Wqa>}R^L&$7K; zCax(5@2%7CnBI;S&p!Sul5t3_U%cs0Nz`}kZr zykfES$Rc{c;whRaOhlY2=ZXQXjzy9rqTa|iQl8Vm_WU~SD6Uh7I{TqqM{}m_piG;c zTI-l~a~)rtgQ%1}qVz0Q8$>3HXYzeQI4u4k6n{mgLRfzgSu&zfxMC{m-g0wYGp}&k zOg((PcmNeV1N#&B`b_-sR;|7^H;$h62REwRDLZPP*89>jOUm|@_y!Wa_bCk=J2*cd zn2;b*`ff+x$94y9W-g2^MnInD<%VE&+zVh&@#CjfEN~Uqa&)Q%jY=yItGRa%MTKt5l*CSYQ>iN8(mh~> z+s7MIYftx)5*Fgl9HMaC&)auUMyHTVaB*N*s*ggU_>tKR)6sG|&(DH3%`YktsIjHp zl^<(O%P;ZdbICov;;8|TiQI|PeX*QX21ETALn|gKbY%WmZ1W=vFZmjdDBJG&8C9@& zL$w$_ZULod_r5X``mLq)2V&1_K0dcsCmyX&LLUqGv>u!Awz)W7|5F1T*U++>{XQwi zklgTm%aQv)&nKxZo#?xK zFpH>J0DF5wd}r~h^5~q9a(j0xh*Qs6qBNHkI(!@n)E|@D_q&FRkr8hW)pTDr9rh{ zm6-MUOf;9qaX_+dY?gJ8JebBa#j*$2FZ99lftYlv)uJ)k~dn_3~;u!!#euk!1`B=D64;Si|gkVz{PdDo94Iv zaO{=-s4bCV2>xR32G+XZM!VU!m**lM#6At?*HBligbgfI4EDOx`9@+@Vm~l9e>|QY zvR}LMy7A*%PU1Tu}0q z!{0utUZIRVS+!}Y&#U(j66DQ5*Kg1A#sxk`LUC8En!yBk z<+jWXh3tEn{U5DBz9Zw?Z5^t~WvGxce>hJ4d{bc8|6*F~fi0>J`3uEJw%(O?!dJWS(^g*BlM4XYYR#EQD? z5(#dxJHs6soU^T5NqFXpFI7XDR~VPEWz!u@jgBUyUU#+d@oaqPyuC7%73(zh!SAAe zu5F#i_2ah}sQfpEd7C`9auQ<|rhHYfqVLtgqG|7NgOC!8h2D(6-dme)4UK;>QWk2x z>T#}RJK5XSr(O>idBmF=i(a_0V~vG4`*B=B_&^TCsE8dT6{G-Ww2Jyxc29H~u7q7P zXaR%3#aOChfueI@n76Ai;BEQ2d?rW11p3mr2Kqf}A{$CHwXBhe;!Ku_*wAsBu6|Gi zjPiWkN$cmyWl=q1kJ1J5>*S-Yu?*!pPU$c>m1wQ{2Rk0%BTsym9I9g}-K63Z|oG@FBVNLL^UO&T9cST?llPUM|kV%6-?M3;PE}A^^?Hx zS^Kdon=N{n#F2YH<8qh(Io9Qpc|e|94Gae^slS0-HF2jl9Y&0%bG z96aido709D@F=_$isDd?#6#O@N{~M1LVuQez{aODt{^J$;ZcLRU@3V^A@{>4aWjf? z4%H??FisET_IS^oy~ni%3MbcNsh)H8)O-vl$3^Tk)Iw)JDOR){%-ormiYiFAQQ!(7 zA{Y~I4u58x^*ZAR-Qrj3g}WN9SV*UJ-0Vqi7x}gAI=iOH#8{+Sv{qvj>~8cb7jbA3 zO-MOD>D`hdshdwyTP#!@K6KqvxM_g0JKuVelo*L86{~P0 zDM*FbmDF;K*d`&!j60vQH=VN9*txv^rCe*@rQX36w!!YlK#Or%QwQf;2Jddc0xNMk zF1fIYDRgPP66deTGv9hh=6yW5lD|cFr6!;ErfJZ^XiSsUHSE|au9$dhd1r^Te4L$5 zvhQ4vg&)X*m)o=p=Wvo8XRvkgDJm8fUrZ=Qb19K@EHCw+#SLuL|@TQx;CPs|^55jKA*y0KP*CJdOH){h)D;nw0Hb4%ByYKiX15 z?4wogD`w{!YT_V-kUe_Qh``JFrH$r=^W8~_m`wh6!-giD<>9lnoy{PbXq=|V$er(x zAADNfFTZ%p$v(q>u(4>JGugf5Ww;BTJe{8#yQnusnAbE^cIvDCt}4@Dt|4Tc6aG@H zgznhI$12@*$*KUCdtUpS8PAW3A2qO*Ahhl2^Ji>6l1%|$g~F!SSZ)QUvW=xtF=e2h zY(Bx6G1hE;R5{)4RGUfT#9{hyKUYnBWdWyt>|8sd^EMF*)gs|!=y0RNvN+$9b)8qQ z6+W*HG?rlcp-ro}M0}HZorG5Tp>D+t_fA8CH5KY=d4^K5_ZJR1!jfvs9|rQ?=E0v* zKB1D*zCRq}WTkWPTd>|+r*KMjk$Q!A55XIBdx2Sl$WvsdX!q`x-`2L3DJtip*3m=a zIMLuNAw$?qfmU}{&x@)hQZnZbG9+6qa6e*GswvCoz(d!KnY2nxGK)drH-gld;beF%{;yK~R9K-KF3>J+Z4Y!D-p7Cc;pQ1NK6 z%)aB!*E7Ri@d!$xBCVXzyD`$;C1jLx*gJ_y52FbPR+$_-><|Au;Ox|25t$>`i_udnfrMO}G(^1qPIE}U5gLY#y z1RlP4667Lp@#132m{RCcc4!)3#6My#EInEiZn!yk7^h|~4|mWgyhgcFj}xd1*6)`MSByUc;U(M- z(XLro*>*Xt$pu&GS;hpeFa){CdqQN*ms9R_JK$6uBmU4Ki?BW z(kZow^ex}y3@vk4I(hy{g1SUo8fhHEMyj~feovc{pc=D|!}jxg|8f7=p~VUt!855l zryw8oo%SyL$yF&Kow`f|tJeq;FlD02m0rPTH@z%4o^^^KA2(sRGmnW{k&5THuJv^>l>B#oO6sv9Dn};N|w59;c&hX!gEth z`Z3{SU2AukfJX(hwjX&m>n_?VE4xwHdzR#Pn`yXqR^DX4O3WSKJD8fb8tTi}yjYu! zSt9;`9~=Ce6}dDH&?h$8n3%522TU=!eq!ZxhIM zjX-8d_o3AMK_Js#DP(2>3PDKymsrRQ{hIhs7BWL(Fn^(t`I5(f@R0dx1L40!A zfDSSXWB~t4;U$y*ppXT^)PIqOtk=bm^>4(G<&w#NP{{f(%OUIKz+O%iR%loaGAsU& zL)O2_AuEKb{~{0BuJe%XZ+OV|Yxew{%WVHL581BsknL}H$o5wrvi-|EWWUZs_P^mF z`(Js;{x9>8{klHn_(jbha>)Ky9&-H4`jF#N!CcN1j$hRLK_T=@a@TDN$JO`d{w00L zah->pf5SuQORKL_$oVhxkn=hZIsb--&^H?WiNb%Ghn&}W$n`foguZ&>I)z;SG7q_~ z^N{OrcnICH|0fFnWgc=}=OGj0-!KunrQK zYkEcWr4;}$xRbsSkVC@sli)wF$hCbpP|W@dUW34L@-ie$e0*2n8Dd_3LIOn&5Y!z0 zQ4JJ+U*$3}f|#HJ0aoM$K~3egf)K7;75uFlC`i7_1$xQ^Z3e9PtHwVnfb7A6o2M4T1D|9$mPq!16b>zPu1}N6Ls^ItP|L@fQ z*DXOY(?7NZ1yNVI|Cu9DXmeHcH2KZv~pGL?~Ov#-Bs@Y zYfJxe)Hp(6 zL9OyXwsdKxud@ID$>F7)y{h-O4qh7GtL*>FJi4a1g!CPaAr;aVFUq+8UYLm;#wLf;NukztT$122Kzc7Nlap23~oPpgQ?yPkxanF8^3nQVbwZ zMuLg?uLQDQ8i~IpknI|QOw7!`C}alU0S7zeM(BSKaD>_(h(Un-aX?ogS1BM3Od1a0 zjiJyy=vw}dyg!`*CD;Ju!pg)>!U`Y(HbzboRzO>`0*IUy%1IVzJ8XE)d zx~3TZw;W;wTbtWBJA#oZJD59KTI)NRftVS=R>qEw#`ezoRscu> zJDOXYTj@JoV<;0KEWhlCsJRtzGg#CLaNUX+19Qh1ENyIK>SRX3#12doM<)kkeQS_g zsH2oyQ!!)oAXYFWW*BIvdnun(K+Shc*=&ShNryPo}Dyf zB=2bEmB=B}`6sPE(JfRb{8elO#e^i}M4@8yo8n=CdRxHCwkkH}5Iz9%4h8IgFGsAL ze+UwA{!jut03&bTZ_{S-8O*#H=1p;W@&cWQ;_}U0VvGHH~E}<+UA}J5_U0L~e zrN(?IxPPnESXln(>UE_CQ9RdwkO}dh(m>u%bp-tkAA; z0uYxSXq$-K)Q+PQZRZy21e%QBLUc{<3;@s1xaDQxFMaWrvo7m<&jH zHekuj0<8HUL_^XL!k}s3`54GK7YoN_IiPcwtYQUx2&}*ztXDZeNeJ0&z!Ca`8Y3nq zX0|^I(xrX-M?ty6^IG2AI8>3`i^GTWeLK8-(*QzP*F(;kR`>x5vBhC zn3$OULAL;ai4DR_;KvCGs{Ki5ehv4Z2A&asJ&>*gBM9690a_5)#R3d8l(f~*fM1~-F5J1HS?6`r@!OjFg zFTi^Sxela3kg`BsfNuoSKWGh*9N-54diKj=K!OdrwdZGN0R})wh1|djxTUU+%z&g{ zU5Anh!Eb=!f}BIx04)b}6>@}JzhuYd@#-3c07xF>2h{y1S-T8!{v)Ef?%w%d%Ni5d z!~|?&4oDnuVh&aag6>kHQjbK0p@PWqS6uiXU?D5pbvgWF zX~6<`IGG`SKM4DQAQ9yEb3lF`p}AMrpi;^Xbr3>y!ev>AltAv`V7in}sMIiVU6zGP zCOg!9@aKA=sh_Q~0nRs8KrpV3kUYrwW!tQPVuPGP77@@AY@C3Ff|OwaC}9C~4CHuu z1#%TAe{G2W+l3NTHU6CTzhJ^2>ozE_W%aF%e_PuL0b2!todWd0@`M#|h_bV@X+z+q zsUrzHu+ZQHfdw56A=?Fj3?P81ZzpaHF& zW9!GjBm#kD%pDzpB_9-nK>$2Z4X|1S_8CxvZEcLfPG%0q#xzg@0kmt(0@3*Z8x;Ro zVq9MM*}2~lG0@ia#iQ^QLS|)WzJ`!tTVZ)FU}0{+!N9`8B0!e5kQ0jIJFv%h;g0XY zhq@d;IiW`gMTHMVz47vx1?xxy^GF5#NC5)|9v0Tx{;9C7ojX+ENvI8JNSL^|IOs^2 z8G)6XpfzC74fSnEWB`C~44`}F zA?SwOr{myT#|5FK$0en88L&q_6YxiUCz!bT&%^A93=AJT(uLl%K*|qo4sFlBc@~Fc z66`}qWM+X_9WfhP5B$xARtGntl9!qx(FG^GA_l5*DH=Y4KdRVm&x0M=IWpLTKZ-t@ z!<}eUAp4FfV)9s7AmhCmad~cbX^B&|rJXJCorxWpBg7+uZ_8U>Bu4LN5l<9+=ntK| zImy&UPQtCgZ<6bgSDcYm_{uWLIgF^LWh^2s^k&|uAH^dZeJ>AUYb&ScvO#Vkej%aW zH@#v*Q;BZ5WF8AAAKAvmWFmgok4Gv6jiL~NZg6CvA{RuVqK>AZqQStRsMa?AzMO!r z`u`j8|J`>0odEQ}dW4=C2w!jl8ia$1^=b~V{*r&-;&u6lCjXFsMj-QVCjb*OFok}f z0ct;|5wO0$HXQ`PVqjUY99R*o4>kZBUaqylMqp#G3D^{D3as@Yi$Qa+CD;nOiUb31 zs0Q1D?Z6ITN3avv73>Cf2S54qT!OA}uFs`wYnuPTTw-Kny*8QRRFR#OjgwzizBndi zgyUk9U^~^hA!vn(kz_AlZH#Fzar65Nm~mPoU5j$%`!(gd8=8tm%8lzq7@b1mH(`CP zo`cAO*PfEmherszu1R}bNH+Cw?FHGbuMl-?mZ>feAL=x2{kZVhas{X09>Ik6n()^% z&~Mb3K9+9O#!2aj;N2BiSCFNN;obTE+-rw16uxTiHBr*{8OOo*cPTvZH`06OUX9}# zoWBl{Y{>j3kVWlm|0SlVzf|L3>cuc_cS5sQRS(8#PPyH|lF@9%^gqNu{c4-D-E#y4A}o*(=kH8~dKb z#C?!7<^7nghF&2)b?@=rHIg?^d3T@sw=Pz<;eI%$^rj}=6WsI*3TB?m(LFZJ?N4P> z;@taKIuvBW@AOb%+is%O)FdO7#j&r9D>{+OUI<1Z7BBo|)_4%rV}}8$;8yw`>K7)- zBaiPrO=YZ?CSpcyJ!BNgnjAtdFrl{_6>KZ0BA|}eUJ7`8^ELcIZ$@SVO=pVUC*Qt& zXXO!k7y^ab?rixI!Lq|=Z=!33$=&&8tvO99U}up9+R70Y*-Og1rSs(4)XieF(Rk4C z=``*)E6bHFXE%O3)L(M(G*}M|jG)gMH*3fLcyB&a%PG~;wJvRS&Bm7ZE;n48z$-di zDTWx5X<74t4;orW@BENtJwN%S+r5#N_4{m5#{ME(Eo$gNIP)FK(`;9|HkwJnOT2x)Aw0Phi20j&m%OSCuQf<7*yZ)bh=`4 zcRv+Pg-;f5gRl%~O~i2MQ#QLC6XYlE_hH1ejU*&FH3$^4l2na3Jv+_3w_23tPl;c{ z3cP-o{_cSAflgJ2CP$c#OP&0)kchKS0-0{H!RZ2Wo{f7$-L_V8B??Nfl}yqSv}jl4w$~zY=tarw zvvsf@)eMf!QGXxCt5{;_6WM6Hqm@BKcN^QxX(7;u?MWW*3H{sk2rGHGW*ioS{8R;& z68RRxZ>po4)$cq)K-(z~c)!A>v?8*HF|8M z#OUgi^x|t(OY36^qvaj=k?U0jq#2Q&!m@mgrq+geDyv$euJTJsMk&KZPJTpjo4 ztL`MY@zBjJOGEditP#Ut^hrVy$B2S;Bh7XfyY-D@!eqCTIJp1U-C}2jycEZ&YN?B1vI81`5k33_cP=Y&>cl>d1 zWZT}bZh4F`Sz_d#(+Io_YW%^L`s4R>!!onurmayFlkd2PCXxL4jp&(tbD!|Xwf77z zz-1)*NU=|Fq7tJW@IZx0?HcuXj6A z*|m4g_p@C+hZE;9JLhwASTeD1{&*aoiZ&WJZ3qv8f znQ@x5+I^ZXf@rif&MBYj=cvZ1FXm*ueGu<+%;-MtlUCt-!{thh5m0%U)7vNijkg$y zcZkvZc+CFmcoufD1=A1JFEN(Ntnc}}Z<{mNM;Xi!l=Ygs`h~uys8B;sq<5&CD6Z~& zDHQr8#5DW^`h~)P#r|uDm{c6TyspU>aT$kn1(*?eDLS5}c`UL4x_t@_&ifS)t19!D zhxiFP=xMCZ*Q3a>-br$;3ge|6xNMDW9QjD{V;iaf4JMNW^JygPxwbTv4HLqRO&x`;7SAcS3vaqM3GYbH~}d`mLi;5OHSA)#vPi=?s3-)H+|UW>N#N_4icGe&Iq(v>%D*td!S2>qgvD* z=V^f+gW66)5&ssZi&QeXJ9*nNJ&79l6dlJ`K}*JXLd&`+zMzm-ykCt;Zy-Q3%e$s>9W2ImH94CwU^B-@>r5%0K0&ZOd>;a(gj zmVWqtj6Kznf#*wQtr6br%O#u7l5g(2cL8obCrJFVUAMB~Pbo(gxB26{@qPPjv~27h zg~Ofab)49hV;xiPzCb!cu|kZf~Yv%Fu7% zXUk|bvCrhv2CaIW<-8&Mj^wCuXu2G`ASL0Rwe&Ptf|8xGlXa}^jeR7TvNK_c8B4+9 zKyq^qEFz9bFeXw^QjDtebWp8eEAb8E+8{j}=6{J{8Eq8jFu*i;r2GMs2>IfLWW4?pkBMr)?3 zX@$+6o~F8l9atjQF+6UpF9~t?9hYL-L8caKe^m8wQzuBX&9tPP38$<2;^wIRKoPwj zBMpto7o*79f$VV@o@k;U_%LGiT@k!!$eQY$Oh++}z< zhfnr4$=8XAlbN8l~U7CVNY68826Tw(qLit$j};jM|9sN zZ}?Is(o+uSi_e~c^k4QJB~Al;Q{S#oOzVtppvuo;x0)ep)hFa~*eshaL@%+l3xffy z`9z5!QFg@Vq09w*(UTUPK^&BU^?t)2eMxR)`CkJYUC<9RVbfHtD>Ts97L51}6V~XS zgkcX%qGDR3`a~<*zS6AV1-UiC*0qx#DaJU9WB*X~2{M`r?qB-gc=3fR=DagqajkZY zF7f`8aM?w+kvoNIPY0zxGimf37kPZ+N#=gjyL4;r?Wd1cwnE?;;~ZyJqzzVkaA%-{ z6po64z3}4f>C;ckn5#P-G zn16$E@`v7Dm=sGFfqnmAmG|I!5?g5dDel_0?El_b!okUS%~!&vFlmYHj+3ry6Hd4mV6yVf#Y7E5bGUliD@syL{bO0k<41UJ zhkZ$9fyH!nSGD0M*6<{zl?A8d?+Zhpm2pZ?JjlP2z1iTyi8m(x%#qO+Nza_v#8Qx6er5D)-K}hR zg8u+;vcTL%S7i71{p3*lmJdHQ744+M6K-^4WA+pYQ8r_NZ)@$&*LR?8(xy?CW;FL3 z#KhY-KQQ)bqdz%+%h>Jw#>Zr{o-Vs=Id0T8>*+aYd=;LO-zFkX-u6e^LUP`=hG>p*F2jNXGuC}KU^Y=U>)$dmy*(9-`4ON@!*tBWbAW) zZ6{su9ARnY2Ss~r;$m`7Excat@FvF>khM-}A=UsoG38ukv6=nt8B+2#0iH5yl21E| z*~;=G4oS_eqVCUCWtL;OGP?t^^My>9y7N_b5jLA@0>E>jY2#WiU-Z%!C7~{CBr9u0 zDHge5fL?uPUXOyv@pRW>A@3!RZmCtG!#*F;SeG;B=PKNn4D%?`h@jT^37w zU;S*Ht0grfCKpF!#q9p$-jeNO0zdk#>^AE(b*bAQYt9TQL#FFnRiuk0iN0CnX}r8& ziNbaGQ3_#-Uw>((oqKjuAm3n}j3 zKM_opNUPu87_~zV)SdQr-a7~%U4Ab+pklE4M)%o$>xOqdCa+dDz&OKR ze7lJ2_~*}Lh+TRpwb9l!MtusmmK-#Gd>yU~po{(Cct1&;h9Fqhj34$wM5?c=@{fI&1w@&vmAQ!8t{qW@j zpGo;*%pgKX`iGEKeLen1UN_F3nM(YPhPk%K`9DT2e%l%HQ^7!jhCuLyjq7*q0$jYV zU7*P;?E=Ig{?=Oy@y7n{to@}~pc^^5Zeeps|9<6Vd0sqLul+S$TO45x&vSp5^%(+E^%c=Q3D(dBx>+0x~B_wK=Cg{f66cpvir>non!id#K)liO4&i)#op_=_QAu**G z#wCR)(W$SWo|;~QUU9ouPJ;g91L}`GGRAs(daQbS>jruTMn?AMgUv@?-SG(gpkZ7oc6H99h&qhstbGs3_;{$xG$yI4UtI{Y_=>-SAJm(c)7 zFn}2nsf7fQe+2`80N7P9jO$Oq0Cr{;5co>AAb0+QcmZF401LWYMZzEf1sQ#}pK&p0 z>Q9j}$dy0h1V9VFF;&OXSq@Hx#^WuvNXoQ-L0F4GDHu1 zBD_K~o{d<|Zb>10?L-p$-q@;jFHQY>Gqo+r1ZVKqJvT+;7eUJT@kpCE52i#Q9 z@l;C)qs{60uyC!wJ4USvHG-_&gRp(2@8HM@qE`+b#c2lR@7V+`{YC@)-|?UMsgsL^ zr$0R8J+jB9lubW8e7J?xCTW1nl&U5ZM)Dbh{CN%V#d6k)^f3WXkshiKSa*=o4X}4e zXJxG@Z)2c5-)SQ=fU8JU6Oty2$BIFgGXPbDB}&p!J;t!bf-}Hx#=sejv?KXKiHITF zCSZUuD~(EO))MfF{MetD$l7m1q^9iN3{aOGaX-GUDIMfmbtPpk(z{D{!Y?w(D# zzo=>%XI(fTZ5Mubx8;`ez1L67?(;@H9?WPu{9qX}D)uTs)`~Q7K$Y?f*rfV%k7+?R z`)P&iWY{B3c(R;Cc=@@0I#R<{0kWb1ECrU$?W&gp;+46#SJzre9xnvNM@4dL$R&mN zzQw=0LFIWs^Wsy2mC#JG8zl>~-Dzzm1|b^Jj~m#D`1~OB4}JXM?mKe@x=}2C9P=yH z9lLAj#9H-kC%XanHx1+a4>0WE=A^BXo*z{Zq^*9f!aBV38kIJ0HTF1HnNp@irQrU_ z#u38-0_c-XVsq##s>)VdGSlI$H6(sk5dkeLCrRbk;-N^9;vsM!y=dpMn9O>#MW*fX#f)6&^sX3`%F)(lJCsqbhC zlSCPE@yWu&QW;pD2z~2 z9xaL+f4nX~l$bI@efnB6nEAy{8?I2v+%4>75Q~OOOr+3&1>r%Ha6As$_LoKWD7qwCbtsR?zrc83PaI)`_xsu1XP49A^q0Wgtf=hziI=% zIzhBm$9j^HLhyBh^;&hEdR3j$9ShAYSL;*u$xN4ae@Pin7w(7IC^WNOA*dgkChjQT z70IV5Ksr^CI(igBIC4NUV|MHQ^yl{)7GjxW)1T^=x^GhEQM{DDh2u&dXI9Th80!(T zo7E6&j&jWU5|+C9Zj@njxm%RJW0{%a%11b}tW`onAMeCP?D#IDVwVEk@u*lU!NI){ z24cCnFm(OzgJ#7WeQv89xg(wd$w^EqW5$W?(XRIFE8e8r-AZz+=_&%o@{H8viD7X% z$e>J<<{gXZk-9<31=G1{Hgcu}b;t2}r#zL~y9OeOrh7{F4_gfuIyOGrzhMqFAtE5( zdi;32NJYa?4L*;ottw41+l=DF;72~9<>1+qUh>&9A#pKJ;`dj zCZrcOx^hi7ho6M@8|X%`q6x%@vEW z*d>|+Anl~3ZjWK=h!D30{k;<8;LG-WrKOExv0}w-3uE@KS}bza9btJ#5K@Yi{muzSCMsSC zd&({vyQAmgIHa2wes}2G8SnA0fTF~D_}0HYc+K08YIk4=+iPcq!p- zpzsgR!~Im&vU;>krDP-nG(Hf*K!)B#Ad6c0G35fREG&86_3jWxdwp{k;Y{(MFV5@OTHJzf2pYsZ7N zkBiDlkIfVjU=o>GAhUY`D%bF6iW0JJq$T}0jopPj{a$q}9EG@}$!KtL z_ku-MP@Xk{bdyRX%$@yuDF6r^pQdRaeS`~cNK3{Y%{JN3P%1RlQAFBb79ZH=7Q1V@ z+2&_Q44yj$KSX-UTes5!D?E6^V6gKkC#NiV(f@4TW4EYVXS#UMCAJr`VdAD}04giM zNpetq3p6r~(s?*Kz@I|Dyd5oN;->1B0M#_o-cF!z%9pA7EwyDe<5ZDl{|l*(GV3>) z5S~usJtVBl=)#fYIs3!)*!X&umOCCsUf`Kh_nUB4Z8KTTa>6Z3Y7+L42RfD1H@-*a6)Ijkq$oXlTujgWYXeS*|n8UTGkIj~dklghmj| zAaj5B3HwYCZZw{bAMOVahSpJw4XB(=3|2taZEr_GPKE}Ya&m+x|NU*ws4azk-m>d- z>UrJUS(gq6M|EmU<=PAmcgy^VbTk8p+^aATCq`8f%%Sa%M~yqi5T7wGV(0!TS@R&USucol*>1Z{rJ$kEQHq~~lN5!E7(0(sec_1fJX8JQZ+-8-X? zmxn)PrUBDC@T{=Jsj-d$=)b}^|2&xq-C178Tv`itO8siTW9zCInwvF+4=Ww#{e0=& z7nKUP9ocr0EA63~H6*TR`ilA}a_V!Z%Ne>3+iLzbeCFI(ySWhkvF3(j-1x`y1RBrH zOJvw>pv>M20LU&STq<3KTlgGTxn3e>6+#zoB3XP>(2^1J3WhDsPw(tHr2oTgp0j|j zBQK|{n~hw03w0zbZw!91N)et|93+4p>LPxRxEx%*)aPveHgAO81?}ek7Zx)bjSs#7 zH10tpXTW5-8HdNYesO)=BIDUzp4(d@CfldAimBt2FR8S=@kjTMGjK!jgwA6d9GWu> z(Du{IL$e=lU7A&#OA2aciAOcbeY=b} zu?ccSnEA5{Au5B1imB$g$3&ybcO?TS=AGcJZHWFCxXgKi6++0(SAxWTD-Cvs@L@kZh1AHoNUglhC`#k@ z?~4S++L8zk?(1mclbR)zt#_X>DJ?iMX0lBoBV$_zU?F$Nt?a{ESd}stuuhC5DK3vW)r~x&2=?ky zq0!aZ-O@jZlsG%!WlFz}GwB+kk|AA#s<@=f;ilY<@YlA$HAU7ARfG*H)^+^&G+%7# zZ@w2VSt^m$6kVx3b_nTwMOikGLIw$Wwp?Ey9Iwkc+L8gY#qVq^{@d;o>L2A#5lh< zSiRbMjfbr4B}$>dWQgkvUN=|SY*(m&rvB3Gcb!jOoH|Wdh*&zrn^;!Fn^}@2KbEm_ zl0N(IN1oM+wt3 zi0V*x4@0q{ixiRzW5hq2$ zTOm8>*xYM(Za_x;Q4g;#20XQ>EN@nt@Fr!!s)l^9Ut4=kS+I_KsA+^!1=3Nn!KM<5 zn92a}uX!}IA}2pTi8SJR+v7OL2&5U2`)A`VAZLsmtYzW@`<^lw4BO~-x!oI7%06QjCi@gHOX8$D;zeAINkAfJ1z4&+q zg5Yr42pW2>j=)Uo&?q2+@9nLZtI+ljsVv%Y%;0U54QQ@)h649lxy(cq4d1j7zY{H2 z5IC$^+k>alLVQ-J@G}3zdfV5|Maix-=7z$n->AR844#vft+G{AY>}0-x~Oyvkyb{4 z{vs^L_fC$K;Kux1KhoMek&DAlLQl!4=Zp^cOl0{0(*=@>-HChIlusA3f;251%q%TW zuaP=p`ST!sIek;$w8vp3@ZQSXx%70sKFdW5zNN3+NXpwO2DTtIO|gQ@>Dc+TSeK(Q zn1cTQhy8 zv6lh}W|0;*>IKDtyu?VwMiwJ{RJIvBr_A}0MCZ4it$r&4?Y zxa28icHw-U@xqp;WA{0)kw+y5sx6VAoSakuD=D^rr{>E-O+3c=TnQGYzK@US4DJL}<%&r<02ijc(aAZ5+L}7GtHAB>ok-72xULbYGclngO z{0JZqlUcAeNJvLSD_No^$62qs5X`_^Gj$-T?=aSw6#gQN5_j{9^oI^fjBFKyoIb-A zBZ6?J?VY9x;LG&0I;u6|89wv{Rsp^N!$yp0s!g2ZtF7Z-mg_&672VE73FWsA0#!@a ztPM53p;1WD%s;`TbaqbR5p+t~zuu&;HE7F-xK>+BkbJn!dK(0>J1b7r>{MyVnys28 zVevy?h;Tx2N6MVfn*5OR0s3C4rn=NAKSo90WXR2g+`<#uaN3*d>iFOd9ruB+Oj}YW zSp*lrd@(s;MbY;`)9R$Hgnja@2B=8o8JS{L@lq~X@I$ZRq?HA+%?P7aK&=BiB_joC z!E^(`uxfPwe22Sg{t{q3!7`+iLW}EY8~xjQ>4&EljNK4Qn%oepAjPpC&4S753bsSPbWi7GdNQV{&vm)vL_gKZX;OuN6839b?D=W zs6)6|;;=I{$FsncKz-D6A}JP%jo|K5&j=Ftbav>-~AC9 zhVPZE@A}VoH-!23{{qn8k=DOK!tam&89=lCW4`|fB>d;k|CT}1e>a`}8wmaHB#`-^ z(CmK+^!#)Jbb@q3|H1=BzlosuHxrclrh@-i#Q(ts)&C)bR^O=3LC@OoKLNsj%;JC1 z`Twm(^bZp>bN;rCzndQaiw*u4C}{Q#0{ z^2sSFVPX7zr5FMx99-})4YN&z5&=Sm@=~#;`F5dU24Ku@Q9KEzOtYzquCH!isIj{} z#6DeLON&J_E$3SfQ(NyBiVS~B@rV*eRXv7Z&r5npKV33k;-x&>a$dqcaYj7}oJlYS zey~IT=J>c9cM=@Z6Rb6bw?>~BF{1o0YP!|L$DYO%%h82 zuihw7wzTEYd>Vdw$5s|jGNjj6ZXCt>Z+3E5OUBxJj92d0RIS{!uH-Nu-1N?mV7-eY z&AMC z6PCIcX%rJ8#z2foc1EZS^XTAzg4G74Oi&pmQ!yqKj7b|J-LllCI{Vlds8UnVlOc4$ z=mXLSCgem0pbdf3IMW#-kXz1Yb_d2mZey}fc4XzDJ&9-i@eI2RLti`Duo4<2%_7{Qx4&7Vwn26 zw?Z5F0x5v8Z7dHaOU1DV?3%wpDr4W zO;1_}*+6UXJG9sxV@y6+Lyi>fdC%-G=d+7AuZQThizf)plOlYFp#h2q3piKeL3}`X z)vT6Zm*b~i_S~^hwZYb_G9OJ zT#|(5bpGX}2V76aCj~}o$sqlshYdgHv+fcMgCf4g=rGsP4&swp(q?cF?$e-Ap_3ekIZ9qJ}#aTvvN!mL>cxrt*_gXDP2GF49Mr`HWLZqp-#>;QZo~g z(jy=?juA7Y-O$eKQB07#@EmVmcp%F8UQ1Rt=WzIZivP81g=L$u!c)tY919TGG3QgM zC@dm8hh_uNMO(&m7c7ny&+hUwgt_!Ghqu=@?=bf6Qt|o(?aQUQ$8;Vv^7Ol8+<{0Q zeFD0!Hh5G1@N@O!acrr%qqjnpWQDS0SoC!Dv7$DSM+b@H zk7iNW7^3^HD1tVLAOq>yq&(Ap=DoV$uhh4KRZ;Lc>$0Uq281TWvwc2`X?q4Dt#;Us z9>+=scnpc)bHOK|qg_jHWuRTB!e*+euz{*p07=G3PBV*YsAoH=4s=lTj|N^k8}fX= zIyGdeaF05sm7Di?Scf>ntP?=UsD-gWWq>~C5c?gf!hc9HUHuKyKK4s0A6L~QwZ0@7 zZ7`9IODF&jzJ0^a^dl9p6d0oo-P?HVI6HeV66Mf*YVSVW-$9i$BT|W7{gTKyXS#&3 zMKbqbqffB!57KV)`+M4P2kmY&Ps`c;u6=zP4u(Uy1Mm1PR<;~V4*tZFsfTlDnYxbB z^}bsY@5i+4orTNRU|z&xfM&a!3#*Kn z92J-tkByTEfb^h@cJF~Z)U`tlLCzn##rG0woBQ;YeOmi40;Pi5VRPBVRm$V?(5sah z2^~+G(F9FnmS{l3HcF`A$6k{xTAs$c^!)Up=72(*jbO~Zu$z|29@N`{|7+ICQovDSTc0Ddo#B^IcK zY0t?EtIo;G1OEwdMYf48@Ogv~PNb*dTE6Yb-FL79{i1?iZg6O+to0!4x+?UAQ`@`Y z(zFKRL+y_QH|=$IrtI+tg6b)$C6Imezh> zMT?xsW=v>YxYZoDAD}&(Pm#{Jh-^c)0?{o%N(y!IB{RNq!=V>Ll`aZwLyyc<*J&DQ z0fEntv_=hSeKll1nU2Cp~sdgc46&SSyfl7*s`04hl zhd@dQTkJH&_{6Im2LT_FDAF0MotKN@{ZB$h7J7JwX05qG3fmK>OF>iI$b^y$8 z^U^$xbYZwG@OCw*pN=vL)Rn8!5yRI7p^p6Bm+~gr982383jt_@`gOIGmr6+H{ShmW zpUTvY0*=z~p$X=#yj*uk6UM4fcYeFQt)rl5=o1*qo;}s&P1*vite>Y64!9f}@`=b) z-$7s&WOf=}6e}#8(JJ_Di7qy~Ecf$pM9uB5!IuA3#c$G#sV@4!^EiX6vQ!gR6E-s% zZcl)qX@r8DwoweYA4_~cKq--q>%JP>tp4cRCmYi^H3{&8)~E*sgjlM)wOLC%1PRxE zx>_;|Usv)fl|-DIX;8qTptBb^U9eYDj=G&*#{Ma10r zA%0J1JHVA&*bkochcD^6Nas++3dJV#F#V2E+YK^#>OfNZSkkq!82w{4#|zvHR6ML$ zR2ziup=CV+tFznK*sAdrm0y#YJcVQ1bi^AbiW%!^8rM1&7044p%t_MbGU;WH>Vd`f zvsltE{UBnIOXmCYfr%qo5hb&zmY{5}C#TTZVMGox7ITg2RU#Ei75n=5sScdA71iZ! zrJtyJ4adq`exnA@~G39zdn1FrFexKjBqa3yzFf(`ca zKziy{a_))W?`TlXQVIEj_x#}p&()AFIl!2hm@!{63vfC6tsz*1(*Yh(HK>2gNLLEmx5EspW(<=GRe!|K71$2;v@q91+S@iahvtfz&F|GF~~A5d$(a zYA=kL0^pWBTm8DYOA^>>uq+;I~UXtPM7R%JGzDvMI^|U{K;QEcGUTDcH z$3>%#C;G{Q0o`_Gc;OPjO7)z>k);JJnoA?mlXQNn)&*~grnr7+dg2}+OFd&OG(!ja zDXO;Epl|=o3VMsNMp#=Kbb(pqnh7SM^=oPAUc z*o@823;JFC4g-1pu|Vo*-P_{f=XXD~gDw;lD2Ufh!`NrVw;C&CD+NTxJ-V39*y z53;(zSfAMuOU_DcDFpR%%uTvOasnsXkkaPotR^FWkA=ahAd z>U_a6NfXV{o{P-tBE?!aV_P8c9@$@q%)Fn>DjAXL7PL&-Oz=<^x;3HmD;N~zk^x>9 zmefT}D<+n>D4e=uTHOe(;cnduwF5VNx6vE9ztFJ9_LF?w=P$_>LhMb*(K-8N8a}PU zlDK4?bh=>X=oq!%A3^kdK~Y^=9%b&R*cKuh1a;Mf>vnD?AINM&_b~2a^C*869(h*` zBcj~UMaeM;xA(k%dSC9*UWC^qZ(X7dz;NW+f;7V@&wMdn!*gJifDC85 z$eElDl#Rg&kiFjez8jX&r&oN3Y=8Ca8T?0h2TexFIoCG^Y4JzJ;lPj-1IZhw0tJ<4JztYL;sFDU)&Ccib9Pj$Mif16N*G1ML}S`xMKFc#zX+;?w* zGuP1Y;1CKF)J@pQ3D3G4g-m*w1&`O~Z`HmRVMPHDZ=(89TXyJj-YGYx15i}kP>bC< zi_W&yI;NL~IwqC|NPR2W5kqM(qBQdm{h@vfa|g-aOO!Y}wRU1X7h@xei~V$ebLSV3 z5Z4-xbDzT}R7sLNM)1Jo#WxIN#nQbp^Ef=GlH57}Z@QjduhnV0Ru z5a1;*HL9)*+M9sG^O}|gg#|%~4F`>jO-SY7mLC;6`H?+fB45tvF{@UFzOt6qKckl5 z^hQtYDncphWQXPPAonhMn6l>81ge~PHzK23;)e1B;M|8qak^$6zt94fuuc!N#pu4K zM$w#oTpyd7o@+YKVUv8pamhkwTcsmB3xZBKBP5dhh;8klT@L?Vy6DwskWJx~!#2EQ zalf!O*D8oHC9xo9wb(^s6A4#`#Ncv%Y-~Pyhj_fMm_3wir#XElpbF^ql6O^Rv5X%8 zI_;Hc|N4Tv+N4}oYqSI}A!4LkyE*@zSd`u6a+$k+s{OPY- zH5BSfSTOLulJ9b@M3+Nt%7a$GFtGtA;mR_RAWQ+{-hR{p#-gaFPl5izEb zE6tEeRciEH80>%%_#mx$OrV{WT`o`6u{zNCg?0mRU`K|v9@DK-i_eDpv!xn|?|lCX zZ;%H6^F?xSPco!9mqY{ABHyne>ND@qx}v+YRi*CzjihN0cltmdgT^s_iG&+x0l73R zjnlz}&;P(SG>ME}KKi|j`>*8fEvWm)BN=s@kjD+u)9rFoN<)m<{7GRc015))w2rR| zBoOe&*3ebQ?hVr6aM~#@-OzXu>R4h`%`Qw+>P&s$nl_Mn#_q80)h_PzwysRz*;mK7 zSlnie2~~b5Oc+HClw@@dZU1r4psO+pKnrm9Qj1i13Gpo>_9Ol&p2h|t#zm?t0QQFq z^@NVoRp%E)cqwFc1TAP}h@>sgv(W8675n1MFB5}slUcBL(#jw4@JQ#^cnEcj^nvbh zw@gFc#d%cLREXueGpt~=L%+#rwfEb_?;nXW*|bwg>l~I+(dA)_mC4(GxbFs zM&r!FkN)_00u+4jYIsh4L$K4;wTo%iT=(in&!Op=-!saio8FXPjUZ@-IU34+J!YPw zyEZvIT$18n5<&-w)$hn7wDv4dM_!T#$aiJ}TI^gc5-y-FLZaihDgLZ;VfL9W)94DP z-jJk$YbFJE#(yeR$AB=kc7TGkN_*6!g6CbPe7W9P-?$j6cilLBikj?7UbTO+=M#%G z%h*`6VRx16>JHVV4+3Y{2QCri@UhrF_x5e`dNJZ$`Fr{^Wv9DZ1+r)*lm{b=9XlHZ zFu~4ZD>9OGEe%hTxYWUy+{Zv84Y`GLSOIJk!Swe)?@B$2>LB3|K;y&{9t_rQ;ClDF z=?w^-uJrL`X^~E%?igVEDBkd9T4Q+<*@OM&&Qr_uDSYtTvej$UUw+WyuIkwBkuTVG z--N=N&{FQ&esw5Xk;)Vt#=x>TC6Co|~N?ySIr4Smvho216lLHbFV!h<$g=!+-) z$|^s8pa2avVc+v3s6^!DV(Ym=gA-OKm$ph=3+o~c7v~ozd(GR-AMUsMX2x|M_atZn zXXip!`dCCMq5JEe+~zg=J?1%HeT>dJX+8}00lI*l=XnQ%N811iaM&|0yEpm?3>z2+ zk!QK$@VP-HiLeGHgtMU#Pi>w{s)Y+z3I$)O6`UHVsX1iRyE6Ane7+94s22q_e(fg} zISH>mL{J>2s=fTr%*62R*%Q0Ay1nU` zQDk33{d+Ophp>3h&9AVx>j$|dn%qP9q_mIaT_y3%e)&D3IvjzlYjC=FyFH%N?LTtI zH813!igW)ZBymS~m)yV%1Ah(TOGd)tt=X8f%_Hb2hs10v&SLK7=XAExa<%_dY&NC8CCBRy09N0oYTF=(AL7`;wD$gjzskSxoIDGpHtOz~LU6W)0-;(wq1`q%j4d%x;CzM#ivXZzke`H%GCJ9_vZ(u;rcivN^e{7-{F^xw0n|4n-F z{}Nb8{I`(>fKG)@l}?TBe~K>tWrp-$rcwSa!1!lS^Pl|UpWpXZ)jz-gEy?)rEa(3c zX#5X;^FI?u|L!^eyXpL|`tyIx`Th60^Z$=XJS?BtOlmb%*>R3GwHGwUZbU^^ChQ(A`afd2;i9g~Omk z)rlBHrlT{5g^5JbKV~+K5*~joXU`5E5VJmS9B-LW#hj>nbia9DzkiEWs{!$1#0;Xx zO$jBTKA0q(qzJ(e@Y*SFyAwHr?eSH)HOOyIo*pA7C3Fzc!17i;je0*wlEe;nN2B`) z>h!$3I|whfZt7byxICUF?3@)zrPHli$VbQ~rpFbnk1sIl#>qw){XLOPTWD9tr^yJH zuAAp3ctFp(?BGPuC%#=y{@E1 zW$6ArL&ru&#MnhLKE}SdL?Znz9~s4YCJ5db55u#T%na#C_NRz+qO?()yEPt%r{Yi< zbY<>$YIy=KH7`3Xs4u3(m3rGK4xuJo^h6KJ_5rMNM+MFtlnxd7)>Ux z45Z};N|hHZ&DomL(*>h)m*gDE;uh4+k(;71rK?J|ay2PD%z>GM*TkZVSma?B*vuIZ z#iB|!$jQuqHU+6mNf#!|C6pE|7@pCXV=yPF%SaahD#q%ss#?zFO6(v6; zY+USa6sU3NwAUOjWSep4sJf|ZB~2zS&QP6Br|wR)X&bl?J-?N;Nak#LF}ziuuomhV z_joMHFR8q}Kl5_vzWEXX>tLMl-keE7fx+M?ecAVpldZx(-R-ak@Ol_+Qn_ROIjXE$ zdm5|&*6I4Y*0&r;&=710{d)G;xJ+jcq=~39wRxcBR-om>8L-J4j0$tkNo}wau!}p; zgDuZmFD0ku666PA#R?$<7Iq`CEWy&nmxTg%n+u?DG03?ZX1V3`7gRAtXZMmR3Gk8w z7tIsdoONjpds+y_7=2dDokznJX=V|l;|wA@N8*Q}T+fw;|1)|ju}NcJ2wTlxbU0@p z8uY>fmI8%Bt$%Y2aE+NzHw8`jJ&4c0Kyu(DPFcx+O3fA69@=&HedO*#7y4yFBFvbH zcjP9Pn2OztglsWtSh>*yk(vxoq2QbtPtSkqIJrtJP>{<$&&U1i;q_iPLz6A@CB=kQ z5Z&|)!vVj&QYt>YdPrfHdDjTl&Q!xJsqC|EX-UXNg{+VCmmdmz9?|lFV@COJ>^*!6 zZW=5F8Uw-3Ju-DLrQb#9Fjx%1to(}PS>e_TW|l$TLx zm(TV2I&97kPteDH{WD!Z%+bA>%PkeQwm+|KT=Z7Sxb^XjWW7yBaR==>8rF~! z2!{t=jFc(Lr_=BRsDmcz>iv>HXTCtHe(b`w&?9S+Klq&qg2flGvF)~<;H~7#q0KNM z#@Xiq)&pY5XB}q;(uU;0umG&tRuJ&a&tpv3MI1{yv%pVV{N|+&J*>*WtXil{!4@!^ zRn&dpo}yn~_L7vIM4nSuoNlc<_`gZrv(%-dgMWw@lPr)L2)O#I7p+Y0zqZ15B};qk1obpaEszv zlhAOniezw-W)kPXgWP4FqwT1EwZu3fp*|n{JRAZ?(Z;djB_b481zw|z}LdjcHviHxZ z1W8gC9#goTDGhUF|Lxj6(N1-%rl8Y90_}~@eN$2t?j4)gcB0G##!jR%XNGi(jSKhJ zD76SN2soMJQ`{MC>)u?$c?3el53O=OQm12N+hH*}tK6TQ>4l;Bmq)=s30XvOA#fqsTjs z-9M9r#)`>6qYp_5k|yT7n;DoeF|5u}2sk%KgxT1H_QBty-;FOOvW3xKKF-u8oh zw&LS`hZfJ$@C zdGYg(-RGFuxfJ*NaIvBsayz)>LP!0W4<_VFY_*_)2A~IG7{z;q$XyUnw9`y5KdVVd z8QN2#sp6!f@a3;eyR5zuuPoL_Y-4>mpY0wfiU*aSoz!y&#lcTl!mTqnttKbi5Vbup z%$PTxO*qN{C4h{n;=<(_@TZk5n@1I8@?bd*kX0bU`%=_5b(_XK`$2V2t zlaJ^9nqq+YGw>WoY$1>vJzs1n2=@CmmyU>kG>Rb)B;h6)1L0^Z+4*)PYBQ&-M{I^v zsSCs(|3@)@D25g02^|BX*DDHB@~(=4!11oK=&bkLyTw|~*1H{vw&uWTy!W|RVXlr4 zs$0T%nO&h~3c^WhBsWC86v;^aN5Fa#TE=U=uh(Dp-UB`WL|-HjNW2bno*LLFT*^!= zBt<$|r2ecN5^gv^WQ=LwYgk5sel}+p3{$d3G_$kz`h)o@Yvi%9U}N5cf{Xf)`e6sL zOgDjjnao-;NVGr3(m`a4c}h1vsKO)x0S%-fr|+cU&k&@izv?GziMNG~0x99tQjc## zba|&SZc-jn>&3MUf+aD@o%*@Mo9`9^_R8^MXJYp5)x(kHVO#?CTjC$VA&wr1atbb7 z!2rgWrFWH}W)fU7jBO~J45=g%njaH}n73{K(r>Z*i5?fCk@;S~U9%$B=HqBBL$lEI;dt=Cz4dwGMC?sEVm1h};3i#MfmBE=$nHdvr6vDi zN0p3_2#}7gWX?e|jvN zni6q1W;P;8_e*HtcADkQHLLzdJ%MD3mV_;QKJ>9SJfd{`zU-_jgDCi~FXZuu`d6B0 zV%fiRiKi+0Y0e%qip|x|QR-O^q@fimODX(FicKABbc9UaCG#Z59yvT*5$>@Ht1lCI z)f5W59(!^_LC#ubCvvGA$!x@AOKs|voQ1T%{Lp9k%-i(|X`3;k(nG^IMrx}pz=-OI zRl5Q*;qY0P)T|r%@;G?;a{xOzINZP$mVffijR@M}%rrXi#wMF>ND+RKvI5ktvB?ld z;P~j`ei@P)6&?xFo-jU;Z0Dn9*w{sQw`eCpHNVEjWu;Ar9t`>Jja6J{GK;P8@^R8` z66dm@W2pMSRN=n6TsqO+{V;x#SU$YFdl@UAn=5D0Ss75lWm8{*oY-~oBE8pU+A3CO zLqh$1-wnz6yp^|9aHx5-1g6t38)*{GvnwlFU9cQnGWgy*s&D;DzqHlo)3F|1TrdJg zLvA`3_j2l8K`+2(Qa?oUo*@|y0przGeU&_=i5|8Cn^*hB4#8|b7EgfqR7ff+vdj2V z3ezr*Hy$E8d2s*CE$WjFXYJ{Na+wUm$R5Z2w>#w|P25(>5U~_4KaR0_T54Qsk;2O9 zRGp-dzOTD>-Sb?~z58w#!guh$d4 zn%>DB$PUbXbEL>v2z`ruG{~L$G%|7#vK{^vgj6%sbo@?f)>FtUyGPPAP3|PQE&S9w zm0U_+DMRjc7$epo;)KGFV5M5o=asc_(Z<6YXycz^klvSd#!l8C@+Kh0b0|c0TVRs% zPU;A=F6!&)BEM6QQ?bnG=_4#?RHpc%>wm+qx;)q3qC=iNB(~vG_3zA3)p1mx;ML3dkQS+2xVDWbAxVS+_0)o)q{nS49bDHAIknvy41mAfV0hUvXw z6^NqcBJyer>PXxtr__)Te~AY6Z-w*Tt|hHL^JNx|upouWP}J&f&iM|H4ur zk*bc*FRdQ6w9ga466dk%)O3a~&tpkC-I5*u;+P=SE-iFW;LFM&?_B%rC8g7@7$yBt zHNJ3$zU#FHB8r*~T4QW#8?AVO5qFYky$6ltFP^)Q{g7QZ>W>>QWtJ z%S)ZX@e{6>Wjltzv2&V02%{k}&5Xj(yZm(~%|_Vu=RSKmEY04ynH76R0I82W0Q@VE znoH?ilxsm)*?F=8-tBwP!)#gP0VSgIhK&i76JE_SrUYT`(C^RQ=m-LhuuX26DOQNh z?)!z9Mnv22CJCb~TM}2u=VfpHEqtPwbavb&*v(8`o?$4G9%>^v*@?UyLhsYd{ za^VM|CoiuI3+Fj4-5v}_P#>5v*I#Q;K9ao)rJ2Hus7{Y!BFH$+YgZ6~O;gCAyiZJo z%ZfQzaZ}{%&ph^?K40{R){T+eh%1nx`3Ys9r%+vAj2kdK9=S{EwEgUNbbWt^bZZqE6t$C<*g-Gj-as8 zNMe@q|CUbAC$N?)Zfh;}T9vOt&-Z~U5HxGXrD(#6kWW%hQt4+?u9V$q8DTUgE!`a! zzi|=$*6A$`Rb4?q>mMEoK$h=a&>xI;VtbN_w)T;P(UDJtQt@kxsMK9M3(_FFn|@Z% zPx)LlH|KDriGed@I`LSI80dTH>BzW?W{982X314dp1^f|iIS(B;D=i+Ny5|ejY%b2 zIitnx+ePw;Uq6SpaCCr}2$*@g9P)q_$^<*w@AvpaJtD2Z zmo6|sr7|k`Qx}Uox-fIWumnd*xx~5f8pd2~46aH`XCjhL7lMo?nV$q1ozbD`QTN9Y zi#x{6ikXJWPG2EVxfpR8faxS`<%hxnzGDNkd{SRtoIC-)GQBd45i<(*Q7u$oZA6^$-swB69$;JjG{zJ97F|npA?$oe|AbJNU|7>4w4mOjOn^(JLq2nR{kU^ z-U^$E5@KrV;oKjAm8e}!y@GGHGLy~Kn0+_IcHC+4HYe}sB`c^#ju;#wC)tOjwb4F! z$k4a$p9~iEH3#!zy*{P{uU@y@p!tCqq9l0`_9pFaI^KOj`U61nc{;hc*eDr@8wu-+ zyF1In2MQ+rTH~ma=YF|+&E+$n@sWq`m_M)|22v{hNeT)X8Va^Kh32-q+`NZlBM{dV zL(nbWO3r-_eZgL14!QIuj9{)Y2%kyZAY}@&jpG=R$5L7o3e|0wZp0g0cj&fX)!u}w z&Eewh*LDc>GV>sJ&p?x6QaZPnWN>gKB?rIr*W4!L8loN(ZYNuh=ofY-M(k!lZ7gHY z4T^-yVrRe3v0BCyMntQEB|#=ZX5lueEK2t`T$vOS9X4l^BJE6q7ydHa4pl=lnwKy2 z$g}y&ufz4pd=y?#pMbx`!lI2K91TE%Rgo;k%kxPtAP}+wX$dO`Ul^`HJ$#E%k5e3~ zTe04_xUC3oArzHyeYHU;XO0pjG)Bh^e9RP>aiSrd_F*NoyZ}ss7g5x9899Z>W0(UwM9K>lkDxZ)i_%7w- z8hhv9h3oH-UjGA;rHE2ED}2~#-0B7zhza8in~?ilRv?MpGD2+Sz_6H0@>JUp${|Z1HVh&R~m_jtOm`#)U(?2l)lR<;rLEIW4V>&Umog%^z8o_&Hl_RzeS614|D&K~yvlRC!ow34ngi=2>&x z^NG_Iq!o1{Jw9pkwP&p9LYI63LYBfhFSMe-DAA>m&R%Sg1avgQ3eh!%AmIr%&&aJL+LJcRd!UAm5w32Syr;rTEH|6GgH zZSm7|=sdo0^SJ)9o`ypTfNs~Z3_~AMDKMx!&4spJ-Xy+; zkJF)}sP0thsPW{mflM^WffKd9Dq*Xk=j5RC4~a7D#Kq40u{i_W0?W6ej&yx+5dMH{ zH#yR+Hn+;Gw8o6&HHezTujYn*vD?9$GBhk;DMf*N-+ycv{Bhb7+mEmX4<-8Dj4d1y ze+gPFMoh~2ar@h3H2%PFR?H#mV5>da@R&%wD|=boO0O-s0TpaVYJd71CfdXCu_ch>XQUg6~{vgMk^4{F=4%j(jNf*PJ zkX-E8UHW(_5Ytb0s0s{5B3ntBOp9JVd*fd3eOP*-ro`ouBqYu9ciIb|P{vq!wuhr< zS&Pq3o`)V~znTiP6-{#?17dd*%EJ_j!?fVWpnLlHR$wM!RIvT9Y8OdJ%*@BnNsdQQNuStZ4NGa6NtY4lp-jxDYz)2}461I#c%n{q zx!CrTTtLJ4le3I~j|e6K8ba#=ptJsPH0BbQBOxPa_x#d<9Rr!=UoXr|{OFJD{E)iU zPs}@4uKSdqk}j*o^&08dI<(r(;Jo4hwDw`z#$RWmbSmsZr~1>c!b;1$qjlqX|GeTK zhL}$)#%j0OfevWdKezlrO&7r*U)W%qKpwzp+jX!xH$IFtNHd=ndUqOR0b!&KY17wF zoPxe#U^GE{3od<2j8s+ZtT5bnEh(>oHV*J-Nr!@4?Q{V7ZQmY0c%uX}uO;$cj#y&v zXRlUu1#v+%y)0)DZH3RRtUNEonj&syC0JBARnMH;3*zsb zEZcOp0jt?Hl)(Ny@(Npife5>b&pRxpddNo)+cpXNL!dPwkOPUpRW|8%uy~B1Y`L6) z`e}94n4uc#&BJCR=R*&B@0vMc>p*Gw;GLTp*BlBRj?nz~2=HyMrjxZ5g)8mHQ@CA& zqooT*0}(oVyBl*c3Kjs$@-+-yxD=ztg8d=FY|uovq{oGU()q#qD?S{ra_ApK@xe3@ zIK;7nYw=k+EiMh%w-;m-;$qv@hV^BMXbI9;&P7P@h^e{{od8~HBV11tOkp~tx4O+w z+x8(zAs@dbOn4unZT#12)XQ!fVLB+4)qvY5%%7qoXKS1JQcXNjwIL6N-dZoH%H-4m zRl%v53()!uzbRsyGlp>j-%C(Vn%4Zes;e)>ltWtgZO6l?9+%qv;M|tLIz->xL)>D= z$Ki&&OI1wFLJNwT+-#PI5(9^VNj;`p39ZR1vw29N8>Tl(2A13dK|{jBd|+pQp-qAO zK+nIB3uf2)z6gpFYJU+y>uj1;#5Zp8pSlzZnfMInQ!n&1HIz|Mlx9jk_~FD2(K9a= z4HYBt443St)WR6s{i+u9%HkD_4~d!8HRRkKpH=)o1EF$Vz|+xF5tunOG_a4_L)SF2 zX6Ekl8Ec%cNu^p9FTHRkgxWCh>~DVg2QD2npe@2_`^!$Fm|MPtB<9Xb9=RN0r&%2#_@7!}f>lv6z&)U^} z{j^Th=d+smH0`0>rGvYCS9LdKZ|`o}_t(g3PjMV-T%lAfj}^>zH4k~&MCIffgh!;> zKZ|G+7A{GvE{JoIErg#_&)6s0CrHL7U2L-P7VRngqU65kaB$8*z=j~-{+}w?#7L9B>gN_%Q-EZ6G?&;}RE7U(2JW&&- z_fO2}><&NYrCZd5)9dDC-6b*4fqn9qtUXD!iP>b`89o1rep%vhU0tU~a*cT6Xx-*U z;nozC>45;d#PEOB*E4UxGw_VT_mFl%SJ|G3v44@T3>8v|U3ATC}O09ywjq(4?W zVgbI-_ZJ2Lb@p<{nUB9-7=Xb3Wq)4o2>)@Gha29NT6_p)Eh6@P3$=J)$r#2M`NSzGzRT*Ci*Eh2`DCCvWe zJ;sl-yrs2#YfR?aTOM+deR9~^%gKkD2m&iua)&l9MKSO=fr!<#1)`_vk1^SEeIFjb zXl6PKRrF$bmy=vU`;DmOVYO|3<8lT9!);E?b|I*yCa9Bf? zFjliL@Lj;Up%rdxYUEaiNB`Y0h;)jx%C#^2v}EF=AGe{pefli=US4$}+* z!zq}qbJ15%66+<~8{cux_v)l>SMzW?^Yq-hW$VsIMlKV%X(0G?Eur=T&MYC4rEh%G zKwl5XGSc6v{PgmgCdHntV{acm30oYDu4kGwoIJ+weF-h~^mH+F;@q8`v0qc9o{Sm$ zPlOxD7!+5Qnr(N9O3ij_*rG2OHzih*;gCq(jdeLEP)(5ux2)B7AV7j~>DBA|@C^9V z`sJ0IsKw{#)t{<888V+R%O7XZf-Cs2MzHj`$P?%1I!wgbR%`1@I z?^anU><=YxmDf16dhA0-JMDFcfg)aI{+k2gi=PP8-CbQJNC@x!>w+`dM(TAdBVa-9 zn+Aa=%{MMObqp->^2k5BYv55qOR(QvR_m4ny+NbT_SBv#I!R3MZObfr8awpC;FV=< zpk1w9`#kt%5loO-5VO#KBED=_&S2N1+=TgC_3+TbiDtla%gqMde6{Dn2}or zy}A2SO-EJwbZYF$NzltvkGI}0xAL?om0t5WxOHW=`-?V99Vz}Q*bnV@*yMJOr1kjO z;q@KGJy%aFA3Z3imAEmr;;7M;i*r{d1s;~>6VZM-=3F;`%@(C771YgS=1Ni}>ZSxadh`XtO?7p_e_tB=IEC(ZyT5#k zsZQ2<+K0YFZ4FlN^<`q+y$99A=mAY9Wk=*cj+=ipi#48FPVtRWNKn0iKeCBG;;4dT za&y2y!EkSDir{tqYpwZFX#x#(H-=&HwBnuay1S#`c)hcSzwSS?E zw5dhfYd`B`(UGsEcjQDVWqUUG8T#$ZO1Zqmt1Wu$fhXVK8P`Bi|TwYrM}D~ zjx2V4s65`KSwAGVAZzpe_$FsJ$c#?BQo=y1y9T{J;cBjq6NuS+>(!iC+3joBudO}L zI+I$;P*hwd_6q+bBK&Kk^jEv?52Nwp-ac=;GIs0=K&9gfH(eVp+nZaOcfD3)tZisu zcm5v5O_AaI6_2D?-Ft8|{T3hbaEs#9v(STj$4q*Usb1^UIT%n=FF<|GN{e;(SN(SG58-51q2D4|U^5QILw zCc+2O=r_%RvaD-r!Yy-As?+c9&h2t|46)5lmDt^4l0?di^_KQGo6)mgKMzxJf8BY9 zzh+fRanYT>i243GJtvNaPzwq{1sAz7{L%Z|_S z%e}W27;@^n;|45iG!hlTJD;gb4Xgw zC869&`wu9W3K!gDLw1?zi0ewPw?#ted-^iRyelQ|tvst?Ua3)J^d&|wA$xCk2TLaG zj6ge8kTxddU+vJKDbZ^sD0r7yPB%I!SrE zC1aQ5ZGyB=?#fT-mpbwOpC-yi3ahVf8S;qd*g55hsHlZ34_f7dZf%sD({W1naOBh8 z@uWjjLxzve){A&Z9WRk!T2Be)Q@bg`RwmbAY0C>X=LBpjvG`med(9j5L2s`Hdqr7f ziErBTl7Dj0#HL^()PG}nKw|%=(QSmULTl4+nusqjr$&}8;oRmwny7uYj;0}?^qKr6 zQ>3rx&8l5)j(UPcR_7J(&I?&*d)}6BW0BeBT?ujzO0tXIjLLhb;uDRDn^o5YH9kM$ z97?)K4T2h6+=se1#u=#`9HbKLZS-C2AFA0*FG%tCZ{{0b@g>P9Eii0K+#c`SQW=}k z%8&|Tnhw{ZgkGLEQOWi$(Iq`;-8*J1Hhn$x(F=umE2{LAo&9@vzrNfRXKGo=Q9}Uzt24&c-&Ja_Sn7x65-J zl9*<8j!%A6_ZfVnqN8-WoPD$QQFX|^x(TQ0t_ef2V!QpJ{31RBNlnBg*mM(Ww= z?Hvm{o~Y%xi0{0oVx#~a534e%&(U15!Umm7W8P}E5EZ;P>~^|{fW9A(DdB%^8?5a; z{%Hj!{-cqo-Nlx(3H6Iq!+YH1C5)*@D(3~Jrq`THWMZ!5V=kfl;gyDPK6o=ov*UpvlsRrnAVtB zeWKqz=t#<%<7X`nzxyI&ELJ6C`07G`sfMApD1T*tX;*~*{KBW#ODVP63N`P%7ZFJ@ zAGC4G+AOtg_v0P&ZzWh&dEAc^b!J3mv|{IH8o4=jS4-zHTjO>s5C`LfFr#yXd=w1> z_qX4ae-*c8M?haknO|$)!Q%V6w~otrL|@CCjBOe0xYy`2snF5y{`&9h8ta3V4H6UU ztSl^VKlkX}Ul?evd4Pr&^!E8`XgBx6=<1k5Pkdj6WmW0kiM+I6%}_1tqfpxYTO!`{ zCc~A-6rDP?1LlS$Ps!NUEp^h}xzot34m$irF)M?znD^!f2Kc5wujXfNe+=jxDvipa z{(=v+Fwp;j9SYC~er+DCs^RNSb#$R~P!4n_rkk4VVEzMH6qBYVYmPU74BWNp&dhE8 zo^%s`LsLh87e_KpRzrP}ss@$DcAzWyQr(ppOcp?H^g;o=RJHZm#`;^#9MNQViU?+;hX0u$F925pkM#D-lb`y%>2_Qp# z*q$yZ7)_7`U3$_O6l0w&-#Y}K)MRJDkNRM_=%dO2yOgX9VU#f#CBUKN<>$ts`YO43 zNdtK^iZCsHHgoer&*+2(FI+1#EtFw2r2In=_wLS<>CR!Y-B4V?R0p;ifmmP%a7Nkb$)PIU|f zW0fGh5{x&6u@oGh0%O2O3IwStBRGNcK~wD+M9=@F=`7A!P1&BNASBdj`n1gxyOD)^Gax| zAX&f$()7P@e9s5DnRK#|bFul~6AJWi%Js9{FAvJAtA8JhuP2=WkiuaMW)Zgpeml76 z+5jh>f%--x18jN#%*#DcZr^Ay77K&?{f!2Z2^g^57=nhyaRJJ?$B(w$bAT?-{WqJ2 z=R%(UMk5n&1n`(^HVvLJAcBS=VF4Zjf)4^?K+r(R0U+YP@d2Ij09cua238;;XjlS< zcN};e4-JDQ^XdX2Z2*Hg9PH1AkU_u`5jq3?2oMhqOC%v=KoACpLG}j`UD2h57#19EXlJ{UtJBJD>e5n%)kAiI+QehWe_2@4^{0R!6uBq;)cw(H4#o>7TOX8W! zATkO3{XPy5_arhR?m?X-BIi|5?_s2W!F+^4&UxTs147nL7{90eg|Kx8tGH{Zx0yO1^j6%dhcz%DS-AHVjoUL2|?XXcU+T;O40?4peub&a+x`ag!(7 ielN or nx0 > nxN :: ABORT" + call exitt0 + + return + end +c----------------------------------------------------------------------- diff --git a/src/driver_comm.f b/src/driver_comm.f new file mode 100644 index 0000000..d66e436 --- /dev/null +++ b/src/driver_comm.f @@ -0,0 +1,21 @@ +c----------------------------------------------------------------------- + program nekbone + + include 'SIZE' + include 'TOTAL' + include 'mpif.h' + + parameter (lxyz = lx1*ly1*lz1) + parameter (lt=lxyz*lelt) + + + call iniproc(mpi_comm_world) ! has nekmpi common block + +c GET PLATFORM CHARACTERISTICS + iverbose = 1 + call platform_timer(iverbose) ! iverbose=0 or 1 + + call exitt0 + + end +c-------------------------------------------------------------- diff --git a/src/jl/Makefile b/src/jl/Makefile new file mode 100644 index 0000000..96c2f75 --- /dev/null +++ b/src/jl/Makefile @@ -0,0 +1,91 @@ +CC=mpicc -std=c99 --pedantic +CFLAGS+=-DMPI +CFLAGS+=-DPREFIX=jl_ +CFLAGS+=-DNO_NEK_EXITT +CFLAGS+=-DGLOBAL_LONG +LDFLAGS+=-lm + +#CFLAGS+=-DPRINT_MALLOCS=1 + +CFLAGS+=-DUSE_NAIVE_BLAS +#CFLAGS+=-DUSE_CBLAS +#LDFLAGS+=-lcblas + +#CFLAGS+=-DAMG_DUMP +CFLAGS+=-DGS_TIMING -DGS_BARRIER + +#CFLAGS+=-O0 -g +CFLAGS+=-O3 -march=native + +CFLAGS+=-W -Wall -Wno-unused-function -Wno-unused-parameter +#CFLAGS+=-Minform=warn + +CCCMD=$(CC) $(G) $(CFLAGS) +LINKCMD=$(CC) $(G) $(LDFLAGS) +#RLINKCMD = $(LD) -r +.PHONY: cmds deps tests clean objects odepinfo + +TESTS=sort_test sort_test2 sarray_sort_test spchol_test \ + comm_test crystal_test sarray_transfer_test \ + gs_test gs_test_old gs_unique_test \ + xxt_test xxt_test2 crs_test \ + poly_test poly_test2 lob_bnd_test obbox_test \ + findpts_el_2_test findpts_el_2_test2 \ + findpts_el_3_test findpts_el_3_test2 \ + findpts_local_test findpts_test + +CRS=$(AMG) + +tests: $(TESTS) ; +clean: ; @$(RM) $(TESTS) *.o *.s + +cmds: ; @echo CC = $(CCCMD); echo LINK = $(LINKCMD); + +deps: ; ./cdep.py *.c > makefile.cdep; + +odepinfo: deps objects; @./odep_info.py *.o + +-include makefile.cdep + +%.o: %.c ; @echo CC $<; $(CCCMD) -c $< -o $@ +%.s: %.c ; @echo CC -S $<; $(CCCMD) -S $< -o $@ +objects: $(OBJECTS) ; + +poly_imp.h: gen_poly_imp.c + $(RM) poly_imp.h; + $(CC) -lgmp -lm gen_poly_imp.c -o gen_poly_imp; + ./gen_poly_imp > poly_imp.h; + $(RM) gen_poly_imp + +GS_OBJECTS=gs.o sort.o sarray_transfer.o sarray_sort.o \ + gs_local.o fail.o crystal.o comm.o tensor.o + +XXT=sparse_cholesky.o xxt.o +AMG=amg.o + +sort_test: sort.o fail.o comm.o tensor.o gs_local.o sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sort_test2: sort.o fail.o comm.o tensor.o gs_local.o sort_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sarray_sort_test: sort.o fail.o comm.o tensor.o gs_local.o sarray_sort.o sarray_sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +spchol_test: sparse_cholesky.o sort.o fail.o comm.o tensor.o gs_local.o spchol_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +comm_test: fail.o comm.o tensor.o gs_local.o comm_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +crystal_test: fail.o crystal.o comm.o tensor.o gs_local.o crystal_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sarray_transfer_test: sarray_transfer.o sarray_sort.o sort.o fail.o crystal.o comm.o tensor.o gs_local.o sarray_transfer_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ + +gs_test: gs_test.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +gs_test_old: gs_test_old.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +gs_unique_test: gs_unique_test.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +xxt_test: xxt_test.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +xxt_test2: xxt_test2.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +crs_test: crs_test.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ + +poly_test2: poly.o fail.o comm.o tensor.o gs_local.o poly_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +poly_test: poly.o fail.o comm.o tensor.o gs_local.o poly_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +lob_bnd_test: tensor.o poly.o lob_bnd.o fail.o comm.o gs_local.o lob_bnd_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +obbox_test: rand_elt_test.o poly.o obbox.o tensor.o lob_bnd.o fail.o comm.o gs_local.o obbox_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_2_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_2.o findpts_el_2_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_2_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_2.o findpts_el_2_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_3_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_3.o findpts_el_3_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_3_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_3.o findpts_el_3_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_local_test: rand_elt_test.o lob_bnd.o fail.o comm.o tensor.o gs_local.o poly.o findpts_local.o sort.o sarray_sort.o obbox.o findpts_el_3.o findpts_el_2.o findpts_local_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_test: sarray_transfer.o sort.o rand_elt_test.o lob_bnd.o poly.o findpts.o sarray_sort.o findpts_local.o obbox.o tensor.o findpts_el_3.o findpts_el_2.o fail.o crystal.o comm.o gs_local.o findpts_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ + diff --git a/src/jl/README b/src/jl/README new file mode 100644 index 0000000..36c66ab --- /dev/null +++ b/src/jl/README @@ -0,0 +1,69 @@ + +A high-level view of the code in this directory is as follows. See each header +file listed for more documentation. + +The following headers are fundamental to most of the code. + + name.h: a given prefix is added to all external symbols; + determines how FORTRAN routines are named + types.h: defines the integer types used everywhere (e.g., for array indices) + mem.h: memory-management wrappers; + "array" type (generic dynamically sized array); + "buffer" type ( = char array ) + comm.h: wrappers for MPI calls (with alternative single proc versions) + +The Gather/Scatter library top-level interface is defined in "gs.h". +The file "gs_defs.h" defines the datatypes and operations that it supports. + +There are two coarse solvers (XXT and AMG), which are not currently very well +documented. The interface is given in "crs.h". + +"findpts" is documented in "findpts.c". The idea is that during a run of an +SEM code, we have a geometry map + (processor, element, r, s, t) -> (x, y, z) +that defines our mesh. Within each element, the xyz coordinate is a +polynomial function of the parametric r,s,t coordinates. +"findpts" takes a distributed list of "(x,y,z)" points and computes the inverse +of the above map. +"findpts_eval" takes a list of "(proc,el,r,s,t)" coords, e.g., as returned by + "findpts", and interpolates a given field at each point. + + +The "workhorses" of the implementations of much of the above are the +"sarray_sort" and "sarray_transfer" routines, documented in the respective +headers. The "array" type, defined in "mem.h", can be used to keep track of a +dynamically sized array of (arbitrary) structs. + + sarray_sort.h: + sort an array of structs (locally/sequentially) by one or two of its fields + sarray_transfer.h: + transfer each struct in array to the processor specified by a given field + +These in turn, are implemented using the lower-level routines of +"sort.h", and "crystal_router.h". + + +The "findpts" algorithm makes use of a number of lower-level routines +possibly useful on their own. + + poly.h: computation of quadrature nodes; fast polynomial interpolation + lob_bnd.h: (relatively) fast yet robust bounds for polynomials on [-1,1]^d + obbox.h: oriented as well as axis-aligned bounding boxes for spectral els + tensor.h: some tensor-product applications, + with BLAS ops delegated to Nek, cblas, or a naive imp + +All of the preprocessor macros that affect compilation are: + name.h: PREFIX="..." prefix added to all C external symbols + FPREFIX="..." prefix added to all FORTRAN routines + UPCASE, UNDERSCORE determines FORTRAN naming convention + types.h: USE_LONG, USE_LONG_LONG, GLOBAL_LONG, GLOBAL_LONG_LONG + determine the integer types used by all code + mem.h: PRINT_MALLOCS=1 (print all mem mngmt to stdout) + comm.h: MPI (use MPI when defined; + otherwise, use a dummy single-proc implementation) + tensor.h: USE_CBLAS, USE_NAIVE_BLAS + (select BLAS implementation; default is Nek's mxm) + fail.c: NO_NEK_EXITT when defined, don't call Nek's exitt routine + amg.c: AMG_BLOCK_ROWS number of rows to read at a time (default=1200) + GS_TIMING record timings for the matrix multiplies + GS_BARRIER use a barrier to improve the quality of the timings diff --git a/src/jl/c99.h b/src/jl/c99.h new file mode 100644 index 0000000..a5a44e3 --- /dev/null +++ b/src/jl/c99.h @@ -0,0 +1,16 @@ +#ifndef C99_H +#define C99_H + +#ifndef __STDC_VERSION__ +# define NO_C99 +#elif __STDC_VERSION__ < 199901L +# define NO_C99 +#endif + +#ifdef NO_C99 +# define restrict +# define inline +# undef NO_C99 +#endif + +#endif diff --git a/src/jl/cdep.py b/src/jl/cdep.py new file mode 100755 index 0000000..a0dd87a --- /dev/null +++ b/src/jl/cdep.py @@ -0,0 +1,33 @@ +#!/usr/bin/python + +import sys, os, re + +#mergestr = lambda x: reduce((lambda a,b: a+" "+b),x,"") + +pathjoin = lambda a,b: os.path.normpath(os.path.join(a,b)) +include_re = re.compile("\s*#\s*include\s*\"([^\"]*)\"") +incmatch = lambda x: ( include_re.match(line) for line in open(x) ) +incline = lambda x,m: pathjoin(os.path.split(x)[0],m.group(1)) +incl = lambda x: [ incline(x,m) for m in incmatch(x) if m!=None ] +includes = {} +def get_include(x): + if not includes.has_key(x): includes[x] = incl(x) + return includes[x] + +def closure(seq,f): + v = [], [x for x in seq], set(x for x in seq) + while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in + f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]] + return v[0] + +src_files = sys.argv[1:] +files = closure(src_files, get_include) +deps = dict((x,closure(includes[x],lambda y: includes[y])) for x in src_files) + +obj = lambda x: os.path.splitext(x)[0]+".o" + +for x in src_files: + print obj(x)+": "+x+reduce((lambda a,b: a+" "+b),deps[x],"") + +print +print "OBJECTS="+reduce((lambda a,b: a+" "+obj(b)),src_files,"") diff --git a/src/jl/comm.c b/src/jl/comm.c new file mode 100644 index 0000000..8e5c9a3 --- /dev/null +++ b/src/jl/comm.c @@ -0,0 +1,175 @@ +#include /* for size_t */ +#include /* for exit */ +#include /* memcpy */ +#include /* for gs identities */ +#include /* for gs identities */ +#include "name.h" +#include "fail.h" +#include "types.h" +#include "tensor.h" +#include "gs_defs.h" +#include "gs_local.h" +#include "comm.h" + +uint comm_gbl_id=0, comm_gbl_np=1; + +GS_DEFINE_IDENTITIES() +GS_DEFINE_DOM_SIZES() + +static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer) +{ + comm_req req[2]; + size_t vsize = vn*gs_dom_size[dom]; + const uint id=com->id, np=com->np; + uint n = np, c=1, odd=0, base=0; + void *buf[2]; + void *red = (char*)scan+vsize; + buf[0]=buffer,buf[1]=(char*)buffer+vsize; + while(n>1) { + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } + gs_init_array(scan,vn,dom,op); + memcpy(red,v,vsize); + while(n>=1, n<<=1, n+=(odd&1); + odd>>=1; + if(base==id) { + comm_irecv(&req[0],com, buf[0],vsize, id+n/2,id+n/2); + comm_isend(&req[1],com, red ,vsize, id+n/2,id); + comm_wait(req,2); + gs_gather_array(red,buf[0],vn,dom,op); + } else { + comm_irecv(&req[0],com, scan,vsize, base,base); + comm_isend(&req[1],com, red ,vsize, base,id); + comm_wait(req,2); + break; + } + } + while(n>1) { + if(base==id) { + comm_send(com, scan ,2*vsize, id+n/2,id); + } else { + comm_recv(com, buffer,2*vsize, base,base); + gs_gather_array(scan,buf[0],vn,dom,op); + memcpy(red,buf[1],vsize); + } + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } +} + + +static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf) +{ + size_t total_size = vn*gs_dom_size[dom]; + const uint id=com->id, np=com->np; + uint n = np, c=1, odd=0, base=0; + while(n>1) { + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } + while(n>=1, n<<=1, n+=(odd&1); + odd>>=1; + if(base==id) { + comm_recv(com, buf,total_size, id+n/2,id+n/2); + gs_gather_array(v,buf,vn, dom,op); + } else { + comm_send(com, v,total_size, base,id); + break; + } + } + while(n>1) { + if(base==id) + comm_send(com, v,total_size, id+n/2,id); + else + comm_recv(com, v,total_size, base,base); + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } +} + +void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer) +{ + scan_imp(scan, com,dom,op, v,vn, buffer); +} + +void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf) +{ + if(vn==0) return; +#ifdef MPI + { + MPI_Datatype mpitype; + MPI_Op mpiop; + #define DOMAIN_SWITCH() do { \ + switch(dom) { case gs_double: mpitype=MPI_DOUBLE; break; \ + case gs_float: mpitype=MPI_FLOAT; break; \ + case gs_int: mpitype=MPI_INT; break; \ + case gs_long: mpitype=MPI_LONG; break; \ + WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;) \ + default: goto comm_allreduce_byhand; \ + } \ + } while(0) + DOMAIN_SWITCH(); + #undef DOMAIN_SWITCH + switch(op) { case gs_add: mpiop=MPI_SUM; break; + case gs_mul: mpiop=MPI_PROD; break; + case gs_min: mpiop=MPI_MIN; break; + case gs_max: mpiop=MPI_MAX; break; + default: goto comm_allreduce_byhand; + } + MPI_Allreduce(v,buf,vn,mpitype,mpiop,com->c); + memcpy(v,buf,vn*gs_dom_size[dom]); + return; + } +#endif +#ifdef MPI +comm_allreduce_byhand: + allreduce_imp(com,dom,op, v,vn, buf); +#endif +} + +double comm_dot(const struct comm *comm, double *v, double *w, uint n) +{ + double s=tensor_dot(v,w,n),b; + comm_allreduce(comm,gs_double,gs_add, &s,1, &b); + return s; +} + +/* T comm_reduce__T(const struct comm *comm, gs_op op, const T *in, uint n) */ + +#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break; +#define SWITCH_OP(T,op) do switch(op) { \ + GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0) + +#define WITH_OP(T,OP) \ + do { T v = *in++; GS_DO_##OP(accum,v); } while(--n) + +#define DEFINE_REDUCE(T) \ +T PREFIXED_NAME(comm_reduce__##T)( \ + const struct comm *comm, gs_op op, const T *in, uint n) \ +{ \ + T accum = gs_identity_##T[op], buf; \ + if(n!=0) SWITCH_OP(T,op); \ + comm_allreduce(comm,gs_##T,op, &accum,1, &buf); \ + return accum; \ +} + +GS_FOR_EACH_DOMAIN(DEFINE_REDUCE) + +#undef DEFINE_REDUCE +#undef WITH_OP +#undef SWITCH_OP +#undef SWITCH_OP_CASE + diff --git a/src/jl/comm.h b/src/jl/comm.h new file mode 100644 index 0000000..4d0ed3e --- /dev/null +++ b/src/jl/comm.h @@ -0,0 +1,255 @@ +#ifndef COMM_H +#define COMM_H + +/* requires: + for size_t + for exit + "fail.h", "types.h" + "gs_defs.h" for comm_allreduce, comm_scan, comm_reduce_T +*/ + +#if !defined(FAIL_H) || !defined(TYPES_H) +#warning "comm.h" requires "fail.h" and "types.h" +#endif + +/* + When the preprocessor macro MPI is defined, defines (very) thin wrappers + for the handful of used MPI routines. Alternatively, when MPI is not defined, + these wrappers become dummy routines suitable for a single process run. + No code outside of "comm.h" and "comm.c" makes use of MPI at all. + + Basic usage: + + struct comm c; + + comm_init(&c, MPI_COMM_WORLD); // initializes c using MPI_Comm_dup + + comm_free(&c); + + Very thin MPI wrappers: (see below for implementation) + + comm_send,_recv,_isend,_irecv,_time,_barrier + + Additionally, some reduction and scan routines are provided making use + of the definitions in "gs_defs.h" (provided this has been included first). + + Example comm_allreduce usage: + + double v[5], buf[5]; + comm_allreduce(&c, gs_double,gs_add, v,5,buf); + // Computes the vector sum of v across all procs, using + // buf as a scratch area. Delegates to MPI_Allreduce if possible. + + Example comm_scan usage: + + long in[5], out[2][5], buf[2][5]; + comm_scan(out, &c,gs_long,gs_add, in,5,buf); + // out[0] will be the vector sum of "in" across procs with ids + *strictly* less than this one (exclusive behavior), + and out[1] will be the vector sum across all procs, as would + be computed with comm_allreduce. + Note: differs from MPI_Scan which has inclusive behavior + + Example comm_reduce_double, etc. usage: + + T out, in[10]; + out = comm_reduce_T(&c, gs_max, in, 10); + // out will equal the largest element of "in", + across all processors + // T can be "double", "float", "int", "long", "slong", "sint", etc. + as defined in "gs_defs.h" + +*/ + +#ifdef MPI +#include +typedef MPI_Comm comm_ext; +typedef MPI_Request comm_req; +#else +typedef int comm_ext; +typedef int comm_req; +typedef int MPI_Fint; +#endif + +#define comm_allreduce PREFIXED_NAME(comm_allreduce) +#define comm_scan PREFIXED_NAME(comm_scan ) +#define comm_dot PREFIXED_NAME(comm_dot ) + +/* global id, np vars strictly for diagnostic messages (fail.c) */ +#ifndef comm_gbl_id +#define comm_gbl_id PREFIXED_NAME(comm_gbl_id) +#define comm_gbl_np PREFIXED_NAME(comm_gbl_np) +extern uint comm_gbl_id, comm_gbl_np; +#endif + +struct comm { + uint id, np; + comm_ext c; +}; + +static void comm_init(struct comm *c, comm_ext ce); +/* (macro) static void comm_init_check(struct comm *c, MPI_Fint ce, uint np); */ +/* (macro) static void comm_dup(struct comm *d, const struct comm *s); */ +static void comm_free(struct comm *c); +static double comm_time(void); +static void comm_barrier(const struct comm *c); +static void comm_recv(const struct comm *c, void *p, size_t n, + uint src, int tag); +static void comm_send(const struct comm *c, void *p, size_t n, + uint dst, int tag); +static void comm_irecv(comm_req *req, const struct comm *c, + void *p, size_t n, uint src, int tag); +static void comm_isend(comm_req *req, const struct comm *c, + void *p, size_t n, uint dst, int tag); +static void comm_wait(comm_req *req, int n); + +double comm_dot(const struct comm *comm, double *v, double *w, uint n); + +#ifdef GS_DEFS_H +void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf); +void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer); + +#define DEFINE_REDUCE(T) \ +T PREFIXED_NAME(comm_reduce__##T)( \ + const struct comm *comm, gs_op op, const T *in, uint n); \ +static T comm_reduce_##T(const struct comm *c, gs_op op, const T *v, uint vn) \ +{ return PREFIXED_NAME(comm_reduce__##T)(c,op,v,vn); } +GS_FOR_EACH_DOMAIN(DEFINE_REDUCE) +#undef DEFINE_REDUCE + +#define comm_reduce_sint \ + TYPE_LOCAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long) +#define comm_reduce_slong \ + TYPE_GLOBAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long) + +#endif + +/*---------------------------------------------------------------------------- + Code for static (inline) functions + ----------------------------------------------------------------------------*/ + +static void comm_init(struct comm *c, comm_ext ce) +{ +#ifdef MPI + int i; + MPI_Comm_dup(ce, &c->c); + MPI_Comm_rank(c->c,&i), comm_gbl_id=c->id=i; + MPI_Comm_size(c->c,&i), comm_gbl_np=c->np=i; +#else + c->id = 0, c->np = 1; +#endif +} + +static void comm_init_check_(struct comm *c, MPI_Fint ce, uint np, + const char *file, unsigned line) +{ +#ifdef MPI + comm_init(c,MPI_Comm_f2c(ce)); + if(c->np != np) + fail(1,file,line,"comm_init_check: passed P=%u, " + "but MPI_Comm_size gives P=%u",np,c->np); +#else + comm_init(c,0); + if(np != 1) + fail(1,file,line,"comm_init_check: passed P=%u, " + "but not compiled with -DMPI",np); +#endif +} +#define comm_init_check(c,ce,np) comm_init_check_(c,ce,np,__FILE__,__LINE__) + + +static void comm_dup_(struct comm *d, const struct comm *s, + const char *file, unsigned line) +{ + d->id = s->id, d->np = s->np; +#ifdef MPI + MPI_Comm_dup(s->c,&d->c); +#else + if(s->np!=1) fail(1,file,line,"%s not compiled with -DMPI\n",file); +#endif +} +#define comm_dup(d,s) comm_dup_(d,s,__FILE__,__LINE__) + +static void comm_free(struct comm *c) +{ +#ifdef MPI + MPI_Comm_free(&c->c); +#endif +} + +static double comm_time(void) +{ +#ifdef MPI + return MPI_Wtime(); +#else + return 0; +#endif +} + +static void comm_barrier(const struct comm *c) +{ +#ifdef MPI + MPI_Barrier(c->c); +#endif +} + +static void comm_recv(const struct comm *c, void *p, size_t n, + uint src, int tag) +{ +#ifdef MPI +# ifndef MPI_STATUS_IGNORE + MPI_Status stat; + MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,&stat); +# else + MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,MPI_STATUS_IGNORE); +# endif +#endif +} + +static void comm_send(const struct comm *c, void *p, size_t n, + uint dst, int tag) +{ +#ifdef MPI + MPI_Send(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c); +#endif +} + +static void comm_irecv(comm_req *req, const struct comm *c, + void *p, size_t n, uint src, int tag) +{ +#ifdef MPI + MPI_Irecv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,req); +#endif +} + +static void comm_isend(comm_req *req, const struct comm *c, + void *p, size_t n, uint dst, int tag) +{ +#ifdef MPI + MPI_Isend(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c,req); +#endif +} + +static void comm_wait(comm_req *req, int n) +{ +#ifdef MPI +# ifndef MPI_STATUSES_IGNORE + MPI_Status status[8]; + while(n>=8) MPI_Waitall(8,req,status), req+=8, n-=8; + if(n>0) MPI_Waitall(n,req,status); +# else + MPI_Waitall(n,req,MPI_STATUSES_IGNORE); +# endif +#endif +} + +static void comm_bcast(const struct comm *c, void *p, size_t n, uint root) +{ +#ifdef MPI + MPI_Bcast(p,n,MPI_UNSIGNED_CHAR,root,c->c); +#endif +} + +#endif diff --git a/src/jl/crs.h b/src/jl/crs.h new file mode 100644 index 0000000..e2d0d36 --- /dev/null +++ b/src/jl/crs.h @@ -0,0 +1,24 @@ +#ifndef CRS_H +#define CRS_H + +#if !defined(COMM_H) +#warning "crs.h" requires "comm.h" +#endif + +#define crs_setup PREFIXED_NAME(crs_setup) +#define crs_solve PREFIXED_NAME(crs_solve) +#define crs_stats PREFIXED_NAME(crs_stats) +#define crs_free PREFIXED_NAME(crs_free ) + +struct crs_data; + +struct crs_data *crs_setup( + uint n, const ulong *id, + uint nz, const uint *Ai, const uint *Aj, const double *A, + uint null_space, const struct comm *comm); +void crs_solve(double *x, struct crs_data *data, double *b); +void crs_stats(struct crs_data *data); +void crs_free(struct crs_data *data); + +#endif + diff --git a/src/jl/crs_test.c b/src/jl/crs_test.c new file mode 100644 index 0000000..e5367d2 --- /dev/null +++ b/src/jl/crs_test.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "gs_defs.h" +#include "comm.h" +#include "gs.h" +#include "crs.h" + +void test(const struct comm *const comm) +{ + const double A[16] = { 2, -1, -1, 0, + -1, 2, 0, -1, + -1, 0, 2, -1, + 0, -1, -1, 2 }; + const uint Ai[16] = { 0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3 }, + Aj[16] = { 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3 }; + ulong xid[4]; slong uid[4]; + double x[4]={1,1,1,1}, b[4], bmean; + uint i, w, gn, px, py; + + slong *xgid=0; double *xg=0; struct gs_data *gsh; + + struct crs_data *crs; + + w = ceil(sqrt(comm->np)); gn = (w+1)*(w+1); + + if(comm->id==0) printf("arranging procs in a %u x %u square\n", w, w); + + px = comm->id%w, py = comm->id/w; + b[0] = xid[0] = (w+1)*py +px+1; + b[1] = xid[1] = (w+1)*py +px+2; + b[2] = xid[2] = (w+1)*(py+1)+px+1; + b[3] = xid[3] = (w+1)*(py+1)+px+2; + + gn = comm_reduce_slong(comm, gs_max, (const slong*)&xid[3],1); + bmean = comm_reduce_double(comm, gs_add, b,4)/gn; + + gsh = gs_setup((const slong*)xid,4, comm,0,gs_crystal_router,0); + gs(x,gs_double,gs_add,0,gsh,0); + gs(b,gs_double,gs_add,0,gsh,0); + for(i=0;i<4;++i) b[i]=xid[i]-bmean/x[i]; + gs(b,gs_double,gs_add,0,gsh,0); + gs_free(gsh); + + gsh = gs_setup((const slong*)xid,4, comm,1,gs_crystal_router,0); + for(i=0;i<4;++i) uid[i]=comm->id; + gs(uid,gs_slong,gs_min,0,gsh,0); + gs_free(gsh); + for(i=0;i<4;++i) uid[i] = (uid[i]==comm->id?(slong)xid[i]:-(slong)xid[i]); + + if(comm->id==0) { + xgid = tmalloc(slong, gn); + xg = tmalloc(double,gn); + for(i=0;iid?uid:xgid,comm->id?4:gn, comm,0,gs_crystal_router,0); + + + if(comm->id==0) for(i=0;i<4;++i) xg[xid[i]-1]=b[i]; + gs(comm->id?b:xg,gs_double,gs_add, 0, gsh, 0); + if(comm->id==0) for(i=0;iid==0) for(i=0;i<4;++i) xg[xid[i]-1]=x[i]; + gs(comm->id?x:xg,gs_double,gs_add, 0, gsh, 0); + if(comm->id==0) for(i=0;iid==0) free(xg), free(xgid); +} + +int main(int narg, char* arg[]) +{ + comm_ext world; int np; + struct comm comm; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + test(&comm); + comm_free(&comm); + +#ifdef MPI + MPI_Finalize(); +#endif + + return 0; +} + diff --git a/src/jl/crystal.c b/src/jl/crystal.c new file mode 100644 index 0000000..a0e8135 --- /dev/null +++ b/src/jl/crystal.c @@ -0,0 +1,141 @@ +/*------------------------------------------------------------------------------ + + Crystal Router + + Accomplishes all-to-all communication in log P msgs per proc + The routine is low-level; the format of the input/output is an + array of integers, consisting of a sequence of messages with format: + + target proc + source proc + m + integer + integer + ... + integer (m integers in total) + + Before crystal_router is called, the source of each message should be + set to this proc id; upon return from crystal_router, the target of each + message will be this proc id. + + Example Usage: + + struct crystal cr; + + crystal_init(&cr, &comm); // makes an internal copy of comm + + crystal.data.n = ... ; // total number of integers (not bytes!) + buffer_reserve(&cr.data, crystal.n * sizeof(uint)); + ... // fill cr.data.ptr with messages + crystal_router(&cr); + + crystal_free(&cr); + + ----------------------------------------------------------------------------*/ + +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" + +#define crystal_init PREFIXED_NAME(crystal_init ) +#define crystal_free PREFIXED_NAME(crystal_free ) +#define crystal_router PREFIXED_NAME(crystal_router) + +struct crystal { + struct comm comm; + buffer data, work; +}; + +void crystal_init(struct crystal *p, const struct comm *comm) +{ + comm_dup(&p->comm, comm); + buffer_init(&p->data,1000); + buffer_init(&p->work,1000); +} + +void crystal_free(struct crystal *p) +{ + comm_free(&p->comm); + buffer_free(&p->data); + buffer_free(&p->work); +} + +static void uintcpy(uint *dst, const uint *src, uint n) +{ + if(dst+n<=src) memcpy (dst,src,n*sizeof(uint)); + else if(dst!=src) memmove(dst,src,n*sizeof(uint)); +} + +static uint crystal_move(struct crystal *p, uint cutoff, int send_hi) +{ + uint len, *src, *end; + uint *keep = p->data.ptr, *send; + uint n = p->data.n; + send = buffer_reserve(&p->work,n*sizeof(uint)); + if(send_hi) { /* send hi, keep lo */ + for(src=keep,end=keep+n; src=cutoff) memcpy (send,src,len*sizeof(uint)), send+=len; + else uintcpy(keep,src,len), keep+=len; + } + } else { /* send lo, keep hi */ + for(src=keep,end=keep+n; srcdata.n = keep - (uint*)p->data.ptr; + return send - (uint*)p->work.ptr; +} + +static void crystal_exchange(struct crystal *p, uint send_n, uint targ, + int recvn, int tag) +{ + comm_req req[3]; + uint count[2] = {0,0}, sum, *recv[2]; + + if(recvn) + comm_irecv(&req[1],&p->comm, &count[0],sizeof(uint), targ ,tag); + if(recvn==2) + comm_irecv(&req[2],&p->comm, &count[1],sizeof(uint), p->comm.id-1,tag); + comm_isend(&req[0],&p->comm, &send_n,sizeof(uint), targ,tag); + comm_wait(req,recvn+1); + + sum = p->data.n + count[0] + count[1]; + buffer_reserve(&p->data,sum*sizeof(uint)); + recv[0] = (uint*)p->data.ptr + p->data.n, recv[1] = recv[0] + count[0]; + p->data.n = sum; + + if(recvn) comm_irecv(&req[1],&p->comm, + recv[0],count[0]*sizeof(uint), targ ,tag+1); + if(recvn==2) comm_irecv(&req[2],&p->comm, + recv[1],count[1]*sizeof(uint), p->comm.id-1,tag+1); + comm_isend(&req[0],&p->comm, p->work.ptr,send_n*sizeof(uint), targ,tag+1); + comm_wait(req,recvn+1); +} + +void crystal_router(struct crystal *p) +{ + uint bl=0, bh, nl; + uint id = p->comm.id, n=p->comm.np; + uint send_n, targ, tag = 0; + int send_hi, recvn; + while(n>1) { + nl = (n+1)/2, bh = bl+nl; + send_hi = id +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "crystal.h" + +int main(int narg, char *arg[]) +{ + comm_ext world; int np; + struct comm comm; + struct crystal cr; + uint i,sum, *data, *end; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + + crystal_init(&cr,&comm); + + cr.data.n = (4+(comm.id&1))*comm.np; + buffer_reserve(&cr.data,cr.data.n*sizeof(uint)); + data = cr.data.ptr; + for(i=0;i %u:",data[1],data[0]); + for(i=0;i /* sprintf, vfprintf, stdout */ +#include /* va_list, va_start, ... */ +#include /* exit */ +#include /* memcpy, and str* functions in comm_fail */ +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" + +#define nek_exitt FORTRAN_UNPREFIXED(exitt,EXITT) +void die(int status) +{ +#ifdef NO_NEK_EXITT + if(comm_gbl_id==0) exit(status); else for(;;) ; +#else + exit(status); +#endif +} + +void vdiagnostic(const char *prefix, const char *file, unsigned line, + const char *fmt, va_list ap) +{ + static char buf[2048]; int n,na,i=0; + sprintf(buf,"%s(proc %04d, %s:%d): ",prefix,(int)comm_gbl_id,file,line); + vsprintf(buf+strlen(buf),fmt,ap); + strcat(buf,"\n"); + n=strlen(buf); + while(n && (na=fwrite(buf+i,1,n,stdout))) n-=na, i+=na; + fflush(stdout); +} + +void diagnostic(const char *prefix, const char *file, unsigned line, + const char *fmt, ...) +{ + va_list ap; va_start(ap,fmt); + vdiagnostic(prefix,file,line,fmt,ap); + va_end(ap); +} + +void vfail(int status, const char *file, unsigned line, + const char *fmt, va_list ap) +{ + vdiagnostic("ERROR ",file,line,fmt,ap); + die(status); +} + +void fail(int status, const char *file, unsigned line, + const char *fmt, ...) +{ + va_list ap; va_start(ap,fmt); + vfail(status,file,line,fmt,ap); + va_end(ap); +} diff --git a/src/jl/fail.h b/src/jl/fail.h new file mode 100644 index 0000000..0185110 --- /dev/null +++ b/src/jl/fail.h @@ -0,0 +1,52 @@ +#ifndef FAIL_H +#define FAIL_H + +#if !defined(NAME_H) +#warning "fail.h" requires "name.h" +#endif + +#define die PREFIXED_NAME( die ) +#define vdiagnostic PREFIXED_NAME(vdiagnostic) +#define diagnostic PREFIXED_NAME( diagnostic) +#define vfail PREFIXED_NAME(vfail ) +#define fail PREFIXED_NAME( fail ) + +#ifdef __GNUC__ +# define ATTRBD __attribute__ ((noreturn)) +# define ATTRB4V __attribute__ ((format(printf,4,0))) +# define ATTRB4 __attribute__ ((format(printf,4,5))) +# define ATTRB4DV __attribute__ ((noreturn,format(printf,4,0))) +# define ATTRB4D __attribute__ ((noreturn,format(printf,4,5))) +#else +# define ATTRBD +# define ATTRB4V +# define ATTRB4 +# define ATTRB4DV +# define ATTRB4D +#endif + +#define DEF_FUNS() \ + void die(int status) ATTRBD; \ + void diagnostic(const char *prefix, const char *file, unsigned line, \ + const char *fmt, ...) ATTRB4 ; \ + void fail (int status, const char *file, unsigned line, \ + const char *fmt, ...) ATTRB4D ; +#define VDEF_FUNS() \ + void vdiagnostic(const char *prefix, const char *file, unsigned line, \ + const char *fmt, va_list ap) ATTRB4V ; \ + void vfail (int status, const char *file, unsigned line, \ + const char *fmt, va_list ap) ATTRB4DV ; +DEF_FUNS() +#ifdef va_arg +VDEF_FUNS() +#endif + +#undef VDEF_FUNS +#undef DEF_FUNS +#undef ATTRB4D +#undef ATTRB4DV +#undef ATTRB4 +#undef ATTRB4V +#undef ATTRBD + +#endif diff --git a/src/jl/fcrystal.c b/src/jl/fcrystal.c new file mode 100644 index 0000000..3fe4c9a --- /dev/null +++ b/src/jl/fcrystal.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "comm.h" +#include "crystal.h" +#include "sort.h" +#include "sarray_sort.h" +#include "sarray_transfer.h" + +/*-------------------------------------------------------------------------- + + FORTRAN Interface to crystal router + + integer h, np + MPI_Comm comm + call crystal_setup(h,comm,np) ! set h to handle to new instance + ! it is a runtime error if MPI_Comm_size gives a value different than np + call crystal_free(h) ! release instance + + integer*? ituple(m,max) ! integer type matching sint from "types.h" + call crystal_ituple_transfer(h, ituple,m,n,max, kp) + - moves each column ituple(:,i), 1 <= i <= n, + to proc ituple(kp,i) + - sets n to the number of columns received, + which may be larger than max (indicating loss of n-max columns) + - also sets ituple(kp,i) to the source proc of column ituple(:,i) + + call crystal_ituple_sort(h, ituple,m,n, key,nkey) + - locally sorts columns ituple(:,1...n) in ascending order, + ranked by ituple(key(1),i), + then ituple(key(2),i), + ... + then ituple(key(nkey),i) + - no communication; h used for scratch area + - linear time + - assumes nonnegative integers + + integer*? vi(mi,max) ! integer type matching sint from "types.h" + integer*? vl(ml,max) ! integer type matching slong from "types.h" + real vr(mr,max) + call crystal_tuple_transfer(h,n,max, vi,mi,vl,ml,vr,mr, kp) + - moves each column vi(:,i),vl(:,i),vr(:,i) 1 <= i <= n, + to proc vi(kp,i) + - sets n to the number of columns received, + which may be larger than max (indicating loss of n-max columns) + - also sets vi(kp,i) to the source proc of columns vi(:,i),vl(:,i),vr(:,i) + + call crystal_tuple_sort(h,n, vi,mi,vl,ml,vr,mr, key,nkey) + - locally sorts columns vi/vl/vr (:,1...n) in ascending order, + ranked by vi(key(1),i) [ or vl(key(1)-mi,i) if key(1)>mi ], + then vi(key(2),i) [ or vl(key(2)-mi,i) if key(2)>mi ], + ... + then vi(key(nkey),i) or vl(key(nkey)-mi,i) + - no communication; h used for scratch area + - linear time + - assumes nonnegative integers + - sorting on reals not yet implemented + + --------------------------------------------------------------------------*/ + +#undef crystal_free +#define ccrystal_free PREFIXED_NAME(crystal_free) + +#define fcrystal_setup \ + FORTRAN_NAME(crystal_setup ,CRYSTAL_SETUP ) +#define fcrystal_ituple_sort \ + FORTRAN_NAME(crystal_ituple_sort ,CRYSTAL_ITUPLE_SORT ) +#define fcrystal_tuple_sort \ + FORTRAN_NAME(crystal_tuple_sort ,CRYSTAL_TUPLE_SORT ) +#define fcrystal_ituple_transfer \ + FORTRAN_NAME(crystal_ituple_transfer,CRYSTAL_ITUPLE_TRANSFER) +#define fcrystal_tuple_transfer \ + FORTRAN_NAME(crystal_tuple_transfer ,CRYSTAL_TUPLE_TRANSFER ) +#define fcrystal_free \ + FORTRAN_NAME(crystal_free ,CRYSTAL_FREE ) + +static struct crystal **handle_array = 0; +static int handle_max = 0; +static int handle_n = 0; + +void fcrystal_setup(sint *handle, const MPI_Fint *comm, const sint *np) +{ + struct crystal *p; + if(handle_n==handle_max) + handle_max+=handle_max/2+1, + handle_array=trealloc(struct crystal*,handle_array,handle_max); + handle_array[handle_n]=p=tmalloc(struct crystal,1); + comm_init_check(&p->comm, *comm, *np); + buffer_init(&p->data,1000); + buffer_init(&p->work,1000); + *handle = handle_n++; +} + +#define CHECK_HANDLE(func) do \ + if(*handle<0 || *handle>=handle_n || !handle_array[*handle]) \ + fail(1,__FILE__,__LINE__,func ": invalid handle"); \ +while(0) + +void fcrystal_ituple_sort(const sint *handle, + sint A[], const sint *m, const sint *n, + const sint keys[], const sint *nkey) +{ + const size_t size = (*m)*sizeof(sint); + sint nk = *nkey; + buffer *buf; + CHECK_HANDLE("crystal_ituple_sort"); + buf = &handle_array[*handle]->data; + if(--nk>=0) { + sortp(buf,0, (uint*)&A[keys[nk]-1],*n,size); + while(--nk>=0) + sortp(buf,1, (uint*)&A[keys[nk]-1],*n,size); + sarray_permute_buf_(ALIGNOF(sint),size,A,*n, buf); + } +} + +void fcrystal_tuple_sort(const sint *const handle, const sint *const n, + sint Ai[], const sint *const mi, + slong Al[], const sint *const ml, + double Ad[], const sint *const md, + const sint keys[], const sint *const nkey) +{ + const size_t size_i = (*mi)*sizeof(sint), + size_l = (*ml)*sizeof(slong), + size_d = (*md)*sizeof(double); + int init=0; + sint nk = *nkey; + buffer *buf; + CHECK_HANDLE("crystal_tuple_sort"); + buf = &handle_array[*handle]->data; + if(nk<=0) return; + while(--nk>=0) { + sint k = keys[nk]-1; + if(k<0 || k>=*mi+*ml) + fail(1,__FILE__,__LINE__,"crystal_tuple_sort: invalid key"); + else if(k<*mi) sortp (buf,init, (uint *)&Ai[k], *n,size_i); + else sortp_long(buf,init, (ulong*)&Al[k-*mi],*n,size_l); + init=1; + } + if(*mi) sarray_permute_buf_(ALIGNOF(sint ),size_i,Ai,*n, buf); + if(*ml) sarray_permute_buf_(ALIGNOF(slong ),size_l,Al,*n, buf); + if(*md) sarray_permute_buf_(ALIGNOF(double),size_d,Ad,*n, buf); +} + +void fcrystal_ituple_transfer(const sint *handle, + sint A[], const sint *m, sint *n, + const sint *nmax, const sint *proc_key) +{ + struct array ar, *const ar_ptr = &ar; + const unsigned size=(*m)*sizeof(sint); + CHECK_HANDLE("crystal_ituple_transfer"); + ar.ptr=A, ar.n=*n, ar.max=*nmax; + *n = sarray_transfer_many(&ar_ptr,&size,1, 1,0,1,(*proc_key-1)*sizeof(sint), + (uint*)&A[*proc_key-1],size, handle_array[*handle]); +} + +void fcrystal_tuple_transfer( + const sint *const handle, sint *const n, const sint *const max, + sint Ai[], const sint *const mi, + slong Al[], const sint *const ml, + double Ad[], const sint *const md, + const sint *const proc_key) +{ + struct array ar_i, ar_l, ar_d, *ar[3]; + unsigned size[3]; + CHECK_HANDLE("crystal_tuple_transfer"); + size[0]=*mi*sizeof(sint); + size[1]=*ml*sizeof(slong); + size[2]=*md*sizeof(double); + ar[0]=&ar_i, ar[1]=&ar_l, ar[2]=&ar_d; + ar_i.ptr=Ai,ar_l.ptr=Al,ar_d.ptr=Ad; + ar_i.n=ar_l.n=ar_d.n = *n; + ar_i.max=ar_l.max=ar_d.max=*max; + *n = sarray_transfer_many(ar,size,3, 1,0,1,(*proc_key-1)*sizeof(sint), + (uint*)&Ai[*proc_key-1],size[0], handle_array[*handle]); +} + +void fcrystal_free(sint *handle) +{ + CHECK_HANDLE("crystal_free"); + ccrystal_free(handle_array[*handle]); + free(handle_array[*handle]); + handle_array[*handle] = 0; +} + + diff --git a/src/jl/gen_poly_imp.c b/src/jl/gen_poly_imp.c new file mode 100644 index 0000000..21a7410 --- /dev/null +++ b/src/jl/gen_poly_imp.c @@ -0,0 +1,227 @@ +#include +#include +#include + +#define PREC_BITS 256 +#define DIGITS 50 + +#define GLL_LAG_FIX_MAX 16 + +#if 1 +# define STATIC "static " +#else +# define STATIC "" +#endif + + +#define PI 3.1415926535897932384626433832795028841971693993751058209749445923 + +#define DECLARE_1VAR(a) static int init=0; static mpf_t a; \ + if(!init) init=1, mpf_init(a) +#define DECLARE_2VARS(a,b) static int init=0; static mpf_t a,b; \ + if(!init) init=1, mpf_init(a), mpf_init(b) +#define DECLARE_3VARS(a,b,c) static int init=0; static mpf_t a,b,c; \ + if(!init) init=1, mpf_init(a), mpf_init(b), \ + mpf_init(c) +#define DECLARE_4VARS(a,b,c,d) static int init=0; static mpf_t a,b,c,d; \ + if(!init) init=1, mpf_init(a), mpf_init(b), \ + mpf_init(c), mpf_init(d) + +static int is_small(const mpf_t x, const mpf_t y) { + DECLARE_2VARS(xa,ya); + mpf_abs(xa,x); + mpf_abs(ya,y); + mpf_div_2exp(ya,ya,PREC_BITS-mp_bits_per_limb); + return mpf_cmp(xa,ya) < 0; +} + +typedef void fun_3term(mpf_t Pn, int n, const mpf_t x); + +#define DECLARE_THREE_TERM(name, i0_init, init_Ps, a_ip1,a_i,a_im1) \ +static void name(mpf_t Pn, int n, const mpf_t x) \ +{ \ + int i, i0_init; \ + DECLARE_4VARS(a,b,P_im1,P_i); \ + init_Ps; \ + for(i=i0+1; ii0?P_i:P_im1); \ +} + +DECLARE_THREE_TERM(legendre, i0=0,(mpf_set_ui(P_im1,1),mpf_set (P_i,x)), + i+1, 2*i+1, i ) +DECLARE_THREE_TERM(legendre_d1, i0=0,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,1)), + i , 2*i+1, i+1) +DECLARE_THREE_TERM(legendre_d2, i0=1,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,3)), + i-1, 2*i+1, i+2) + +static void newton(mpf_t x, double seed, + fun_3term *fun, fun_3term *der, int n) +{ + DECLARE_3VARS(ox,f,df); + mpf_set_d(x, seed); + do { + mpf_set(ox, x); + fun(f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f); + } while(!is_small(f,x)); + fun( f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f); +} + +static void gauss_node(mpf_t z, int n, int i) { + if( (n&1) && i==n/2 ) mpf_set_ui(z,0); + else newton(z, cos( (2*n-2*i-1)*(PI/2)/n ), legendre,legendre_d1,n); +} + +static void lobatto_node(mpf_t z, int n, int i) { + if( (n&1) && i==n/2 ) mpf_set_ui(z,0); + else if(i==0) mpf_set_d(z,-(double)1); + else if(i==n-1) mpf_set_ui(z,1); + else newton(z, cos( (n-1-i)*PI/(n-1) ), legendre_d1,legendre_d2,n-1); +} + +#define PRINT_LIST(i, i0,nline,n, printi,sep,sepline) \ + do { \ + int i; \ + for(i=i0;i3) { + printf("static const double gllz_%02d[%2d] = {\n ",n,n/2-1); + for(i=1;i<=n/2-1;++i) { + lobatto_node(z, n,n-1-i); + if(i!=1) printf(",\n "); + gmp_printf("%.*Fg",DIGITS,z); + } + puts("\n};\n"); + } + printf(STATIC "void gll_lag_%02d(double *restrict p, double *restrict data,\n" + " unsigned n, int d, double xh)\n{\n",n); + printf(" const double *restrict w = data;\n"); + printf(" const double x = xh*2;\n"); + #define PRINT_D(i) do { \ + printf("d%02d=x",i); \ + if(2*i+1==n) printf(" "); \ + else if(i==0) printf("+2 "); \ + else if(i==n-1) printf("-2 "); \ + else if(i=n-2?printf(" %d",n-1-(i)):printf("v1_%02d",i)) + #define PRINT_U2(i) (i<=1 ?printf(" 0"): \ + (i==2 ?printf(" 2"):printf("u2_%02d",i))) + #define PRINT_V2(i) (i>=n-2?printf(" 0"): \ + (i==n-3?printf(" 2"):printf("v2_%02d",i))) + printf("%s",";\n const double "); + PRINT_LIST(i, 1,3,n, + (PRINT_U0(i),putchar('='),PRINT_U0(i-1),printf("*d%02d",i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 1,3,n, + (PRINT_V0(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V0(n-i)), + ",",",\n "); + printf("%s",";\n "); + PRINT_LIST(i, 0,3,n, + (printf("p[%2d]=w[%2d]*",i,i),PRINT_U0(i),putchar('*'), + PRINT_V0(i)),"; ",";\n "); + puts(";\n if(d>0) {"); + if(n>2) { + printf("%s"," const double "); + PRINT_LIST(i, 2,2,n, + (PRINT_U1(i),putchar('='),PRINT_U1(i-1),printf("*d%02d",i-1), + putchar('+'),PRINT_U0(i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 2,2,n, + (PRINT_V1(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V1(n-i), + putchar('+'),PRINT_V0(n-i)), + ",",",\n "); + puts(";"); + } + for(i=0;i1) {"); + if(n>3) { + printf("%s"," const double "); + PRINT_LIST(i, 3,2,n, + (PRINT_U2(i),putchar('='),PRINT_U2(i-1),printf("*d%02d",i-1), + printf("+2*"),PRINT_U1(i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 3,2,n, + (PRINT_V2(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V2(n-i), + printf("+2*"),PRINT_V1(n-i)), + ",",",\n "); + puts(";"); + } + if(n<3) for(i=0;i1) + PRINT_U2(i),putchar('*'),PRINT_V0(i); + else printf(" "); + if(i>0 && i + +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" + +#ifdef _OPENMP +#include "omp.h" +#endif + +#define gs_op gs_op_t /* fix conflict with fortran */ + +#include "gs_defs.h" +#include "gs_local.h" +#include "comm.h" +#include "mem.h" +#include "sort.h" +#include "crystal.h" +#include "sarray_sort.h" +#include "sarray_transfer.h" + +#define gs PREFIXED_NAME(gs ) +#define gs_vec PREFIXED_NAME(gs_vec ) +#define gs_many PREFIXED_NAME(gs_many ) +#define gs_setup PREFIXED_NAME(gs_setup ) +#define gs_free PREFIXED_NAME(gs_free ) +#define gs_unique PREFIXED_NAME(gs_unique) + +GS_DEFINE_DOM_SIZES() + +typedef enum { mode_plain, mode_vec, mode_many, + mode_dry_run } gs_mode; + +static buffer static_buffer = null_buffer; + +static void gather_noop( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{} + +static void scatter_noop( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{} + +static void init_noop( + void *out, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{} + +/*------------------------------------------------------------------------------ + Topology Discovery +------------------------------------------------------------------------------*/ + +struct gs_topology { + ulong total_shared; /* number of globally unique shared ids */ + struct array nz; /* array of nonzero_id's, grouped by id, + sorted by primary index, then flag, then index */ + struct array sh; /* array of shared_id's, arbitrary ordering */ + struct array pr; /* array of primary_shared_id's */ +}; + +static void gs_topology_free(struct gs_topology *top) +{ + array_free(&top->pr); + array_free(&top->sh); + array_free(&top->nz); +} + +/************** Local topology **************/ + +/* nonzero_ids (local part) + + Creates an array of s_nonzeros, one per nonzero in user id array. The + output array is grouped by id. Within each group, non-flagged entries come + first; otherwise the entries within the group are sorted by the index into + the user id array. The first index in each group is the primary index, and + is stored along with each entry. The groups themselves are ordered in + increasing order of the primary index associated with the group (as opposed + to the user id). */ + +struct nonzero_id { + ulong id; uint i, flag, primary; +}; + +static void nonzero_ids(struct array *nz, + const slong *id, const uint n, buffer *buf) +{ + ulong last_id = -(ulong)1; + uint i, primary = -(uint)1; + struct nonzero_id *row, *end; + array_init(struct nonzero_id,nz,n), end=row=nz->ptr; + for(i=0;ii = i; + end->id = abs_id; + end->flag = id_i!=abs_id; + ++end; + } + nz->n = end-row; + array_resize(struct nonzero_id,nz,nz->n); + sarray_sort_2(struct nonzero_id,nz->ptr,nz->n, id,1, flag,0, buf); + for(row=nz->ptr,end=row+nz->n;row!=end;++row) { + ulong this_id = row->id; + if(this_id!=last_id) primary = row->i; + row->primary = primary; + last_id = this_id; + } + sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf); +} + +/************** Global topology **************/ + +/* construct list of all unique id's on this proc */ +struct unique_id { ulong id; uint work_proc, src_if; }; +static void unique_ids(struct array *un, const struct array *nz, const uint np) +{ + struct unique_id *un_row; + const struct nonzero_id *nz_row, *nz_end; + array_init(struct unique_id,un,nz->n), un_row=un->ptr; + for(nz_row=nz->ptr,nz_end=nz_row+nz->n;nz_row!=nz_end;++nz_row) { + if(nz_row->i != nz_row->primary) continue; + un_row->id = nz_row->id; + un_row->work_proc = nz_row->id%np; + un_row->src_if = nz_row->flag ? ~nz_row->i : nz_row->i; + ++un_row; + } + un->n = un_row - (struct unique_id*)un->ptr; +} + +/* shared_ids (global part) + + Creates an array of shared_id's from an array of nonzero_id's. Each entry + in the output identifies one id shared with one other processor p. + Note: two procs share an id only when at least one of them has it unflagged. + The primary index is i locally and ri remotely. Bit 1 of flags indicates + the local flag, bit 2 indicates the remote flag. The output has no + particular ordering. + + Also creates an array of primary_shared_id's, one for each shared id. + This struct includes ord, a global rank of the id (arbitrary, but unique). */ + +#define FLAGS_LOCAL 1 +#define FLAGS_REMOTE 2 + +/* i : local primary index + p : remote proc + ri : remote primary index + bi : buffer index (set and used during pw setup) */ +struct shared_id { + ulong id; uint i, p, ri, bi; unsigned flags; +}; + +struct primary_shared_id { + ulong id, ord; uint i; unsigned flag; +}; + + + +struct shared_id_work { ulong id,ord; uint p1, p2, i1f, i2f; }; +static void shared_ids_aux(struct array *sh, struct array *pr, uint pr_n, + struct array *wa, buffer *buf) +{ + const struct shared_id_work *w, *we; + struct shared_id *s; + struct primary_shared_id *p; + ulong last_id = -(ulong)1; + /* translate work array to output arrays */ + sarray_sort(struct shared_id_work,wa->ptr,wa->n, id,1, buf); + array_init(struct shared_id,sh,wa->n), sh->n=wa->n, s=sh->ptr; + array_init(struct primary_shared_id,pr,pr_n), p=pr->ptr; + for(w=wa->ptr,we=w+wa->n;w!=we;++w) { + uint i1f = w->i1f, i2f = w->i2f; + uint i1 = ~i1fid=w->id, s->i=i1, s->p=w->p2, s->ri=i2; + s->flags = ((i2f^i2)&FLAGS_REMOTE) | ((i1f^i1)&FLAGS_LOCAL); + ++s; + if(w->id!=last_id) { + last_id=w->id; + p->id=last_id, p->ord=w->ord, p->i=i1, p->flag=(i1f^i1)&FLAGS_LOCAL; + ++p; + } + } + pr->n = p-(struct primary_shared_id*)pr->ptr; + sarray_sort(struct primary_shared_id,pr->ptr,pr->n, i,0, buf); +} + +static ulong shared_ids(struct array *sh, struct array *pr, + const struct array *nz, struct crystal *cr) +{ + struct array un; struct unique_id *un_row, *un_end, *other; + ulong last_id = -(ulong)1; + ulong ordinal[2], n_shared=0, scan_buf[2]; + struct array wa; struct shared_id_work *w; + uint n_unique; + /* construct list of all unique id's on this proc */ + unique_ids(&un,nz,cr->comm.np); + n_unique = un.n; + /* transfer list to work procs */ + sarray_transfer(struct unique_id,&un, work_proc,1, cr); + /* group by id, put flagged entries after unflagged (within each group) */ + sarray_sort_2(struct unique_id,un.ptr,un.n, id,1, src_if,0, &cr->data); + /* count shared id's */ + for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) { + ulong id = un_row->id; + if(~un_row->src_ifsrc_if) continue; + if(id==last_id) continue; + other=un_row+1; + if(other!=un_end&&other->id==id) last_id=id, ++n_shared; + } + comm_scan(ordinal, &cr->comm,gs_slong,gs_add, &n_shared,1, scan_buf); + /* there are ordinal[1] globally shared unique ids; + and ordinal[0] of those are seen by work procs of lower rank; + i.e., this work processor sees the range ordinal[0] + (0,n_shared-1) */ + /* construct list of shared ids */ + last_id = -(ulong)1; + array_init(struct shared_id_work,&wa,un.n), wa.n=0, w=wa.ptr; + for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) { + ulong id = un_row->id; + uint p1 = un_row->work_proc, i1f = un_row->src_if; + if(~i1fid==id;++other) { + uint p2 = other->work_proc, i2f = other->src_if; + ulong ord; + if(id!=last_id) last_id=id, ++ordinal[0]; + ord=ordinal[0]-1; + if(wa.n+2>wa.max) + array_reserve(struct shared_id_work,&wa,wa.n+2), + w=(struct shared_id_work*)wa.ptr+wa.n; + w->id=id, w->ord=ord, w->p1=p1, w->p2=p2, w->i1f=i1f, w->i2f=i2f, ++w; + w->id=id, w->ord=ord, w->p1=p2, w->p2=p1, w->i1f=i2f, w->i2f=i1f, ++w; + wa.n+=2; + } + } + /* transfer shared list to source procs */ + sarray_transfer(struct shared_id_work,&wa, p1,0, cr); + /* fill output arrays from work array */ + shared_ids_aux(sh,pr,n_unique,&wa,&cr->data); + array_free(&un); + array_free(&wa); + return ordinal[1]; +} + +static void get_topology(struct gs_topology *top, + const slong *id, uint n, struct crystal *cr) +{ + nonzero_ids(&top->nz,id,n,&cr->data); + top->total_shared = shared_ids(&top->sh,&top->pr, &top->nz,cr); +} + +static void make_topology_unique(struct gs_topology *top, slong *id, + uint pid, buffer *buf) +{ + struct array *const nz=&top->nz, *const sh=&top->sh, *const pr=&top->pr; + struct nonzero_id *pnz; + struct shared_id *pb, *pe, *e, *out; + struct primary_shared_id *q; + + /* flag local non-primaries */ + sarray_sort(struct nonzero_id,nz->ptr,nz->n, i,0, buf); + if(id) { + struct nonzero_id *p,*e; + for(p=nz->ptr,e=p+nz->n;p!=e;++p) + if(p->i != p->primary) id[p->i]=-(slong)p->id,p->flag=1; + } else { + struct nonzero_id *p,*e; + for(p=nz->ptr,e=p+nz->n;p!=e;++p) + if(p->i != p->primary) p->flag=1; + } + sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf); + + /* assign owner among shared primaries */ + + /* create sentinel with i = -1 */ + array_reserve(struct shared_id,sh,sh->n+1); + ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1; + /* in the sorted list of procs sharing a given id, + the owner is chosen to be the j^th unflagged proc, + where j = id mod (length of list) */ + sarray_sort_2(struct shared_id,sh->ptr,sh->n, i,0, p,0, buf); + out=sh->ptr; pnz=top->nz.ptr; + for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) { + uint i = pb->i, lt=0,gt=0, owner; struct shared_id *p; + while(pnz->i!=i) ++pnz; + /* note: current proc not in list */ + for(pe=pb; pe->i==i && pe->pflags&FLAGS_REMOTE)) ++lt; + for( ; pe->i==i ; ++pe) if(!(pe->flags&FLAGS_REMOTE)) ++gt; + if(!(pb->flags&FLAGS_LOCAL)) { + owner = pb->id%(lt+gt+1); + if(owner==lt) goto make_sh_unique_mine; + if(owner>lt) --owner; + } else + owner = pb->id%(lt+gt); + /* we don't own pb->id */ + if(id) id[i] = -(slong)pb->id; + pnz->flag=1; + /* we only share this id with the owner now; remove the other entries */ + for(p=pb; p!=pe; ++p) if(!(p->flags&FLAGS_REMOTE) && !(owner--)) break; + if(p!=pe) *out=*p, out->flags=FLAGS_LOCAL, ++out; + continue; + make_sh_unique_mine: + /* we own pb->id */ + if(out==pb) { out=pe; for(p=pb; p!=pe; ++p) p->flags=FLAGS_REMOTE; } + else for(p=pb; p!=pe; ++p) *out=*p,out->flags=FLAGS_REMOTE,++out; + } + sh->n = out - ((struct shared_id*)sh->ptr); + + /* set primary_shared_id flags to match */ + ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1; + sarray_sort(struct shared_id,sh->ptr,sh->n, id,1, buf); + sarray_sort(struct primary_shared_id,pr->ptr,pr->n, id,1, buf); + q=pr->ptr; + for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) { + uint i=pb->i; + pe=pb; while(pe->i==i) ++pe; + if(q->id!=pb->id) printf("FAIL!!!\n"); + q->flag=pb->flags&FLAGS_LOCAL; + ++q; + } +} + + +/*------------------------------------------------------------------------------ + Divide lists for parallel execution +------------------------------------------------------------------------------*/ + +void sublist(const uint *map, uint ***slPtr) { + + // Iterate over array and count items and lists + + uint i,j; + int itemCount = 0, listCount = 0; + const uint *lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + listCount++; + + j=*lmap++; + do { + itemCount++; + } while ((j=*lmap++)!=-(unsigned int)1); + } + + // Determine number of threads and lists + + int maxThreads = 1; +#ifdef _OPENMP + maxThreads = omp_get_max_threads(); +#endif + int max = (maxThreads <= listCount) ? maxThreads : listCount; + if (max == 0) max = 1; + + // Setup sublists + + uint *subListData = tmalloc(uint, max+itemCount+2*listCount); + *slPtr = tmalloc(uint*, maxThreads); + uint **subListPtr = *slPtr; + + subListData[0] = -(unsigned int)1; + subListPtr[0] = subListData; + int nextSubList = 1; + + // Populate sublists + + int copyItemCount = 0; + lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + *subListData++ = i; + + j=*lmap++; + do { + *subListData++ = j; + copyItemCount++; + } while ((j=*lmap++)!=-(unsigned int)1); + *subListData++ = -(unsigned int) 1; + + if ( copyItemCount*max >= itemCount*nextSubList ) { + *subListData= -(unsigned int)1; + + if (copyItemCount != itemCount) { + subListData++; + subListPtr[nextSubList] = subListData; + nextSubList++; + } + } + } + + // Terminate unused sublists + + for (; nextSubList < maxThreads; nextSubList++) { + subListPtr[nextSubList] = subListData; + } + + return; +} + +void subflagged(const uint *map, uint ***slPtr) { + + // Iterate over map and count items + + int count = 0; + const uint *lmap = map; + while(*lmap++ !=-(unsigned int)1) count++; + + // Determine number of threads and sublists + + int maxThreads = 1; +#ifdef _OPENMP + maxThreads = omp_get_max_threads(); +#endif + int maxLists = (maxThreads <= count) ? maxThreads : count; + if (maxLists == 0) maxLists = 1; + + // Setup empty sublists + + uint *subFlaggedData = tmalloc(uint, maxLists+count); + *slPtr = tmalloc(uint*, maxThreads); + + subFlaggedData[0] = -(unsigned int)1; + (*slPtr)[0] = subFlaggedData; + int nextList = 1; + + // Populate sublists + + int copyCount=0; + uint i; + lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + *subFlaggedData++ = i; + copyCount++; + + if (copyCount*maxLists >= count*nextList) { + *subFlaggedData = -(unsigned int)1; + + if (copyCount != count) { + subFlaggedData++; + (*slPtr)[nextList] = subFlaggedData; + nextList++; + } + } + } + + // Terminate unused sublists + + for (; nextList < maxThreads; nextList++) { + (*slPtr)[nextList] = subFlaggedData; + } + + return; +} + +/*------------------------------------------------------------------------------ + Local setup +------------------------------------------------------------------------------*/ + +/* assumes nz is sorted by primary, then flag, then index */ +static const uint *local_map(const struct array *nz, const int ignore_flagged) +{ + uint *map, *p, count = 1; + const struct nonzero_id *row, *other, *end; +#define DO_COUNT(cond) do \ + for(row=nz->ptr,end=row+nz->n;row!=end;) { \ + ulong row_id = row->id; int any=0; \ + for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \ + any=2, ++count; \ + count+=any, row=other; \ + } while(0) + if(ignore_flagged) DO_COUNT(other->flag==0); else DO_COUNT(1); +#undef DO_COUNT + p = map = tmalloc(uint,count); +#define DO_SET(cond) do \ + for(row=nz->ptr,end=row+nz->n;row!=end;) { \ + ulong row_id = row->id; int any=0; \ + *p++ = row->i; \ + for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \ + any=1, *p++ = other->i; \ + if(any) *p++ = -(uint)1; else --p; \ + row=other; \ + } while(0) + if(ignore_flagged) DO_SET(other->flag==0); else DO_SET(1); +#undef DO_SET + *p = -(uint)1; + return map; +} + +static const uint *flagged_primaries_map(const struct array *nz) +{ + uint *map, *p, count=1; + const struct nonzero_id *row, *end; + for(row=nz->ptr,end=row+nz->n;row!=end;++row) + if(row->i==row->primary && row->flag==1) ++count; + p = map = tmalloc(uint,count); + for(row=nz->ptr,end=row+nz->n;row!=end;++row) + if(row->i==row->primary && row->flag==1) *p++ = row->i; + *p = -(uint)1; + return map; +} + +/*------------------------------------------------------------------------------ + Remote execution and setup +------------------------------------------------------------------------------*/ + +typedef void exec_fun( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf); +typedef void fin_fun(void *data); + +struct gs_remote { + uint buffer_size; + void *data; + exec_fun *exec; + fin_fun *fin; +}; + +typedef void setup_fun(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf); + +/*------------------------------------------------------------------------------ + Pairwise Execution +------------------------------------------------------------------------------*/ +struct pw_comm_data { + uint n; /* number of messages */ + uint *p; /* message source/dest proc */ + uint *size; /* size of message */ + uint total; /* sum of message sizes */ + size_t *offsets; +}; + +struct pw_data { + struct pw_comm_data comm[2]; + const uint *map[2]; + comm_req *req; + uint buffer_size; + uint **submap[2]; +}; + +static char *pw_exec_recvs(char *buf, const unsigned unit_size, + const struct comm *comm, + const struct pw_comm_data *c, comm_req *req) +{ + const uint *p=c->p, *size=c->size; + int i; + char *retVal = buf; + +#ifdef MPITHREADS +#pragma omp for +#endif + for (i = 0; i < c->n; i++) { + comm_irecv(&(req[i]),comm,buf+c->offsets[i]*unit_size,size[i]*unit_size,p[i],p[i]); + } + + if (c->n != 0) { + retVal += c->offsets[c->n-1]*unit_size + size[c->n-1]*unit_size; + } + + return retVal; +} + +static char *pw_exec_sends(char *buf, const unsigned unit_size, + const struct comm *comm, + const struct pw_comm_data *c, comm_req *req) +{ + const uint *p=c->p, *size=c->size; + int i; + char *retVal = buf; + +#ifdef MPITHREADS +#pragma omp for +#endif + for(i = 0; i < c->n; i++) { + comm_isend(&(req[i]),comm,buf+c->offsets[i]*unit_size,size[i]*unit_size,p[i],comm->id); + } + + if (c->n != 0) { + retVal += c->offsets[c->n-1]*unit_size + size[c->n-1]*unit_size; + } + + return retVal; +} + +static void pw_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct pw_data *pwd = execdata; + static gs_scatter_fun *const scatter_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_gather_fun *const gather_from_buf[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop }; + const unsigned recv = 0^transpose, send = 1^transpose; + unsigned unit_size = vn*gs_dom_size[dom]; + +#ifdef MPITHREADS + char *sendbuf; +#else + static char *sendbuf; +#endif + + int thd = 0; + int inp = 0; + #ifdef _OPENMP + thd = omp_get_thread_num(); + inp = omp_in_parallel(); + #endif + + if (inp) { + /* post receives */ +#ifndef MPITHREADS + #pragma omp master +#endif + { + sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req); + } + #pragma omp barrier + + /* fill send buffer */ + scatter_to_buf[mode](sendbuf,data,vn,(pwd->submap[send])[thd],dom); + #pragma omp barrier + + /* post sends */ +#ifndef MPITHREADS + #pragma omp master +#endif + { + pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send], + &pwd->req[pwd->comm[recv].n]); + } + #pragma omp barrier + + #pragma omp master + { + comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n); + } + #pragma omp barrier + + /* gather using recv buffer */ + gather_from_buf[mode](data,buf,vn,(pwd->submap[recv])[thd],dom,op); + } else { + /* post receives */ + sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req); + /* fill send buffer */ + scatter_to_buf[mode](sendbuf,data,vn,pwd->map[send],dom); + /* post sends */ + pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send], + &pwd->req[pwd->comm[recv].n]); + comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n); + /* gather using recv buffer */ + gather_from_buf[mode](data,buf,vn,pwd->map[recv],dom,op); + } + +} + +/*------------------------------------------------------------------------------ + Pairwise setup +------------------------------------------------------------------------------*/ +static void pw_comm_setup(struct pw_comm_data *data, struct array *sh, + const unsigned flags_mask, buffer *buf) +{ + uint n=0,count=0, lp=-(uint)1; + struct shared_id *s, *se; + /* sort by remote processor and id (a globally consistent ordering) */ + sarray_sort_2(struct shared_id,sh->ptr,sh->n, p,0, id,1, buf); + /* assign index into buffer */ + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + if(s->flags&flags_mask) { s->bi = -(uint)1; continue; } + s->bi = count++; + if(s->p!=lp) lp=s->p, ++n; + } + data->n = n; + data->p = tmalloc(uint,2*n); + data->size = data->p + n; + data->total = count; + n = 0, lp=-(uint)1; + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + if(s->flags&flags_mask) continue; + if(s->p!=lp) { + lp=s->p; + if(n!=0) data->size[n-1] = count; + count=0, data->p[n++]=lp; + } + ++count; + } + if(n!=0) data->size[n-1] = count; + + data->offsets = malloc(sizeof(size_t)*data->n); + int i; + size_t len = 0; + for (i = 0; i < data->n; i++) { + data->offsets[i] = len; + len += data->size[i]; + } +} + +static void pw_comm_free(struct pw_comm_data *data) { free(data->p); free(data->offsets);} + +/* assumes that the bi field of sh is set */ +static const uint *pw_map_setup(struct array *sh, buffer *buf) +{ + uint count=0, *map, *p; + struct shared_id *s, *se; + sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf); + /* calculate map size */ + count=1; + for(s=sh->ptr,se=s+sh->n;s!=se;) { + uint i=s->i; + if(s->bi==-(uint)1) { ++s; continue; } + count+=3; + for(++s;s!=se&&s->i==i;++s) if(s->bi!=-(uint)1) ++count; + } + /* write map */ + p = map = tmalloc(uint,count); + for(s=sh->ptr,se=s+sh->n;s!=se;) { + uint i=s->i; + if(s->bi==-(uint)1) { ++s; continue; } + *p++ = i, *p++ = s->bi; + for(++s;s!=se&&s->i==i;++s) if(s->bi!=-(uint)1) *p++ = s->bi; + *p++ = -(uint)1; + } + *p = -(uint)1; + return map; +} + + +static struct pw_data *pw_setup_aux(struct array *sh, buffer *buf) +{ + struct pw_data *pwd = tmalloc(struct pw_data,1); + + /* default behavior: receive only remotely unflagged data */ + pw_comm_setup(&pwd->comm[0],sh, FLAGS_REMOTE, buf); + pwd->map[0] = pw_map_setup(sh, buf); + sublist(pwd->map[0], &(pwd->submap[0])); + + /* default behavior: send only locally unflagged data */ + pw_comm_setup(&pwd->comm[1],sh, FLAGS_LOCAL, buf); + pwd->map[1] = pw_map_setup(sh, buf); + sublist(pwd->map[1], &(pwd->submap[1])); + + pwd->req = tmalloc(comm_req,pwd->comm[0].n+pwd->comm[1].n); + pwd->buffer_size = pwd->comm[0].total + pwd->comm[1].total; + + return pwd; +} + +static void pw_free(struct pw_data *data) +{ + pw_comm_free(&data->comm[0]); + pw_comm_free(&data->comm[1]); + free((uint*)data->map[0]); + free((uint*)data->map[1]); + free(data->req); + free(data); + + free((data->submap[0])[0]); + free(data->submap[0]); + free((data->submap[1])[0]); + free(data->submap[1]); +} + +static void pw_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct pw_data *pwd = pw_setup_aux(&top->sh,buf); + r->buffer_size = pwd->buffer_size; + r->data = pwd; + r->exec = (exec_fun*)&pw_exec; + r->fin = (fin_fun*)&pw_free; +} + +/*------------------------------------------------------------------------------ + Crystal-Router Execution +------------------------------------------------------------------------------*/ +struct cr_stage { + const uint *scatter_map, *gather_map; + uint size_r, size_r1, size_r2; + uint size_sk, size_s, size_total; + uint p1, p2; + unsigned nrecvn; +}; + +struct cr_data { + struct cr_stage *stage[2]; + unsigned nstages; + uint buffer_size, stage_buffer_size; +}; + +static void cr_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct cr_data *crd = execdata; + static gs_scatter_fun *const scatter_user_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_scatter_fun *const scatter_buf_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec, &gs_scatter }; + static gs_scatter_fun *const scatter_buf_to_user[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop }; + static gs_gather_fun *const gather_buf_to_user[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop }; + static gs_gather_fun *const gather_buf_to_buf[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec, &gs_gather }; + const unsigned unit_size = vn*gs_dom_size[dom], nstages=crd->nstages; + unsigned k; + char *sendbuf, *buf_old, *buf_new; + const struct cr_stage *stage = crd->stage[transpose]; + buf_old = buf; + buf_new = buf_old + unit_size*crd->stage_buffer_size; + /* crystal router */ + for(k=0;knp+k); + if(stage[k].nrecvn==2) + comm_irecv(&req[2],comm,buf_new+unit_size*stage[k].size_r1, + unit_size*stage[k].size_r2, stage[k].p2, comm->np+k); + sendbuf = buf_new+unit_size*stage[k].size_r; + if(k==0) + scatter_user_to_buf[mode](sendbuf,data,vn,stage[0].scatter_map,dom); + else + scatter_buf_to_buf[mode](sendbuf,buf_old,vn,stage[k].scatter_map,dom), + gather_buf_to_buf [mode](sendbuf,buf_old,vn,stage[k].gather_map ,dom,op); + + comm_isend(&req[0],comm,sendbuf,unit_size*stage[k].size_s, + stage[k].p1, comm->np+k); + comm_wait(&req[0],1+stage[k].nrecvn); + { char *t = buf_old; buf_old=buf_new; buf_new=t; } + } + scatter_buf_to_user[mode](data,buf_old,vn,stage[k].scatter_map,dom); + gather_buf_to_user [mode](data,buf_old,vn,stage[k].gather_map ,dom,op); +} + +/*------------------------------------------------------------------------------ + Crystal-Router setup +------------------------------------------------------------------------------*/ +static void cr_schedule(struct cr_data *data, const struct comm *comm) +{ + const uint id = comm->id; + uint bl=0, n=comm->np; + unsigned k = 0; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + if(idnstages = k; + data->stage[0] = tmalloc(struct cr_stage,2*(k+1)); + data->stage[1] = data->stage[0] + (k+1); + bl=0, n=comm->np, k=0; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + uint targ; unsigned recvn; + recvn = 1, targ = n-1-(id-bl)+bl; + if(id==targ) targ=bh, recvn=0; + if(n&1 && id==bh) recvn=2; + data->stage[1][k].nrecvn=data->stage[0][k].nrecvn=recvn; + data->stage[1][k].p1 =data->stage[0][k].p1 =targ; + data->stage[1][k].p2 =data->stage[0][k].p2 =comm->id-1; + if(idmax; + struct crl_id *w = cw->ptr; + struct shared_id *s, *se; + +#define CW_ADD(aid,ap,ari,asi) do { \ + if(cw_n==cw_max) \ + array_reserve(struct crl_id,cw,cw_n+1),cw_max=cw->max, \ + w=(struct crl_id*)cw->ptr+cw_n; \ + w->id=aid, w->p=ap, w->ri=ari, w->si=asi; \ + ++w, ++cw_n; \ + } while(0) + + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + int send = (s->flags&send_mask)==0; + int recv = (s->flags&recv_mask)==0; + if(s->i!=last_i) last_i=s->i, added_myself=0; + if(!added_myself && recv && (s->flags&FLAGS_LOCAL)==0) { + added_myself=1; + CW_ADD(s->id,this_p,s->i,s->i); + } + if(send) CW_ADD(s->id,s->p,s->ri,s->i); + } + cw->n=cw_n; +#undef CW_ADD +} + +static void crl_maps(struct cr_stage *stage, struct array *cw, buffer *buf) +{ + struct crl_id *w, *we, *other; + uint scount=1, gcount=1, *sp, *gp; + sarray_sort_2(struct crl_id,cw->ptr,cw->n, bi,0, si,0, buf); + for(w=cw->ptr,we=w+cw->n;w!=we;w=other) { + uint bi=w->bi,any=0,si=w->si; + scount+=3; + for(other=w+1;other!=we&&other->bi==bi;++other) + if(other->si!=si) si=other->si, any=2, ++gcount; + gcount+=any; + } + stage->scatter_map = sp = tmalloc(uint,scount+gcount); + stage->gather_map = gp = sp + scount; + for(w=cw->ptr,we=w+cw->n;w!=we;w=other) { + uint bi=w->bi,any=0,si=w->si; + *sp++ = w->si, *sp++ = bi; + *gp++ = bi; + for(other=w+1;other!=we&&other->bi==bi;++other) + if(other->si!=si) si=other->si, any=1, *gp++ = si; + if(any) *gp++ = -(uint)1; else --gp; + *sp++ = -(uint)1; + } + *sp=-(uint)1, *gp=-(uint)1; +} + +static uint crl_work_label(struct array *cw, struct cr_stage *stage, + uint cutoff, int send_hi, buffer *buf) +{ + struct crl_id *w, *we, *start; + uint nsend, nkeep = 0, nks = 0, bi=0; + /* here w->send has a reverse meaning */ + if(send_hi) for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p< cutoff; + else for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p>=cutoff; + sarray_sort_2(struct crl_id,cw->ptr,cw->n, id,1, send,0, buf); + for(start=cw->ptr,w=start,we=w+cw->n;w!=we;++w) { + nkeep += w->send; + if(w->id!=start->id) start=w; + if(w->send!=start->send) w->send=0,w->bi=1, ++nks; else w->bi=0; + } + nsend = cw->n-nkeep; + /* assign indices; sent ids have priority (hence w->send is reversed) */ + sarray_sort(struct crl_id,cw->ptr,cw->n, send,0, buf); + for(start=cw->ptr,w=start,we=w+nsend+nks;w!=we;++w) { + if(w->id!=start->id) start=w, ++bi; + if(w->bi!=1) w->send=1; /* switch back to the usual semantics */ + w->bi = bi; + } + stage->size_s = nsend+nks==0 ? 0 : bi+1; + for(we=(struct crl_id*)cw->ptr+cw->n;w!=we;++w) { + if(w->id!=start->id) start=w, ++bi; + w->send = 0; /* switch back to the usual semantics */ + w->bi = bi; + } + stage->size_sk = cw->n==0 ? 0 : bi+1; + crl_maps(stage,cw,buf); + return nsend; +} + +static void crl_bi_to_si(struct crl_id *w, uint n, uint v) { + for(;n;--n) w->si=w->bi+v, ++w; +} + +static void crl_ri_to_bi(struct crl_id *w, uint n) { + for(;n;--n) w->bi=w->ri, ++w; +} + +static uint cr_learn(struct array *cw, struct cr_stage *stage, + const struct comm *comm, buffer *buf) +{ + comm_req req[3]; + const uint id = comm->id; + uint bl=0, n=comm->np; + uint size_max=0; + uint tag = comm->np; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + uint nkeep, nsend[2], nrecv[2][2] = {{0,0},{0,0}}; + struct crl_id *wrecv[2], *wsend; + nsend[0] = crl_work_label(cw,stage,bh,idsize_s; + nkeep = cw->n - nsend[0]; + + if(stage->nrecvn ) comm_irecv(&req[1],comm,nrecv[0],2*sizeof(uint), + stage->p1,tag); + if(stage->nrecvn==2) comm_irecv(&req[2],comm,nrecv[1],2*sizeof(uint), + stage->p2,tag); + comm_isend(&req[0],comm,nsend,2*sizeof(uint),stage->p1,tag); + comm_wait(req,1+stage->nrecvn),++tag; + + stage->size_r1 = nrecv[0][1], stage->size_r2 = nrecv[1][1]; + stage->size_r = stage->size_r1 + stage->size_r2; + stage->size_total = stage->size_r + stage->size_sk; + if(stage->size_total>size_max) size_max=stage->size_total; + + array_reserve(struct crl_id,cw,cw->n+nrecv[0][0]+nrecv[1][0]); + wrecv[0] = cw->ptr, wrecv[0] += cw->n, wrecv[1] = wrecv[0]+nrecv[0][0]; + wsend = cw->ptr, wsend += nkeep; + if(stage->nrecvn ) + comm_irecv(&req[1],comm,wrecv[0],nrecv[0][0]*sizeof(struct crl_id), + stage->p1,tag); + if(stage->nrecvn==2) + comm_irecv(&req[2],comm,wrecv[1],nrecv[1][0]*sizeof(struct crl_id), + stage->p2,tag); + sarray_sort_2(struct crl_id,cw->ptr,cw->n, send,0, bi,0, buf); + comm_isend(&req[0],comm,wsend,nsend[0]*sizeof(struct crl_id),stage->p1,tag); + comm_wait(req,1+stage->nrecvn),++tag; + + crl_bi_to_si(cw->ptr,nkeep,stage->size_r); + if(stage->nrecvn) crl_bi_to_si(wrecv[0],nrecv[0][0],0); + if(stage->nrecvn==2) crl_bi_to_si(wrecv[1],nrecv[1][0],stage->size_r1); + memmove(wsend,wrecv[0],(nrecv[0][0]+nrecv[1][0])*sizeof(struct crl_id)); + cw->n += nrecv[0][0] + nrecv[1][0]; + cw->n -= nsend[0]; + + if(idptr,cw->n); + crl_maps(stage,cw,buf); + return size_max; +} + +static struct cr_data *cr_setup_aux( + struct array *sh, const struct comm *comm, buffer *buf) +{ + uint size_max[2]; + struct array cw = null_array; + struct cr_data *crd = tmalloc(struct cr_data,1); + + /* default behavior: receive only remotely unflagged data */ + /* default behavior: send only locally unflagged data */ + + cr_schedule(crd,comm); + + sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf); + crl_work_init(&cw,sh, FLAGS_LOCAL , comm->id); + size_max[0]=cr_learn(&cw,crd->stage[0],comm,buf); + crl_work_init(&cw,sh, FLAGS_REMOTE, comm->id); + size_max[1]=cr_learn(&cw,crd->stage[1],comm,buf); + + crd->stage_buffer_size = size_max[1]>size_max[0]?size_max[1]:size_max[0]; + + array_free(&cw); + + crd->buffer_size = 2*crd->stage_buffer_size; + return crd; +} + +static void cr_free_stage_maps(struct cr_stage *stage, unsigned kmax) +{ + unsigned k; + for(k=0; kscatter_map); + ++stage; + } + free((uint*)stage->scatter_map); +} + +static void cr_free(struct cr_data *data) +{ + cr_free_stage_maps(data->stage[0],data->nstages); + cr_free_stage_maps(data->stage[1],data->nstages); + free(data->stage[0]); + free(data); +} + +static void cr_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct cr_data *crd = cr_setup_aux(&top->sh,comm,buf); + r->buffer_size = crd->buffer_size; + r->data = crd; + r->exec = (exec_fun*)&cr_exec; + r->fin = (fin_fun*)&cr_free; +} + +/*------------------------------------------------------------------------------ + All-reduce Execution +------------------------------------------------------------------------------*/ +struct allreduce_data { + const uint *map_to_buf[2], *map_from_buf[2]; + uint buffer_size; +}; + +static void allreduce_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct allreduce_data *ard = execdata; + static gs_scatter_fun *const scatter_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_scatter_fun *const scatter_from_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop }; + uint gvn = vn*(ard->buffer_size/2); + unsigned unit_size = gs_dom_size[dom]; + char *ardbuf; + ardbuf = buf+unit_size*gvn; + /* user array -> buffer */ + gs_init_array(buf,gvn,dom,op); + scatter_to_buf[mode](buf,data,vn,ard->map_to_buf[transpose],dom); + /* all reduce */ + comm_allreduce(comm,dom,op, buf,gvn, ardbuf); + /* buffer -> user array */ + scatter_from_buf[mode](data,buf,vn,ard->map_from_buf[transpose],dom); +} + +/*------------------------------------------------------------------------------ + All-reduce setup +------------------------------------------------------------------------------*/ +static const uint *allreduce_map_setup( + struct array *pr, const unsigned flags_mask, int to_buf) +{ + struct primary_shared_id *p, *pe; + uint count=1, *map, *m; + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) count+=3; + m=map=tmalloc(uint,count); + if(to_buf) { + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) + *m++ = p->i, *m++ = p->ord, *m++ = -(uint)1; + } else { + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) + *m++ = p->ord, *m++ = p->i, *m++ = -(uint)1; + } + *m=-(uint)1; + return map; +} + +static struct allreduce_data *allreduce_setup_aux( + struct array *pr, ulong total_shared) +{ + struct allreduce_data *ard = tmalloc(struct allreduce_data,1); + + /* default behavior: reduce only unflagged data, copy to all */ + ard->map_to_buf [0] = allreduce_map_setup(pr,1,1); + ard->map_from_buf[0] = allreduce_map_setup(pr,0,0); + + /* transpose behavior: reduce all data, copy to unflagged */ + ard->map_to_buf [1] = allreduce_map_setup(pr,0,1); + ard->map_from_buf[1] = allreduce_map_setup(pr,1,0); + + ard->buffer_size = total_shared*2; + return ard; +} + +static void allreduce_free(struct allreduce_data *ard) +{ + free((uint*)ard->map_to_buf[0]); + free((uint*)ard->map_to_buf[1]); + free((uint*)ard->map_from_buf[0]); + free((uint*)ard->map_from_buf[1]); + free(ard); +} + +static void allreduce_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct allreduce_data *ard = allreduce_setup_aux(&top->pr,top->total_shared); + r->buffer_size = ard->buffer_size; + r->data = ard; + r->exec = (exec_fun*)&allreduce_exec; + r->fin = (fin_fun*)&allreduce_free; +} + +/*------------------------------------------------------------------------------ + Automatic Setup --- dynamically picks the fastest method +------------------------------------------------------------------------------*/ + +static void dry_run_time(double times[3], const struct gs_remote *r, + const struct comm *comm, buffer *buf) +{ + int i; double t; + buffer_reserve(buf,gs_dom_size[gs_double]*r->buffer_size); + for(i= 2;i;--i) + r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr); + comm_barrier(comm); + t = comm_time(); + for(i=10;i;--i) + r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr); + t = (comm_time() - t)/10; + times[0] = t/comm->np, times[1] = t, times[2] = t; + comm_allreduce(comm,gs_double,gs_add, ×[0],1, &t); + comm_allreduce(comm,gs_double,gs_min, ×[1],1, &t); + comm_allreduce(comm,gs_double,gs_max, ×[2],1, &t); +} + +static void auto_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + pw_setup(r, top,comm,buf); + + if(comm->np>1) { + const char *name = "pairwise"; + struct gs_remote r_alt; + double time[2][3]; + + #define DRY_RUN(i,gsr,str) do { \ + if(comm->id==0) printf(" " str ": "); \ + dry_run_time(time[i],gsr,comm,buf); \ + if(comm->id==0) \ + printf("%g %g %g\n",time[i][0],time[i][1],time[i][2]); \ + } while(0) + + #define DRY_RUN_CHECK(str,new_name) do { \ + DRY_RUN(1,&r_alt,str); \ + if(time[1][2]fin(r->data), *r = r_alt; \ + else \ + r_alt.fin(r_alt.data); \ + } while(0) + + DRY_RUN(0, r, "pairwise times (avg, min, max)"); + + cr_setup(&r_alt, top,comm,buf); + DRY_RUN_CHECK( "crystal router ", "crystal router"); + + if(top->total_shared<100000) { + allreduce_setup(&r_alt, top,comm,buf); + DRY_RUN_CHECK( "all reduce ", "allreduce"); + } + + #undef DRY_RUN_CHECK + #undef DRY_RUN + + if(comm->id==0) printf(" used all_to_all method: %s\n",name); + } +} + +/*------------------------------------------------------------------------------ + Main Execution +------------------------------------------------------------------------------*/ +struct gs_data { + struct comm comm; + const uint *map_local[2]; /* 0=unflagged, 1=all */ + const uint *flagged_primaries; + struct gs_remote r; + uint **submap_local[2]; /* 0=unflagged, 1=all */ + uint **subflagged_primaries; +}; + + +static void gs_aux( + void *u, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf) +{ + static gs_scatter_fun *const local_scatter[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many, &scatter_noop }; + static gs_gather_fun *const local_gather [] = + { &gs_gather, &gs_gather_vec, &gs_gather_many, &gather_noop }; + static gs_init_fun *const init[] = + { &gs_init, &gs_init_vec, &gs_init_many, &init_noop }; + + + int thd = 0; + int inp = 0; + #ifdef _OPENMP + thd = omp_get_thread_num(); + inp = omp_in_parallel(); + #endif + + if(!buf) buf = &static_buffer; + + #pragma omp single + { + buffer_reserve(buf,vn*gs_dom_size[dom]*gsh->r.buffer_size); + } + + if (inp) { + local_gather [mode](u,u,vn,(gsh->submap_local[0^transpose])[thd],dom,op); + #pragma omp barrier + + if(transpose==0) init[mode](u,vn,(gsh->subflagged_primaries)[thd],dom,op); + #pragma omp barrier + + gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr); + #pragma omp barrier + + local_scatter[mode](u,u,vn,(gsh->submap_local[1^transpose])[thd],dom); + #pragma omp barrier + + } else { + local_gather [mode](u,u,vn,gsh->map_local[0^transpose],dom,op); + if(transpose==0) init[mode](u,vn,gsh->flagged_primaries,dom,op); + gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr); + local_scatter[mode](u,u,vn,gsh->map_local[1^transpose],dom); + } + +} + +void gs(void *u, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf) +{ + gs_aux(u,mode_plain,1,dom,op,transpose,gsh,buf); +} + +void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf) +{ + gs_aux(u,mode_vec,vn,dom,op,transpose,gsh,buf); +} + +void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf) +{ + gs_aux((void*)u,mode_many,vn,dom,op,transpose,gsh,buf); +} + +/*------------------------------------------------------------------------------ + Main Setup +------------------------------------------------------------------------------*/ +typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce, + gs_auto } gs_method; + + + +static void local_setup(struct gs_data *gsh, const struct array *nz) +{ + gsh->map_local[0] = local_map(nz,1); + gsh->map_local[1] = local_map(nz,0); + gsh->flagged_primaries = flagged_primaries_map(nz); + sublist(gsh->map_local[0], &(gsh->submap_local[0])); + sublist(gsh->map_local[1], &(gsh->submap_local[1])); + subflagged(gsh->flagged_primaries, &(gsh->subflagged_primaries)); +} + +static void gs_setup_aux(struct gs_data *gsh, const slong *id, uint n, + int unique, gs_method method, int verbose) +{ + static setup_fun *const remote_setup[] = + { &pw_setup, &cr_setup, &allreduce_setup, &auto_setup }; + + struct gs_topology top; + struct crystal cr; + + crystal_init(&cr,&gsh->comm); + + get_topology(&top, id,n, &cr); + if(unique) make_topology_unique(&top,0,gsh->comm.id,&cr.data); + + local_setup(gsh,&top.nz); + + if(verbose && gsh->comm.id==0) + printf("gs_setup: %ld unique labels shared\n",(long)top.total_shared); + + remote_setup[method](&gsh->r, &top,&gsh->comm,&cr.data); + + gs_topology_free(&top); + crystal_free(&cr); +} + +struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm, + int unique, gs_method method, int verbose) +{ + struct gs_data *gsh = tmalloc(struct gs_data,1); + comm_dup(&gsh->comm,comm); + gs_setup_aux(gsh,id,n,unique,method,verbose); + return gsh; +} + +void gs_free(struct gs_data *gsh) +{ + comm_free(&gsh->comm); + free((uint*)gsh->map_local[0]), free((uint*)gsh->map_local[1]); + free((uint*)gsh->flagged_primaries); + gsh->r.fin(gsh->r.data); + free((gsh->submap_local[0])[0]); + free(gsh->submap_local[0]); + free((gsh->submap_local[1])[0]); + free(gsh->submap_local[1]); + free((gsh->subflagged_primaries)[0]); + free(gsh->subflagged_primaries); + free(gsh); +} + +void gs_unique(slong *id, uint n, const struct comm *comm) +{ + struct gs_topology top; + struct crystal cr; + crystal_init(&cr,comm); + get_topology(&top, id,n, &cr); + make_topology_unique(&top,id,comm->id,&cr.data); + gs_topology_free(&top); + crystal_free(&cr); +} + +/*------------------------------------------------------------------------------ + FORTRAN interface +------------------------------------------------------------------------------*/ + +#undef gs_op + +#undef gs_free +#undef gs_setup +#undef gs_many +#undef gs_vec +#undef gs +#define cgs PREFIXED_NAME(gs ) +#define cgs_vec PREFIXED_NAME(gs_vec ) +#define cgs_many PREFIXED_NAME(gs_many ) +#define cgs_setup PREFIXED_NAME(gs_setup) +#define cgs_free PREFIXED_NAME(gs_free ) + +#define fgs_setup FORTRAN_NAME(gs_setup ,GS_SETUP ) +#define fgs FORTRAN_NAME(gs_op ,GS_OP ) +#define fgs_vec FORTRAN_NAME(gs_op_vec ,GS_OP_VEC ) +#define fgs_many FORTRAN_NAME(gs_op_many ,GS_OP_MANY ) +#define fgs_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS) +#define fgs_free FORTRAN_NAME(gs_free ,GS_FREE ) + +static struct gs_data **fgs_info = 0; +static int fgs_max = 0; +static int fgs_n = 0; + +void fgs_setup(sint *handle, const slong id[], const sint *n, + const MPI_Fint *comm, const sint *np) +{ + struct gs_data *gsh; + if(fgs_n==fgs_max) fgs_max+=fgs_max/2+1, + fgs_info=trealloc(struct gs_data*,fgs_info,fgs_max); + gsh=fgs_info[fgs_n]=tmalloc(struct gs_data,1); + comm_init_check(&gsh->comm,*comm,*np); + gs_setup_aux(gsh,id,*n,0,gs_pairwise,1); + *handle = fgs_n++; +} + +static void fgs_check_handle(sint handle, const char *func, unsigned line) +{ + if(handle<0 || handle>=fgs_n || !fgs_info[handle]) + fail(1,__FILE__,line,"%s: invalid handle", func); +} + +static void fgs_check_parms(sint handle, sint dom, sint op, + const char *func, unsigned line) +{ + if(dom<1 || dom>4) + fail(1,__FILE__,line,"%s: datatype %d not in valid range 1-4",func,dom); + if(op <1 || op >4) + fail(1,__FILE__,line,"%s: op %d not in valid range 1-4",func,op); + fgs_check_handle(handle,func,line); +} + +void fgs(const sint *handle, void *u, const sint *dom, const sint *op, + const sint *transpose) +{ + fgs_check_parms(*handle,*dom,*op,"gs_op",__LINE__); + cgs(u,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0,fgs_info[*handle],0); +} + +void fgs_vec(const sint *handle, void *u, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + fgs_check_parms(*handle,*dom,*op,"gs_op_vec",__LINE__); + cgs_vec(u,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0, + fgs_info[*handle],0); +} + +void fgs_many(const sint *handle, void *u1, void *u2, void *u3, + void *u4, void *u5, void *u6, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + void *uu[6]; + uu[0]=u1,uu[1]=u2,uu[2]=u3,uu[3]=u4,uu[4]=u5,uu[5]=u6; + fgs_check_parms(*handle,*dom,*op,"gs_op_many",__LINE__); + cgs_many((void *const*)uu,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0, + fgs_info[*handle],0); +} + +static struct array fgs_fields_array = null_array; + +void fgs_fields(const sint *handle, + void *u, const sint *stride, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + size_t offset; + void **p; + uint i; + + fgs_check_parms(*handle,*dom,*op,"gs_op_fields",__LINE__); + if(*n<0) return; + + array_reserve(void*,&fgs_fields_array,*n); + p = fgs_fields_array.ptr; + offset = *stride * gs_dom_size[*dom-1]; + for(i=*n;i;--i) *p++ = u, u = (char*)u + offset; + + cgs_many((void *const*)fgs_fields_array.ptr,*n, + (gs_dom)(*dom-1),(gs_op_t)(*op-1), + *transpose!=0, fgs_info[*handle],0); +} + +void fgs_free(const sint *handle) +{ + fgs_check_handle(*handle,"gs_free",__LINE__); + cgs_free(fgs_info[*handle]); + fgs_info[*handle] = 0; +} + diff --git a/src/jl/gs.h b/src/jl/gs.h new file mode 100644 index 0000000..43fc142 --- /dev/null +++ b/src/jl/gs.h @@ -0,0 +1,141 @@ +#ifndef GS_H +#define GS_H + +#if !defined(COMM_H) || !defined(GS_DEFS_H) || !defined(MEM_H) +#warning "gs.h" requires "comm.h", "gs_defs.h", and "mem.h" +#endif + +/* + Gather/Scatter Library + + The code + + struct comm c; // see "comm.h" + slong id[n]; // the slong type is defined in "types.h" + ... + struct gs_data *g = gs_setup(id,n, &c, 0,gs_auto,1); + + defines a partition of the set of (processor, local index) pairs, + (p,i) \in S_j iff abs(id[i]) == j on processor p + That is, all (p,i) pairs are grouped together (in group S_j) that have the + same id (=j). + S_0 is treated specially --- it is ignored completely + (i.e., when id[i] == 0, local index i does not participate in any + gather/scatter operation + If id[i] on proc p is negative then the pair (p,i) is "flagged". This + determines the non-symmetric behavior. For the simpler, symmetric case, + all id's should be positive. + + The second to last argument to gs_setup is the method to use, one of + gs_pairwise, gs_crystal_router, gs_all_reduce, gs_auto + The method "gs_auto" tries ~10 runs of each and chooses the fastest. + For a single-use handle, it makes more sense to use "gs_crystal_router". + + When "g" is no longer needed, free it with + + gs_free(g); + + A basic gather/scatter operation is, e.g., + + double v[n]; buffer buf; // see "mem.h" for "buffer" + ... + gs(v, gs_double,gs_add, 0, g,&buf); + + The buffer pointer can be null, in which case, a static buffer is used, + shared across all gs handles. + This gs call has the effect, (in the simple, symmetric, unflagged case) + + v[i] <-- \sum_{ (p,j) \in S_{id[i]} } v_(p) [j] + + where v_(p) [j] means v[j] on proc p. In other words, every v[i] is replaced + by the sum of all v[j]'s with the same id, given by id[i]. This accomplishes + "direct stiffness summation" corresponding to the action of QQ^T, where + "Q" is a boolean matrix that copies from a global vector (indexed by id) + to the local vectors indexed by (p,i) pairs. + + Summation on doubles is not the only operation and datatype supported. The + full list is defined in "gs_defs.h", and includes the operations + gs_add, gs_mul, gs_max, gs_min + and datatypes + gs_double, gs_float, gs_int, gs_long, gs_sint, gs_slong. + (The int and long types are the plain C types, whereas sint and slong + are defined in "types.h"). + + For the nonsymmetric behavior, the "transpose" parameter is important: + + gs(v, gs_double,gs_add, transpose, g,&buf); + + When transpose == 0, any "flagged" (p,i) pairs (id[i] negative on p) + do not participate in the sum, but *do* still receive the sum on output. + As a special case, when only one (p,i) pair is unflagged per group this + corresponds to the rectangular "Q" matrix referred to above. + + When transpose == 1, the "flagged" (p,i) pairs *do* participate in the sum, + but do *not* get set on output. In the special case of only one unflagged + (p,i) pair, this corresponds to the transpose of "Q" referred to above. + + + + A version for vectors (contiguously packed) is, e.g., + + double v[n][k]; + gs_vec(v,k, gs_double,gs_add, transpose, g,&buf); + + which is like "gs" operating on the datatype double[k], + with summation here being vector summation. Number of messages sent + is independent of k. + + For combining the communication for "gs" on multiple arrays: + + double v1[n], v2[n], ..., vk[n]; + double (*vs)[k] = {v1, v2, ..., vk}; + + gs_many(vs,k, gs_double,op, t, g,&buf); + + This call is equivalent to + + gs(v1, gs_double,op, t, g, &buf); + gs(v2, gs_double,op, t, g, &buf); + ... + gs(vk, gs_double,op, t, g, &buf); + + except that all communication is done together. + + + + Finally, gs_unique has the same basic signature as gs_setup: + + gs_unique(id,n, &c); + + This call modifies id, "flagging" (by negating id[i]) all (p,i) pairs in + each group except one. The sole "unflagged" member of the group is chosen + in an arbitrary but consistent way. If the "unique" flag is set when + calling gs_setup, the behavior is equivalent to first calling gs_unique, + except that the id array is left unmodified. + + +*/ + +#define gs PREFIXED_NAME(gs ) +#define gs_vec PREFIXED_NAME(gs_vec ) +#define gs_many PREFIXED_NAME(gs_many ) +#define gs_setup PREFIXED_NAME(gs_setup ) +#define gs_free PREFIXED_NAME(gs_free ) +#define gs_unique PREFIXED_NAME(gs_unique) + +struct gs_data; +typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce, + gs_auto } gs_method; + +void gs(void *u, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf); +void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf); +void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf); +struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm, + int unique, gs_method method, int verbose); +void gs_free(struct gs_data *gsh); +void gs_unique(slong *id, uint n, const struct comm *comm); + +#endif diff --git a/src/jl/gs_defs.h b/src/jl/gs_defs.h new file mode 100644 index 0000000..df4ad7b --- /dev/null +++ b/src/jl/gs_defs.h @@ -0,0 +1,81 @@ +#ifndef GS_DEFS_H +#define GS_DEFS_H + +/* requires: + , for GS_DEFINE_IDENTITIES() + "types.h" for gs_sint, gs_slong +*/ + +/*------------------------------------------------------------------------------ + Monoid Definitions + + Here are defined the domains and operations, each combination being a + commutative semigroup, as well as the identity element making each a + commutative monoid. +------------------------------------------------------------------------------*/ + +/* the supported domains */ +#define GS_FOR_EACH_DOMAIN(macro) \ + macro(double) \ + macro(float ) \ + macro(int ) \ + macro(long ) \ + WHEN_LONG_LONG(macro(long_long)) + +/* the supported ops */ +#define GS_FOR_EACH_OP(T,macro) \ + macro(T,add) \ + macro(T,mul) \ + macro(T,min) \ + macro(T,max) \ + macro(T,bpr) + +#define GS_DO_add(a,b) a+=b +#define GS_DO_mul(a,b) a*=b +#define GS_DO_min(a,b) if(ba) a=b +#define GS_DO_bpr(a,b) \ + do if(b!=0) { uint a_ = a; uint b_ = b; \ + if(a_==0) { a=b_; break; } \ + for(;;) { if(a_>=1; else if(b_>=1; else break; } \ + a = a_; \ + } while(0) + +/* the monoid identity elements */ +#define GS_DEFINE_MONOID_ID(T,min,max) \ + static const T gs_identity_##T[] = { 0, 1, max, min, 0 }; +#define GS_DEFINE_IDENTITIES() \ + GS_DEFINE_MONOID_ID(double, -DBL_MAX, DBL_MAX) \ + GS_DEFINE_MONOID_ID(float , -FLT_MAX, FLT_MAX) \ + GS_DEFINE_MONOID_ID(int , INT_MIN, INT_MAX) \ + GS_DEFINE_MONOID_ID(long , LONG_MIN, LONG_MAX) \ + WHEN_LONG_LONG(GS_DEFINE_MONOID_ID(long_long,LLONG_MIN,LLONG_MAX)) + +/*------------------------------------------------------------------------------ + Enums and constants +------------------------------------------------------------------------------*/ + +/* domain enum */ +#define LIST GS_FOR_EACH_DOMAIN(ITEM) gs_dom_n +#define ITEM(T) gs_##T, +typedef enum { LIST } gs_dom; +#undef ITEM +#undef LIST + +#define gs_sint TYPE_LOCAL(gs_int,gs_long,gs_long_long) +#define gs_slong TYPE_GLOBAL(gs_int,gs_long,gs_long_long) + +/* domain type size array */ +#define GS_DOM_SIZE_ITEM(T) sizeof(T), +#define GS_DEFINE_DOM_SIZES() \ + static const unsigned gs_dom_size[] = \ + { GS_FOR_EACH_DOMAIN(GS_DOM_SIZE_ITEM) 0 }; + +/* operation enum */ +#define LIST GS_FOR_EACH_OP(T,ITEM) gs_op_n +#define ITEM(T,op) gs_##op, +typedef enum { LIST } gs_op; +#undef ITEM +#undef LIST + +#endif diff --git a/src/jl/gs_local.c b/src/jl/gs_local.c new file mode 100644 index 0000000..2bc246d --- /dev/null +++ b/src/jl/gs_local.c @@ -0,0 +1,336 @@ +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "types.h" + +#define gs_gather_array PREFIXED_NAME(gs_gather_array ) +#define gs_init_array PREFIXED_NAME(gs_init_array ) +#define gs_gather PREFIXED_NAME(gs_gather ) +#define gs_scatter PREFIXED_NAME(gs_scatter ) +#define gs_init PREFIXED_NAME(gs_init ) +#define gs_gather_vec PREFIXED_NAME(gs_gather_vec ) +#define gs_scatter_vec PREFIXED_NAME(gs_scatter_vec ) +#define gs_init_vec PREFIXED_NAME(gs_init_vec ) +#define gs_gather_many PREFIXED_NAME(gs_gather_many ) +#define gs_scatter_many PREFIXED_NAME(gs_scatter_many ) +#define gs_init_many PREFIXED_NAME(gs_init_many ) +#define gs_gather_vec_to_many PREFIXED_NAME(gs_gather_vec_to_many ) +#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec) +#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many) + +#include "gs_defs.h" +GS_DEFINE_IDENTITIES() +GS_DEFINE_DOM_SIZES() + +/*------------------------------------------------------------------------------ + The array gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_array_##T##_##OP( \ + T *restrict out, const T *restrict in, uint n) \ +{ \ + for(;n;--n) { T q = *in++, *p = out++; GS_DO_##OP(*p,q); } \ +} + +/*------------------------------------------------------------------------------ + The array initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_array_##T(T *restrict out, uint n, gs_op op) \ +{ \ + const T e = gs_identity_##T[op]; \ + for(;n;--n) *out++=e; \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_GATHER + +/*------------------------------------------------------------------------------ + The basic gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_##T##_##OP( \ + T *restrict out, const T *restrict in, const unsigned in_stride, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T t=out[i]; \ + j=*map++; do GS_DO_##OP(t,in[j*in_stride]); while((j=*map++)!=-(uint)1); \ + out[i]=t; \ + } \ +} + +/*------------------------------------------------------------------------------ + The basic scatter kernel +------------------------------------------------------------------------------*/ +#define DEFINE_SCATTER(T) \ +static void scatter_##T( \ + T *restrict out, const unsigned out_stride, \ + const T *restrict in, const unsigned in_stride, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T t=in[i*in_stride]; \ + j=*map++; do out[j*out_stride]=t; while((j=*map++)!=-(uint)1); \ + } \ +} + +/*------------------------------------------------------------------------------ + The basic initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_##T(T *restrict out, const uint *restrict map, gs_op op) \ +{ \ + uint i; const T e = gs_identity_##T[op]; \ + while((i=*map++)!=-(uint)1) out[i]=e; \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_SCATTER(T) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_SCATTER +#undef DEFINE_GATHER + +/*------------------------------------------------------------------------------ + The vector gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_vec_##T##_##OP( \ + T *restrict out, const T *restrict in, const unsigned vn, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T *restrict p = &out[i*vn], *pe = p+vn; \ + j=*map++; do { \ + const T *restrict q = &in[j*vn]; \ + T *restrict pk=p; do { GS_DO_##OP(*pk,*q); ++pk, ++q; } while(pk!=pe); \ + } while((j=*map++)!=-(uint)1); \ + } \ +} + +/*------------------------------------------------------------------------------ + The vector scatter kernel +------------------------------------------------------------------------------*/ +void gs_scatter_vec( + void *restrict out, const void *restrict in, const unsigned vn, + const uint *restrict map, gs_dom dom) +{ + unsigned unit_size = vn*gs_dom_size[dom]; + uint i,j; + while((i=*map++)!=-(uint)1) { + const char *t = (const char *)in + i*unit_size; + j=*map++; do + memcpy((char *)out+j*unit_size,t,unit_size); + while((j=*map++)!=-(uint)1); + } +} + +/*------------------------------------------------------------------------------ + The vector initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_vec_##T(T *restrict out, const unsigned vn, \ + const uint *restrict map, gs_op op) \ +{ \ + uint i; const T e = gs_identity_##T[op]; \ + while((i=*map++)!=-(uint)1) { \ + T *restrict u = (T*)out + vn*i, *ue = u+vn; \ + do *u++ = e; while(u!=ue); \ + } \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_GATHER + +#undef DO_bpr +#undef DO_max +#undef DO_min +#undef DO_mul +#undef DO_add + +#define SWITCH_DOMAIN_CASE(T) case gs_##T: WITH_DOMAIN(T); break; +#define SWITCH_DOMAIN(dom) do switch(dom) { \ + GS_FOR_EACH_DOMAIN(SWITCH_DOMAIN_CASE) case gs_dom_n: break; } while(0) + +#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break; +#define SWITCH_OP(T,op) do switch(op) { \ + GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0) + +/*------------------------------------------------------------------------------ + Array kernels +------------------------------------------------------------------------------*/ +void gs_gather_array(void *out, const void *in, uint n, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_array_##T##_##OP(out,in,n) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_init_array(void *out, uint n, gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_array_##T(out,n,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Plain kernels; vn parameter ignored but present for consistent signatures +------------------------------------------------------------------------------*/ +void gs_gather(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_##T##_##OP(out,in,1,map) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_scatter(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ +#define WITH_DOMAIN(T) scatter_##T(out,1,in,1,map) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +void gs_init(void *out, const unsigned vn, const uint *map, + gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_##T(out,map,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Vector kernels +------------------------------------------------------------------------------*/ +void gs_gather_vec(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_vec_##T##_##OP(out,in,vn,map) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_init_vec(void *out, const unsigned vn, const uint *map, + gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_vec_##T(out,vn,map,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Multiple array kernels +------------------------------------------------------------------------------*/ +void gs_gather_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ + uint k; + typedef void *ptr_to_void; typedef const void *ptr_to_const_void; + const ptr_to_void *p = out; const ptr_to_const_void *q = in; +#define WITH_OP(T,OP) for(k=0;k multiple arrays + Scatter from multiple arrays -> strided array, + Scatter from strided array -> multiple arrays, +------------------------------------------------------------------------------*/ +void gs_gather_vec_to_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef void *ptr_to_void; + const ptr_to_void *p = out; const char *q = in; +#define WITH_OP(T,OP) \ + for(i=vn;i;--i) gather_##T##_##OP(*p++,(const T*)q,vn,map), q+=unit_size +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_scatter_many_to_vec(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef const void *ptr_to_const_void; + char *p = out; const ptr_to_const_void *q = in; +#define WITH_DOMAIN(T) \ + for(i=vn;i;--i) scatter_##T((T*)p,vn,*q++,1,map), p+=unit_size + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +void gs_scatter_vec_to_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef void *ptr_to_void; + const ptr_to_void *p = out; const char *q = in; +#define WITH_DOMAIN(T) \ + for(i=vn;i;--i) scatter_##T(*p++,1,(const T*)q,vn,map), q+=unit_size + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +#undef SWITCH_OP +#undef SWITCH_OP_CASE +#undef SWITCH_DOMAIN +#undef SWITCH_DOMAIN_CASE diff --git a/src/jl/gs_local.h b/src/jl/gs_local.h new file mode 100644 index 0000000..fc7c414 --- /dev/null +++ b/src/jl/gs_local.h @@ -0,0 +1,43 @@ +#ifndef GS_LOCAL_H +#define GS_LOCAL_H + +#if !defined(NAME_H) || !defined(TYPES_H) || !defined(GS_DEFS_H) +#warning "gs_local.h" requires "name.h", "types.h", and "gs_defs.h" +#endif + +#define gs_gather_array PREFIXED_NAME(gs_gather_array ) +#define gs_init_array PREFIXED_NAME(gs_init_array ) +#define gs_gather PREFIXED_NAME(gs_gather ) +#define gs_scatter PREFIXED_NAME(gs_scatter ) +#define gs_init PREFIXED_NAME(gs_init ) +#define gs_gather_vec PREFIXED_NAME(gs_gather_vec ) +#define gs_scatter_vec PREFIXED_NAME(gs_scatter_vec ) +#define gs_init_vec PREFIXED_NAME(gs_init_vec ) +#define gs_gather_many PREFIXED_NAME(gs_gather_many ) +#define gs_scatter_many PREFIXED_NAME(gs_scatter_many ) +#define gs_init_many PREFIXED_NAME(gs_init_many ) +#define gs_gather_vec_to_many PREFIXED_NAME(gs_gather_vec_to_many ) +#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec) +#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many) + +void gs_gather_array(void *out, const void *in, uint n, + gs_dom dom, gs_op op); +void gs_init_array(void *out, uint n, gs_dom dom, gs_op op); + +typedef void gs_gather_fun( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op); +typedef void gs_scatter_fun( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom); +typedef void gs_init_fun( + void *out, const unsigned vn, + const uint *map, gs_dom dom, gs_op op); + +extern gs_gather_fun gs_gather, gs_gather_vec, gs_gather_many, + gs_gather_vec_to_many; +extern gs_scatter_fun gs_scatter, gs_scatter_vec, gs_scatter_many, + gs_scatter_many_to_vec, gs_scatter_vec_to_many; +extern gs_init_fun gs_init, gs_init_vec, gs_init_many; + +#endif diff --git a/src/jl/gs_test.c b/src/jl/gs_test.c new file mode 100644 index 0000000..588a52b --- /dev/null +++ b/src/jl/gs_test.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "gs_defs.h" +#include "gs.h" + +typedef double T; +const gs_dom dom = gs_double; + +static void test(const struct comm *comm) +{ + struct gs_data *gsh; + const uint np = comm->np; + slong *id = tmalloc(slong,np+4); + T *v = tmalloc(T,np+4); + uint i; + id[0] = -(slong)(np+10+3*comm->id); + for(i=0;iid+1; + id[np+2] = comm->id+1; + id[np+3] = np-comm->id; + gsh = gs_setup(id,np+4,comm,0,gs_auto,1); + free(id); + + for(i=0;iid==0) for(i=0;iid==0) printf("\n"); + for(i=0;iid==0) for(i=0;i +#include +#include +#ifdef MPI +# include +#else + typedef void MPI_Comm; +#endif +#include "name.h" +#include "types.h" + +typedef long real; +sint datatype = 4; + +#define fgs_setup FORTRAN_NAME(gs_setup ,GS_SETUP ) +#define fgs_op FORTRAN_NAME(gs_op ,GS_OP ) +#define fgs_op_vec FORTRAN_NAME(gs_op_vec ,GS_OP_VEC ) +#define fgs_op_many FORTRAN_NAME(gs_op_many ,GS_OP_MANY ) +#define fgs_op_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS) +#define fgs_free FORTRAN_NAME(gs_free ,GS_FREE ) + +void fgs_setup(sint *handle, const slong id[], const sint *n, + const MPI_Comm *comm, const sint *np); +void fgs_op(const sint *handle, void *u, const sint *dom, const sint *op, + const sint *transpose); +void fgs_op_vec(const sint *handle, void *u, const sint *n, + const sint *dom, const sint *op, const sint *transpose); +void fgs_op_many(const sint *handle, void *u1, void *u2, void *u3, + void *u4, void *u5, void *u6, const sint *n, + const sint *dom, const sint *op, const sint *transpose); +void fgs_free(const sint *handle); + +void assert_is_zero(real v) +{ + if(fabs(v) < 1e-20) return; + printf("test failed\n"); + exit(1); +} + +int main(int narg, char* arg[]) +{ + sint transpose=0; + sint id=0,np=1; + sint i,handle,maxv=3; + real *u; + slong *glindex; +#ifndef MPI + int comm; +#else + MPI_Comm comm; + MPI_Init(&narg,&arg); + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + { int i; + MPI_Comm_rank(comm,&i); id=i; + MPI_Comm_size(comm,&i); np=i; + } +#endif + + glindex = malloc(np*2*sizeof(slong)); + for(i=0;i +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "gs_defs.h" +#include "gs.h" + +static void test(const struct comm *comm) +{ + uint i,np=comm->np,id=comm->id; + slong *glindex = tmalloc(slong,np*2); + char *out, *buf = tmalloc(char,80+np*2*30); + struct gs_data *gsh; + + for(i=0;iid); + for(i=0;iid); + for(i=0;iid); + for(i=0;i for size_t, offsetof + for malloc, calloc, realloc, free + for memcpy + "c99.h" + "fail.h" +*/ + +#if !defined(C99_H) || !defined(FAIL_H) +#error "mem.h" requires "c99.h" and "fail.h" +#endif + +/* + All memory management goes through the wrappers defined in this + header. Diagnostics can be turned on with + -DPRINT_MALLOCS=1 + Then all memory management operations will be printed to stdout. + + Most memory management occurs through use of the "array" type, + defined below, which defines a generic dynamically-sized array + that grows in bursts. The "buffer" type is a "char" array and + is often passed around by code to provide a common area for + scratch work. +*/ + +#ifndef PRINT_MALLOCS +# define PRINT_MALLOCS 0 +#else +# include +# ifndef comm_gbl_id +# define comm_gbl_id PREFIXED_NAME(comm_gbl_id) +# define comm_gbl_np PREFIXED_NAME(comm_gbl_np) +# include "types.h" + extern uint comm_gbl_id, comm_gbl_np; +# endif +#endif + +/*-------------------------------------------------------------------------- + Memory Allocation Wrappers to Catch Out-of-memory + --------------------------------------------------------------------------*/ + +static inline void *smalloc(size_t size, const char *file, unsigned line) +{ + void *restrict res = malloc(size); + #if PRINT_MALLOCS + fprintf(stdout,"MEM: proc %04d: %p = malloc(%ld) @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout); + #endif + if(!res && size) + fail(1,file,line,"allocation of %ld bytes failed\n",(long)size); + return res; +} + +static inline void *scalloc( + size_t nmemb, size_t size, const char *file, unsigned line) +{ + void *restrict res = calloc(nmemb, size); + #if PRINT_MALLOCS + fprintf(stdout,"MEM: proc %04d: %p = calloc(%ld) @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size*nmemb,file,line), fflush(stdout); + #endif + if(!res && nmemb) + fail(1,file,line,"allocation of %ld bytes failed\n", + (long)size*nmemb); + return res; +} + +static inline void *srealloc( + void *restrict ptr, size_t size, const char *file, unsigned line) +{ + void *restrict res = realloc(ptr, size); + #if PRINT_MALLOCS + if(res!=ptr) { + if(ptr) + fprintf(stdout,"MEM: proc %04d: %p freed by realloc @ %s(%u)\n", + (int)comm_gbl_id,ptr,file,line), fflush(stdout); + fprintf(stdout,"MEM: proc %04d: %p = realloc of %p to %lu @ %s(%u)\n", + (int)comm_gbl_id,res,ptr,(long)size,file,line), fflush(stdout); + } else + fprintf(stdout,"MEM: proc %04d: %p realloc'd to %lu @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout); + #endif + if(!res && size) + fail(1,file,line,"allocation of %ld bytes failed\n",(long)size); + return res; +} + +#define tmalloc(type, count) \ + ((type*) smalloc((count)*sizeof(type),__FILE__,__LINE__) ) +#define tcalloc(type, count) \ + ((type*) scalloc((count),sizeof(type),__FILE__,__LINE__) ) +#define trealloc(type, ptr, count) \ + ((type*) srealloc((ptr),(count)*sizeof(type),__FILE__,__LINE__) ) + +#if PRINT_MALLOCS +static inline void sfree(void *restrict ptr, const char *file, unsigned line) +{ + free(ptr); + fprintf(stdout,"MEM: proc %04d: %p freed @ %s(%u)\n", + (int)comm_gbl_id,ptr,file,line), fflush(stdout); +} +#define free(x) sfree(x,__FILE__,__LINE__) +#endif + +/*-------------------------------------------------------------------------- + A dynamic array + --------------------------------------------------------------------------*/ +struct array { void *ptr; size_t n,max; }; +#define null_array {0,0,0} +static void array_init_(struct array *a, size_t max, size_t size, + const char *file, unsigned line) +{ + a->n=0, a->max=max, a->ptr=smalloc(max*size,file,line); +} +static void array_resize_(struct array *a, size_t max, size_t size, + const char *file, unsigned line) +{ + a->max=max, a->ptr=srealloc(a->ptr,max*size,file,line); +} +static void *array_reserve_(struct array *a, size_t min, size_t size, + const char *file, unsigned line) +{ + size_t max = a->max; + if(maxptr; +} + +#define array_free(a) (free((a)->ptr)) +#define array_init(T,a,max) array_init_(a,max,sizeof(T),__FILE__,__LINE__) +#define array_resize(T,a,max) array_resize_(a,max,sizeof(T),__FILE__,__LINE__) +#define array_reserve(T,a,min) array_reserve_(a,min,sizeof(T),__FILE__,__LINE__) + +static void array_cat_(size_t size, struct array *d, const void *s, size_t n, + const char *file, unsigned line) +{ + char *out = array_reserve_(d,d->n+n,size, file,line); + memcpy(out+d->n*size, s, n*size); + d->n+=n; +} + +#define array_cat(T,d,s,n) array_cat_(sizeof(T),d,s,n,__FILE__,__LINE__) + +/*-------------------------------------------------------------------------- + Buffer = char array + --------------------------------------------------------------------------*/ +typedef struct array buffer; +#define null_buffer null_array +#define buffer_init(b,max) array_init(char,b,max) +#define buffer_resize(b,max) array_resize(char,b,max) +#define buffer_reserve(b,max) array_reserve(char,b,max) +#define buffer_free(b) array_free(b) + +/*-------------------------------------------------------------------------- + Alignment routines + --------------------------------------------------------------------------*/ +#define ALIGNOF(T) offsetof(struct { char c; T x; }, x) +static size_t align_as_(size_t a, size_t n) { return (n+a-1)/a*a; } +#define align_as(T,n) align_as_(ALIGNOF(T),n) +#define align_ptr(T,base,offset) ((T*)((char*)(base)+align_as(T,offset))) +#endif + diff --git a/src/jl/name.h b/src/jl/name.h new file mode 100644 index 0000000..b4bcd91 --- /dev/null +++ b/src/jl/name.h @@ -0,0 +1,44 @@ +#ifndef NAME_H +#define NAME_H + +/* establishes some macros to establish + * the FORTRAN naming convention + default gs_setup, etc. + -DUPCASE GS_SETUP, etc. + -DUNDERSCORE gs_setup_, etc. + * a prefix for all external (non-FORTRAN) function names + for example, -DPREFIX=jl_ transforms fail -> jl_fail + * a prefix for all external FORTRAN function names + for example, -DFPREFIX=jlf_ transforms gs_setup_ -> jlf_gs_setup_ +*/ + +/* the following macro functions like a##b, + but will expand a and/or b if they are themselves macros */ +#define TOKEN_PASTE_(a,b) a##b +#define TOKEN_PASTE(a,b) TOKEN_PASTE_(a,b) + +#ifdef PREFIX +# define PREFIXED_NAME(x) TOKEN_PASTE(PREFIX,x) +#else +# define PREFIXED_NAME(x) x +#endif + +#ifdef FPREFIX +# define FPREFIXED_NAME(x) TOKEN_PASTE(FPREFIX,x) +#else +# define FPREFIXED_NAME(x) x +#endif + +#if defined(UPCASE) +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(up) +# define FORTRAN_UNPREFIXED(low,up) up +#elif defined(UNDERSCORE) +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(TOKEN_PASTE(low,_)) +# define FORTRAN_UNPREFIXED(low,up) TOKEN_PASTE(low,_) +#else +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(low) +# define FORTRAN_UNPREFIXED(low,up) low +#endif + +#endif + diff --git a/src/jl/odep_info.py b/src/jl/odep_info.py new file mode 100755 index 0000000..620d0ec --- /dev/null +++ b/src/jl/odep_info.py @@ -0,0 +1,50 @@ +#!/usr/bin/python + +import sys, os, re + +obj_files = sys.argv[1:] + +defined = dict((x,set([])) for x in obj_files) +undefined = dict((x,set([])) for x in obj_files) +nm_re = re.compile("[0-9a-fA-F]*\s*([BCDRTU])\s+([A-Za-z_][A-Za-z_0-9]*)\s*") +def nm_match(x): return ( nm_re.match(line) for line in os.popen('nm -g '+x) ) +def nm_line(x,m): + if m.group(1)=='U': undefined[x].add(m.group(2)) + else: defined[x].add(m.group(2)) +[ [ nm_line(x,m) for m in nm_match(x) if m!=None ] for x in obj_files ] + +def closure(seq,f): + v = [], [x for x in seq], set(x for x in seq) + while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in + f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]] + return v[0] + +needs={} +def get_needs(x): + if not needs.has_key(x): + needs[x]=[y for y in obj_files if len(defined[y]&undefined[x])] + return needs[x] +deps = dict((x,closure(get_needs(x),get_needs)) for x in obj_files) + +for x in deps: + print x,'depends on',reduce((lambda a,b: a+" "+b),deps[x],"") +print + +results = [ os.path.splitext(x)[0] for x in obj_files if 'main' in defined[x] ] +print "RESULTS="+reduce((lambda a,b: a+" "+b),results,"") +print + +def need_X(objs): + for x in objs: + if "XOpenDisplay" in undefined[x]: return True + return False + +for x in results: + objs = deps[x+'.o']; + if not (x+'.o') in objs: objs.append(x+'.o') + sobjs = reduce((lambda a,b: a+" "+b),objs,"") + if need_X(objs): + print x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -lX11 -o $@" + else: + print x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -o $@" + diff --git a/src/jl/rand_elt_test.c b/src/jl/rand_elt_test.c new file mode 100644 index 0000000..1e11dae --- /dev/null +++ b/src/jl/rand_elt_test.c @@ -0,0 +1,169 @@ +#include +#include +#include "c99.h" +#include "types.h" +#include "name.h" +#include "poly.h" +#include "lob_bnd.h" + +static double det_2(const double A[4]) { return A[0]*A[3]-A[1]*A[2]; } + +static double quad_2(const double x0, const double g[2], const double H[3], + const double r[2]) +{ + return x0 + (g[0]*r[0]+g[1]*r[1]) + + ( r[0] * (H[0]*r[0]+H[1]*r[1]) + + r[1] * (H[1]*r[0]+H[2]*r[1]) )/2; +} + +static void quad_2_grad(double grad[2], const double g[2], const double H[3], + const double r[2]) +{ + grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]); + grad[1] = g[1] + (H[1]*r[0]+H[2]*r[1]); +} + +static double quad_2_jac(const double g[4], const double H[6], + const double r[2]) +{ + double J[4]; + quad_2_grad(J ,g ,H ,r); + quad_2_grad(J+2,g+2,H+3,r); + return det_2(J); +} + +static double det_3(const double A[9]) +{ + const double a = A[4]*A[8]-A[5]*A[7], + b = A[5]*A[6]-A[3]*A[8], + c = A[3]*A[7]-A[4]*A[6]; + return A[0]*a+A[1]*b+A[2]*c; +} + +static double quad_3(const double x0, const double g[3], const double H[6], + const double r[3]) +{ + return x0 + (g[0]*r[0]+g[1]*r[1]+g[2]*r[2]) + + ( r[0] * (H[0]*r[0]+H[1]*r[1]+H[2]*r[2]) + + r[1] * (H[1]*r[0]+H[3]*r[1]+H[4]*r[2]) + + r[2] * (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]) )/2; +} + +static void quad_3_grad(double grad[3], const double g[3], const double H[6], + const double r[3]) +{ + grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]+H[2]*r[2]); + grad[1] = g[1] + (H[1]*r[0]+H[3]*r[1]+H[4]*r[2]); + grad[2] = g[2] + (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]); +} + +static double quad_3_jac(const double g[9], const double H[18], + const double r[3]) +{ + double J[9]; + quad_3_grad(J ,g ,H ,r); + quad_3_grad(J+3,g+3,H+ 6,r); + quad_3_grad(J+6,g+6,H+12,r); + return det_3(J); +} + +void rand_elt_2(double *x, double *y, + const double *zr, unsigned nr, + const double *zs, unsigned ns) +{ + static int init=0; + static double z4[4], lob_bnd_data[16+3*4*(2*16+1)], + work[2*16*(4+16+1)]; + unsigned i,j; + double x0[2], g[4], H[6], jac[4*4], r[2]; + struct dbl_range jr; + if(!init) { + init=1; + lobatto_nodes(z4,4); + lob_bnd_setup(lob_bnd_data,4,16); + } + do { + for(i=0;i<4;++i) g[i] = -1+2*(rand()/(double)RAND_MAX); + for(i=0;i<6;++i) H[i] =.5*(-1+2*(rand()/(double)RAND_MAX)); + for(j=0;j<4;++j) { r[1] = z4[j]; + for(i=0;i<4;++i) { r[0] = z4[i]; + jac[j*4+i] = quad_2_jac(g,H,r); + } + } + jr = lob_bnd_2(lob_bnd_data,4,16, lob_bnd_data,4,16, jac, work); + /*printf("Jacobian range %g, %g\n", jr.min, jr.max);*/ + } while(jr.max*jr.min<=0); + for(i=0;i< 2;++i) x0[i] = -1+2*(rand()/(double)RAND_MAX); + for(j=0;j +#include +#include +#include "c99.h" +#include "name.h" +#include "types.h" +#include "fail.h" +#include "mem.h" +#include "sort.h" + +#define sarray_permute_ PREFIXED_NAME(sarray_permute_) +#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_) + +void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work) +{ + char *const ar = A, *const item = work; + sint *const fperm = (sint*)perm; + uint i; + for(i=0;iptr, + (char*)buf->ptr + align_as_(align,n*sizeof(uint))); +} diff --git a/src/jl/sarray_sort.h b/src/jl/sarray_sort.h new file mode 100644 index 0000000..77dc653 --- /dev/null +++ b/src/jl/sarray_sort.h @@ -0,0 +1,89 @@ +#ifndef SARRAY_SORT_H +#define SARRAY_SORT_H + +#if !defined(SORT_H) +#warning "sarray_sort.h" requires "sort.h" +#endif + +/*------------------------------------------------------------------------------ + + Array of Structs Sort + + buffer *buf; + typedef struct { ... } T; + T A[n]; + + sarray_sort(T,A,n, field_name,is_long, buf) + - sort A according to the struct field "field_name", + which is a ulong/uint field according as is_long is true/false + + sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf) + - sort A by field1 then field2 + + sarray_permute(T,A,n, perm, work) + - permute A (in-place) + A[0] <- A[perm[0]], etc. + work needs to hold sizeof(T) bytes (i.e., 1 T) + + sarray_permute_buf(T,A,n, buf); + - permute A according to the permutation in buf + A[0] <- A[perm[0]], etc. + where uint *perm = buf->ptr (see "sort.h") + + ----------------------------------------------------------------------------*/ + + +#define sarray_permute_ PREFIXED_NAME(sarray_permute_) +#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_) + +void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work); +void sarray_permute_buf_( + size_t align, size_t size, void *A, size_t n, buffer *buf); + +#define sarray_permute(T,A,n, perm, work) \ + sarray_permute_(sizeof(T),A,n, perm, work) +#define sarray_permute_buf(T,A,n, buf) \ + sarray_permute_buf_(ALIGNOF(T),sizeof(T),A,n,buf) + +#define sarray_sort_field(T,A,n, field,is_long, buf,keep) do { \ + if(is_long) \ + sortp_long(buf,keep, (ulong*)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \ + else \ + sortp (buf,keep, (uint *)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \ +} while (0) + +#define sarray_sort(T,A,n, field,is_long, buf) do { \ + sarray_sort_field(T,A,n, field,is_long, buf,0); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf) do { \ + sarray_sort_field(T,A,n, field2,is_long2, buf,0); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_3(T,A,n, field1,is_long1, field2,is_long2, \ + field3,is_long3, buf) do { \ + sarray_sort_field(T,A,n, field3,is_long3, buf,0); \ + sarray_sort_field(T,A,n, field2,is_long2, buf,1); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_4(T,A,n, field1,is_long1, field2,is_long2, \ + field3,is_long3, field4,is_long4, buf) do { \ + sarray_sort_field(T,A,n, field4,is_long4, buf,0); \ + sarray_sort_field(T,A,n, field3,is_long3, buf,1); \ + sarray_sort_field(T,A,n, field2,is_long2, buf,1); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +static void sarray_perm_invert( + uint *const pinv, const uint *const perm, const uint n) +{ + uint i; for(i=0;i +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" +#include "sarray_sort.h" + +int main() +{ + struct rec { double d; slong l; sint i; float f; }; + buffer buf = {0,0,0}; + struct rec rec[500]; + uint i; + + for(i=0;i<500;++i) { + sint num1 = rand() & 0xff; + slong num2 = rand(); + num2<<=(CHAR_BIT)*sizeof(int)-1; + num2|=rand(); + num2<<=(CHAR_BIT)*sizeof(int)-1; + num2|=rand(); + num2= num2<0?-num2:num2; + rec[i].d = num2; + rec[i].f = num2; + rec[i].l = num2; + rec[i].i = num1; + } + sarray_sort_2(struct rec,rec,500, i,0, l,1, &buf); + for(i=0;i<500;++i) + printf("%g\t%g\t%ld\t%d\n", + rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i); + + printf("\n"); + sarray_sort(struct rec,rec,500, l,1, &buf); + for(i=0;i<500;++i) + printf("%g\t%g\t%ld\t%d\n", + rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i); + buffer_free(&buf); + return 0; +} + diff --git a/src/jl/sarray_transfer.c b/src/jl/sarray_transfer.c new file mode 100644 index 0000000..9eed6ba --- /dev/null +++ b/src/jl/sarray_transfer.c @@ -0,0 +1,197 @@ +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "crystal.h" +#include "sort.h" + +#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many) +#define sarray_transfer_ PREFIXED_NAME(sarray_transfer_ ) +#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_) + +static void pack_int( + buffer *const data, const unsigned row_size, const uint id, + const char *const restrict input, const uint n, const unsigned size, + const unsigned p_off, const uint *const restrict perm) +{ + const unsigned after = p_off + sizeof(uint), after_len = size-after; + +#define GET_P() memcpy(&p,row+p_off,sizeof(uint)) +#define COPY_ROW() memcpy(out,row,p_off), \ + memcpy((char*)out + p_off,row+after,after_len) + +#define PACK_BODY() do { \ + uint dummy, *len_ptr=&dummy; \ + uint i, p,lp = -(uint)1, len=0; \ + uint *restrict out = buffer_reserve(data, n*(row_size+3)*sizeof(uint)); \ + for(i=0;in = out - (uint*)data->ptr; \ +} while(0) + PACK_BODY(); +#undef COPY_ROW +#undef GET_P +} + +static void pack_ext( + buffer *const data, const unsigned row_size, const uint id, + const char *const restrict input, const uint n, const unsigned size, + const uint *const restrict proc, const unsigned proc_stride, + const uint *const restrict perm) +{ + #define GET_P() p=*(const uint*)((const char*)proc+proc_stride*perm[i]) + #define COPY_ROW() memcpy(out,row,size) + PACK_BODY(); + #undef PACK_BODY + #undef COPY_ROW + #undef GET_P +} + +static void pack_more( + buffer *const data, const unsigned off, const unsigned row_size, + const char *const restrict input, const unsigned size, + const uint *restrict perm) +{ + uint *restrict buf = data->ptr, *buf_end = buf+data->n; + while(buf!=buf_end) { + uint *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) + memcpy((char*)buf+off, input+size*(*perm++), size), buf+=row_size; + } +} + +static void unpack_more( + char *restrict out, const unsigned size, + const buffer *const data, const unsigned off, const unsigned row_size) +{ + const uint *restrict buf = data->ptr, *buf_end = buf+data->n; + while(buf!=buf_end) { + const uint *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) + memcpy(out, (char*)buf+off, size), out+=size, buf+=row_size; + } +} + +static void unpack_int( + char *restrict out, const unsigned size, const unsigned p_off, + const buffer *const data, const unsigned row_size, int set_src) +{ + const unsigned after = p_off + sizeof(uint), after_len = size-after; + const uint *restrict buf = data->ptr, *buf_end = buf+data->n; + const unsigned pi = set_src ? 1:0; + while(buf!=buf_end) { + const uint p=buf[pi], *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) { + memcpy(out,buf,p_off); + memcpy(out+p_off,&p,sizeof(uint)); + memcpy(out+after,(const char *)buf+p_off,after_len); + out+=size, buf+=row_size; + } + } +} + +static uint num_rows(const buffer *const data, const unsigned row_size) +{ + const uint *buf = data->ptr, *buf_end = buf + data->n; + uint n=0; + while(buf!=buf_end) { uint len=buf[2]; n+=len, buf+=len+3; } + return n/row_size; +} + +static uint cap_rows(buffer *const data, const unsigned row_size,const uint max) +{ + uint *buf = data->ptr, *buf_end = buf + data->n; + const uint maxn = max*row_size; + uint n=0; + while(buf!=buf_end) { + uint len=buf[2]; n+=len; + if(nn = (buf-(uint*)data->ptr)+3+buf[2]; + buf+=len+3; + while(buf!=buf_end) { uint len=buf[2]; n+=len, buf+=len+3; } + break; + } + } + return n/row_size; +} + +/* An must be >= 1 */ +uint sarray_transfer_many( + struct array *const *const A, const unsigned *const size, const unsigned An, + const int fixed, const int ext, const int set_src, const unsigned p_off, + const uint *const restrict proc, const unsigned proc_stride, + struct crystal *const cr) +{ + uint n, *perm; + unsigned i,row_size,off,off1; + + off1 = size[0]; + if(!ext) off1 -= sizeof(uint); + row_size=off1; for(i=1;iwork,0, proc,A[0]->n,proc_stride); + + if(!ext) pack_int(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0], + p_off, perm); + else pack_ext(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0], + proc,proc_stride, perm); + for(off=off1,i=1;idata,off,row_size, A[i]->ptr,size[i], perm),off+=size[i]; + + crystal_router(cr); + + if(!fixed) { + n = num_rows(&cr->data,row_size); + for(i=0;in=n; + } else { + uint max=A[0]->max, an; + for(i=1;imaxmax; + n = cap_rows(&cr->data,row_size, max); + an = n>max?max:n; + for(i=0;in=an; + } + + if(!ext) unpack_int (A[0]->ptr,size[0],p_off, &cr->data, row_size, set_src); + else unpack_more(A[0]->ptr,size[0], &cr->data,0,row_size); + for(off=off1,i=1;iptr,size[i], &cr->data,off,row_size),off+=size[i]; + + return n; +} + + +void sarray_transfer_(struct array *const A, const unsigned size, + const unsigned p_off, const int set_src, + struct crystal *const cr) +{ + sarray_transfer_many(&A,&size,1, 0,0,set_src,p_off, + (uint*)((char*)A->ptr+p_off),size, cr); +} + +void sarray_transfer_ext_(struct array *const A, const unsigned size, + const uint *const proc, const unsigned proc_stride, + struct crystal *const cr) +{ + sarray_transfer_many(&A,&size,1, 0,1,0,0, proc,proc_stride, cr); +} + diff --git a/src/jl/sarray_transfer.h b/src/jl/sarray_transfer.h new file mode 100644 index 0000000..c195e21 --- /dev/null +++ b/src/jl/sarray_transfer.h @@ -0,0 +1,95 @@ +#ifndef SARRAY_TRANSFER_H +#define SARRAY_TRANSFER_H + +#if !defined(CRYSTAL_H) +#warning "sarray_transfer.h" requires "crystal.h" +#endif + +/* + High-level interface for the crystal router. + Given an array of structs, transfers each to the process indicated + by a field of the struct, which gets set to the source process on output. + + For the dynamic "array" type, see "mem.h". + + Requires a "crystal router" object: + + struct comm c; + struct crystal cr; + + comm_init(&c, MPI_COMM_WORLD); + crystal_init(&cr, &c); + + Example sarray_transfer usage: + + struct T { ...; uint proc; ...; }; + struct array A = null_array; + struct T *p, *e; + + // resize A to 100 struct T's, fill up with data + p = array_reserve(struct T, &A, 100), A.n=100; + for(e=p+A.n;p!=e;++p) { + ... + p->proc = ...; + ... + } + + // array A represents the array + // struct T ar[A.n] where &ar[0] == A.ptr + // transfer ar[i] to processor ar[i].proc for each i=0,...,A.n-1: + + sarray_transfer(struct T, A, proc,set_src, &cr); + + // now array A represents a different array with a different size + // struct T ar[A.n] where &ar[0] == A.ptr + // the ordering is arbitrary + // if set_src != 0, ar[i].proc is set to the proc where ar[i] came from + // otherwise ar[i].proc is unchanged (and == this proc id) + + // note: two calls of + sarray_transfer(struct T, A, proc,1, &cr); + // in a row should return A to its original state, up to ordering + + Cleanup: + array_free(&A); + crystal_free(&cr); + comm_free(&c); + + Example sarray_transfer_ext usage: + + struct T { ... }; + struct array A; + uint proc[A.n]; + + // array A represents the array + // struct T ar[A.n] where &ar[0] == A.ptr + // transfer ar[i] to processor proc[i] for each i=0,...,A.n-1: + sarray_transfer_ext(struct T, &A, proc, &cr); + + // no information is available now on where each struct came from + +*/ + +#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many) +#define sarray_transfer_ PREFIXED_NAME(sarray_transfer_ ) +#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_) + +uint sarray_transfer_many( + struct array *const *const A, const unsigned *const size, const unsigned An, + const int fixed, const int ext, const int set_src, const unsigned p_off, + const uint *const restrict proc, const unsigned proc_stride, + struct crystal *const cr); +void sarray_transfer_(struct array *const A, const unsigned size, + const unsigned p_off, const int set_src, + struct crystal *const cr); +void sarray_transfer_ext_(struct array *const A, const unsigned size, + const uint *const proc, const unsigned proc_stride, + struct crystal *const cr); + +#define sarray_transfer(T,A,proc_field,set_src,cr) \ + sarray_transfer_(A,sizeof(T),offsetof(T,proc_field),set_src,cr) + +#define sarray_transfer_ext(T,A,proc,proc_stride,cr) \ + sarray_transfer_ext_(A,sizeof(T),proc,proc_stride,cr) + +#endif diff --git a/src/jl/sarray_transfer_test.c b/src/jl/sarray_transfer_test.c new file mode 100644 index 0000000..aaf3b7f --- /dev/null +++ b/src/jl/sarray_transfer_test.c @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "sort.h" +#include "sarray_sort.h" +#include "crystal.h" +#include "sarray_transfer.h" + +typedef struct { + double d; + ulong l,l2; + uint i; + uint p; +} r_work; + +int main(int narg, char *arg[]) +{ + comm_ext world; int np; + struct comm comm; + struct crystal crystal; + struct array A, A0=null_array; r_work *row, *row_0; + uint i; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + crystal_init(&crystal,&comm); + + array_init(r_work,&A,np*3), A.n=np*3, row=A.ptr; + for(i=0;i %02d: %08x %08x %d %g\n", + (int)comm.id,(int)row[i].p,(int)row[i].i, + (int)row[i].l,(int)row[i].p,row[i].d); + + array_cat(r_work,&A0, row,A.n); + + sarray_transfer(r_work,&A, p,1, &crystal); + + row=A.ptr; + for(i=0;i +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" + +#define T unsigned int +#define SORT_SUFFIX _ui +#include "sort_imp.h" +#undef SORT_SUFFIX +#undef T + +#if defined(USE_LONG) || defined(GLOBAL_LONG) +# define T unsigned long +# define SORT_SUFFIX _ul +# include "sort_imp.h" +# undef SORT_SUFFIX +# undef T +#endif + +#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG) +# define T unsigned long long +# define SORT_SUFFIX _ull +# include "sort_imp.h" +# undef SORT_SUFFIX +# undef T +#endif diff --git a/src/jl/sort.h b/src/jl/sort.h new file mode 100644 index 0000000..eaeeb95 --- /dev/null +++ b/src/jl/sort.h @@ -0,0 +1,76 @@ +#ifndef SORT_H +#define SORT_H + +#if !defined(TYPES_H) || !defined(MEM_H) +#warning "sort.h" requires "types.h" and "mem.h" +/* types.h defines uint, ulong + mem.h defines buffer */ +#endif + +/*------------------------------------------------------------------------------ + + Sort + + O(n) stable sort with good performance for all n + + sortv (uint *out, const uint *A, uint n, uint stride, buffer *buf) + sortv_long(ulong *out, const ulong *A, uint n, uint stride, buffer *buf) + + sortp (buffer *buf, int perm_start, const uint *A, uint n, uint stride) + sortp_long(buffer *buf, int perm_start, const ulong *A, uint n, uint stride) + + A, n, stride : specifices the input (stride is in bytes!) + out : the sorted values on output + + For the value sort, (sortv*) + A and out may alias (A == out) exactly when stride == sizeof(T) + + For the permutation sort, (sortp*) + The permutation can be both input (when start_perm!=0) and output, + following the convention that it is always at the start of the buffer buf: + uint *perm = buf->ptr; + The permutation denotes the ordering + A[perm[0]], A[perm[1]], ..., A[perm[n-1]] + (assuming stride == sizeof(uint) or sizeof(ulong) as appropriate) + and is re-arranged stably to give a sorted ordering. + Specifying start_perm==0 is equivalent to specifying + perm[i] = i, i=0,...,n-1 + for an initial permutation (but may be faster). + The buffer will be expanded as necessary to accomodate the permutation + and the required scratch space. + + Most code calls these routines indirectly via the higher-level routine + sarray_sort for sorting arrays of structs (see "sarray_sort.h"). + + ----------------------------------------------------------------------------*/ + +#define sortv_ui PREFIXED_NAME(sortv_ui) +#define sortv_ul PREFIXED_NAME(sortv_ul) +#define sortv_ull PREFIXED_NAME(sortv_ull) +#define sortp_ui PREFIXED_NAME(sortp_ui) +#define sortp_ul PREFIXED_NAME(sortp_ul) +#define sortp_ull PREFIXED_NAME(sortp_ull) + +#define sortv TYPE_LOCAL(sortv_ui,sortv_ul,sortv_ull) +#define sortp TYPE_LOCAL(sortp_ui,sortp_ul,sortp_ull) +#define sortv_long TYPE_GLOBAL(sortv_ui,sortv_ul,sortv_ull) +#define sortp_long TYPE_GLOBAL(sortp_ui,sortp_ul,sortp_ull) + +void sortv_ui(unsigned *out, const unsigned *A, uint n, unsigned stride, + buffer *restrict buf); +void sortv_ul(unsigned long *out, + const unsigned long *A, uint n, unsigned stride, + buffer *restrict buf); +uint *sortp_ui(buffer *restrict buf, int start_perm, + const unsigned *restrict A, uint n, unsigned stride); +uint *sortp_ul(buffer *restrict buf, int start_perm, + const unsigned long *restrict A, uint n, unsigned stride); +#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG) +void sortv_ull(unsigned long long *out, + const unsigned long long *A, uint n, unsigned stride, + buffer *restrict buf); +uint *sortp_ull(buffer *restrict buf, int start_perm, + const unsigned long long *restrict A, uint n, unsigned stride); +#endif + +#endif diff --git a/src/jl/sort_imp.h b/src/jl/sort_imp.h new file mode 100644 index 0000000..08b05d1 --- /dev/null +++ b/src/jl/sort_imp.h @@ -0,0 +1,543 @@ +#if !defined(T) || !defined(SORT_SUFFIX) +#error sort_imp.h not meant to be compiled by itself +#endif + +#define sort_data TOKEN_PASTE(sort_data ,SORT_SUFFIX) +#define radix_count TOKEN_PASTE(radix_count ,SORT_SUFFIX) +#define radix_offsets TOKEN_PASTE(radix_offsets ,SORT_SUFFIX) +#define radix_zeros TOKEN_PASTE(radix_zeros ,SORT_SUFFIX) +#define radix_passv TOKEN_PASTE(radix_passv ,SORT_SUFFIX) +#define radix_sortv TOKEN_PASTE(radix_sortv ,SORT_SUFFIX) +#define radix_passp0_b TOKEN_PASTE(radix_passp0_b ,SORT_SUFFIX) +#define radix_passp_b TOKEN_PASTE(radix_passp_b ,SORT_SUFFIX) +#define radix_passp_m TOKEN_PASTE(radix_passp_m ,SORT_SUFFIX) +#define radix_passp_e TOKEN_PASTE(radix_passp_e ,SORT_SUFFIX) +#define radix_passp0_be TOKEN_PASTE(radix_passp0_be,SORT_SUFFIX) +#define radix_passp_be TOKEN_PASTE(radix_passp_be, SORT_SUFFIX) +#define radix_sortp TOKEN_PASTE(radix_sortp ,SORT_SUFFIX) +#define merge_sortv TOKEN_PASTE(merge_sortv ,SORT_SUFFIX) +#define merge_copy_perm TOKEN_PASTE(merge_copy_perm,SORT_SUFFIX) +#define merge_sortp0 TOKEN_PASTE(merge_sortp0 ,SORT_SUFFIX) +#define merge_sortp TOKEN_PASTE(merge_sortp ,SORT_SUFFIX) +#define heap_sortv TOKEN_PASTE(heap_sortv ,SORT_SUFFIX) + +#define sortv PREFIXED_NAME(TOKEN_PASTE(sortv,SORT_SUFFIX)) +#define sortp PREFIXED_NAME(TOKEN_PASTE(sortp,SORT_SUFFIX)) + +typedef struct { T v; uint i; } sort_data; + +#define INC_PTR(A,stride) ((A)=(T*)((char*)(A)+(stride))) +#define INDEX_PTR(A,stride,i) (*(T*)((char*)(A)+(i)*(stride))) + +/*------------------------------------------------------------------------------ + + Radix Sort + + stable; O(n+k) time and extra storage + where k = (digits in an int) * 2^(bits per digit) + (e.g. k = 4 * 256 = 1024 for 32-bit ints with 8-bit digits) + + brief description: + input sorted stably on each digit, starting with the least significant + counting sort is used for each digit: + a pass through the input counts the occurences of each digit value + on a second pass, each input has a known destination + + tricks: + all counting passes are combined into one + the counting pass also computes the inclusive bit-wise or of all inputs, + which is used to skip digit positions for which all inputs have zeros + + ----------------------------------------------------------------------------*/ + +#define STATIC_DIGIT_BUCKETS 1 + +#define DIGIT_BITS 8 +#define DIGIT_VALUES (1<i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS +#define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1) +#define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2) +#define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4) +#define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8) +#define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16) +#define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32) + +static T radix_count( + uint (*restrict count)[DIGIT_VALUES], + const T *restrict A, const T *const end, const unsigned stride) +{ + T bitorkey = 0; + memset(count,0,COUNT_SIZE*sizeof(uint)); + do { + T val=*A; + bitorkey|=val; + COUNT_DIGIT_64(DIGITS,0); + /* above macro expands to: + if(DIGITS> 0) count[ 0][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + if(DIGITS> 1) count[ 1][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + ... + if(DIGITS>63) count[63][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + */ + } while(INC_PTR(A,stride),A!=end); + return bitorkey; +} + +#undef COUNT_DIGIT_01 +#undef COUNT_DIGIT_02 +#undef COUNT_DIGIT_04 +#undef COUNT_DIGIT_08 +#undef COUNT_DIGIT_16 +#undef COUNT_DIGIT_32 +#undef COUNT_DIGIT_64 + +static void radix_offsets(uint *restrict c) +{ + uint *const ce = c+DIGIT_VALUES; + uint sum = 0; + do { + const uint c0=c[0], c1=c[1], c2=c[2], c3=c[3]; + const uint o1=sum+c0, o2=o1+c1, o3=o2+c2; + c[0]=sum, c[1]=o1, c[2]=o2, c[3]=o3; + sum = o3+c3; + c+=4; + } while(c!=ce); +} + +static unsigned radix_zeros( + T bitorkey, uint (*restrict count)[DIGIT_VALUES], + unsigned *restrict shift, uint **restrict offsets) +{ + unsigned digits=0, sh=0; uint *c = &count[0][0]; + do { + if(bitorkey&DIGIT_MASK) *shift++ = sh, *offsets++ = c, ++digits, + radix_offsets(c); + } while(bitorkey>>=DIGIT_BITS,sh+=DIGIT_BITS,c+=DIGIT_VALUES,sh!=VALUE_BITS); + return digits; +} + +static void radix_passv( + const T *restrict A, const T *const end, const unsigned stride, + const unsigned sh, uint *const restrict off, T *const restrict out) +{ + do out[off[(*A>>sh)&DIGIT_MASK]++] = *A; while(INC_PTR(A,stride),A!=end); +} + +static void radix_sortv( + T *out, const T *A, const uint n, const unsigned stride, + T *work, uint (*restrict count)[DIGIT_VALUES]) +{ + const T *const end = &INDEX_PTR(A,stride,n); + T bitorkey = radix_count(count, A,end,stride); + unsigned shift[DIGITS]; uint *offsets[DIGITS]; + const unsigned digits = radix_zeros(bitorkey,count,shift,offsets); + if(digits==0) { + memset(out,0,n*sizeof(T)); + } else { + T *src, *dst; unsigned d; + if(out==A || (digits&1)==0) dst=out,src=work; + else src=out,dst=work; + radix_passv(A,end,stride,shift[0],offsets[0],src); + for(d=1;d!=digits;++d) { + T *t; + radix_passv(src,src+n,sizeof(T),shift[d],offsets[d],dst); + t=src,src=dst,dst=t; + } + if(src!=out) memcpy(out,src,n*sizeof(T)); + } +} + +static void radix_passp0_b( + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *const restrict out) +{ + uint i=0; + do { + T v = *A; + sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++]; + d->v=v, d->i=i++; + } while(INC_PTR(A,stride),i!=n); +} + +static void radix_passp_b( + const uint *restrict p, + const T *const restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *const out) +{ + const uint *const pe = p+n; + do { + uint j = *p++; + T v = INDEX_PTR(A,stride,j); + sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++]; + d->v=v, d->i=j; + } while(p!=pe); +} + +static void radix_passp_m( + const sort_data *restrict src, const sort_data *const end, + const unsigned sh, uint *const restrict off, + sort_data *const restrict out) +{ + do { + sort_data *d = &out[off[(src->v>>sh)&DIGIT_MASK]++]; + d->v=src->v,d->i=src->i; + } while(++src!=end); +} + +static void radix_passp_e( + const sort_data *restrict src, const sort_data *const end, + const unsigned sh, uint *const restrict off, + uint *const restrict out) +{ + do out[off[(src->v>>sh)&DIGIT_MASK]++]=src->i; while(++src!=end); +} + +static void radix_passp0_be( + uint *const restrict out, + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off) +{ + uint i=0; + do out[off[(*A>>sh)&DIGIT_MASK]++]=i++; while(INC_PTR(A,stride),i!=n); +} + +static void radix_passp_be( + uint *restrict p, + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *restrict work) +{ + uint *q = p, *const qe = p+n; + uint *w = &work[0].i; + do { + uint j = *q++; + T v = INDEX_PTR(A,stride,j); + w[off[(v>>sh)&DIGIT_MASK]++]=j; + } while(q!=qe); + memcpy(p,w,n*sizeof(uint)); +} + +static void radix_sortp( + uint *restrict idx, uint perm_start, + const T *restrict A, const uint n, const unsigned stride, + sort_data *restrict work, + uint (*restrict count)[DIGIT_VALUES]) +{ + T bitorkey = radix_count(count, A,&INDEX_PTR(A,stride,n),stride); + unsigned shift[DIGITS]; uint *offsets[DIGITS]; + unsigned digits = radix_zeros(bitorkey,count,shift,offsets); + if(digits==0) { + if(!perm_start) { uint i=0; do *idx++=i++; while(i!=n); } + } else if(digits==1) { + if(perm_start) radix_passp_be (idx,A,n,stride,shift[0],offsets[0],work); + else radix_passp0_be(idx,A,n,stride,shift[0],offsets[0]); + } else { + sort_data *src, *dst; unsigned d; + if((digits&1)==0) dst=work,src=dst+n; + else src=work,dst=src+n; + if(perm_start) radix_passp_b (idx,A,n,stride,shift[0],offsets[0],src); + else radix_passp0_b( A,n,stride,shift[0],offsets[0],src); + for(d=1;d!=digits-1;++d) { + sort_data *t; + radix_passp_m(src,src+n,shift[d],offsets[d],dst); + t=src,src=dst,dst=t; + } + radix_passp_e(src,src+n,shift[d],offsets[d],idx); + } +} + +/*------------------------------------------------------------------------------ + + Merge Sort + + stable; O(n log n) time + + ----------------------------------------------------------------------------*/ + +#define MERGE_2(p,v) \ + if(VAL(v[1])3) odd<<=1,odd|=(n&1),n>>=1,c<<=1,b^=1; \ + } else \ + base-=n-(odd&1),n<<=1,n-=(odd&1),odd>>=1,c>>=1; \ + if(c==0) break; \ + p = buf[b]+base; \ + if(n==2) { \ + DATA v[2]; SETVAL(v[0],i), SETVAL(v[1],i+1); \ + MERGE_2(p,v); \ + i+=2; \ + } else if(n==3) { \ + DATA v[3]; SETVAL(v[0],i), SETVAL(v[1],i+1), SETVAL(v[2],i+2); \ + MERGE_3(p,v); \ + i+=3; \ + } else { \ + const uint na = n>>1, nb = (n+1)>>1; \ + const DATA *restrict ap = buf[b^1]+base, *const ae = ap+na; \ + DATA *restrict bp = p+na, *const be = bp+nb; \ + for(;;) { \ + if(VAL((*bp))i; while(p!=pe);*/ + uint n_by_8 = (n+7)/8; + switch(n%8) { + case 0: do { *idx++ = (p++)->i; + case 7: *idx++ = (p++)->i; + case 6: *idx++ = (p++)->i; + case 5: *idx++ = (p++)->i; + case 4: *idx++ = (p++)->i; + case 3: *idx++ = (p++)->i; + case 2: *idx++ = (p++)->i; + case 1: *idx++ = (p++)->i; + } while (--n_by_8 > 0); + } +} + +static void merge_sortp0( + uint *restrict idx, + const T *restrict A, const uint An, const unsigned stride, + sort_data *restrict work) +{ + sort_data *buf[2]; buf[0]=work+An,buf[1]=work; +#define DATA sort_data +#define VAL(x) x.v +#define SETVAL(x,ai) x.v=*A,INC_PTR(A,stride),x.i=ai + MERGE_SORT(); +#undef SETVAL +#undef VAL +#undef DATA + merge_copy_perm(idx,buf[0],An); +} + +static void merge_sortp( + uint *restrict idx, + const T *const restrict A, const uint An, const unsigned stride, + sort_data *restrict work) +{ + sort_data *buf[2]; buf[0]=work+An,buf[1]=work; +#define DATA sort_data +#define VAL(x) x.v +#define SETVAL(x,ai) x.i=idx[ai],x.v=INDEX_PTR(A,stride,x.i) + MERGE_SORT(); +#undef SETVAL +#undef VAL +#undef DATA + merge_copy_perm(idx,buf[0],An); +} + +#undef MERGE_SORT +#undef MERGE_3 +#undef MERGE_2 + +/*------------------------------------------------------------------------------ + + Heap Sort + + in-place, stability unobservable; O(n log n) time + + ----------------------------------------------------------------------------*/ +static void heap_sortv(T *const restrict A, unsigned n) +{ + unsigned i; + /* build heap */ + for(i=1;i>1; + if(A[p] >= item) continue; + do A[h]=A[p], h=p, p=(p-1)>>1; while(h && A[p] < item); + A[h] = item; + } + /* extract */ + for(i=n-1;i;--i) { + T item = A[i]; + unsigned h = 0; + A[i] = A[0]; + for(;;) { + unsigned ch = 1+(h<<1), r = ch+1; + if(r=i || item >= A[ch]) break; + A[h]=A[ch], h=ch; + } + A[h] = item; + } +} + + +/*------------------------------------------------------------------------------ + + Hybrid Stable Sort + + low-overhead merge sort when n is small, + otherwise asymptotically superior radix sort + + result = O(n) sort with good performance for all n + + A, n, stride : specifices the input, stride in bytes + out : the sorted values on output + + For the value sort, + A and out may alias (A == out) exactly when stride == sizeof(T), + in which case heap sort is used for small sizes + + For the permutation sort, + the permutation can be both input (when start_perm!=0) and output, + following the convention that it is always at the start of the buffer buf; + the buffer will be expanded as necessary to accomodate the permutation + and the required scratch space + + ----------------------------------------------------------------------------*/ + +void sortv(T *out, const T *A, uint n, unsigned stride, buffer *restrict buf) +{ + if(nptr); + } + } + } else if(STATIC_DIGIT_BUCKETS) { + static uint count[DIGITS][DIGIT_VALUES]; + buffer_reserve(buf,n*sizeof(T)); + radix_sortv(out, A,n,stride, (T*)buf->ptr,count); + } else { + T *restrict work; + uint (*restrict count)[DIGIT_VALUES]; + const size_t count_off=align_as(uint,n*sizeof(T)); + buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES])); + work = buf->ptr; + count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off); + radix_sortv(out, A,n,stride, work,count); + } +} + +uint *sortp(buffer *restrict buf, int start_perm, + const T *restrict A, uint n, unsigned stride) +{ + uint *restrict perm; + sort_data *restrict work; + size_t work_off=align_as(sort_data,n*sizeof(uint)); + if(nptr; + work = (sort_data*)((char*)buf->ptr+work_off); + if(n<2) { + if(n==1) *perm=0; + } else { + if(start_perm) merge_sortp (perm, A,n,stride, work); + else merge_sortp0(perm, A,n,stride, work); + } + } else if(STATIC_DIGIT_BUCKETS){ + static uint count[DIGITS][DIGIT_VALUES]; + buffer_reserve(buf,work_off+2*n*sizeof(sort_data)); + perm = buf->ptr; + work = (sort_data*)((char*)buf->ptr+work_off); + radix_sortp(perm,start_perm, A,n,stride, work,count); + } else { + uint (*restrict count)[DIGIT_VALUES]; + const size_t count_off=align_as(uint,work_off+2*n*sizeof(sort_data)); + buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES])); + perm = buf->ptr; + work = (sort_data*)((char*)buf->ptr+work_off); + count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off); + radix_sortp(perm,start_perm, A,n,stride, work,count); + } + return perm; +} + +#undef STATIC_DIGIT_BUCKETS + +#undef DIGIT_BITS +#undef DIGIT_VALUES +#undef DIGIT_MASK +#undef CEILDIV +#undef DIGITS +#undef VALUE_BITS +#undef COUNT_SIZE + +#undef INDEX_PTR +#undef INC_PTR + +#undef sortp +#undef sortv + +#undef merge_sortp +#undef merge_sortp0 +#undef merge_sortv +#undef radix_sortp +#undef radix_passp_be +#undef radix_passp0_be +#undef radix_passp_e +#undef radix_passp_m +#undef radix_passp_b +#undef radix_passp0_b +#undef radix_sortv +#undef radix_passv +#undef radix_zeros +#undef radix_offsets +#undef radix_count +#undef sort_data + diff --git a/src/jl/sort_test.c b/src/jl/sort_test.c new file mode 100644 index 0000000..acd0bb3 --- /dev/null +++ b/src/jl/sort_test.c @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" + +#define SMALL 22 +#define NUM 500 +#define SI 9 + +ulong A[NUM][SI], Av[NUM]; +uint B[NUM][SI], Bv[NUM]; + +uint P[NUM], Q[NUM]; + +int main() +{ + buffer buf = {0,0,0}; + uint i; + + /*buffer_init(&buf, sortp_long_worksize(NUM,0));*/ + +#if 0 + printf("\nsource:\n"); +#endif + for(i=0;i!=NUM;++i) { + A[i][0]=rand(); + A[i][0]<<=CHAR_BIT*sizeof(int)-1; + A[i][0]^=rand(); + A[i][0]<<=CHAR_BIT*sizeof(int)-1; + A[i][0]^=rand(); + if(0) A[i][0]&=0x000ff00; + B[i][0]=A[i][0]; +#if 0 + printf("%016lx\t%016lx\n",(unsigned long)A[i][0],(unsigned long)B[i][0]); +#endif + } +#if 0 + printf("\n"); +#endif + printf("merge sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv_long(Av, &A[0][0],SMALL,sizeof(ulong[SI]), &buf); + sortp_long(&buf,0, &A[0][0],SMALL,sizeof(ulong[SI])); + memcpy(P,buf.ptr,SMALL*sizeof(uint)); + memcpy(buf.ptr,Q,SMALL*sizeof(uint)); + sortp_long(&buf,1, &A[0][0],SMALL,sizeof(ulong[SI])); + memcpy(Q,buf.ptr,SMALL*sizeof(uint)); + for(i=0;i!=SMALL;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)A[P[i]][0], + A[P[i]][0]==A[Q[i]][0], + Av[i]==A[P[i]][0]); + printf("\n"); + printf("radix sort:\n"); + for(i=0;i!=NUM;++i) Q[i]=NUM-1-i; + sortv_long(Av, &A[0][0],NUM,sizeof(ulong[SI]), &buf); + sortp_long(&buf,0, &A[0][0],NUM,sizeof(ulong[SI])); + memcpy(P,buf.ptr,NUM*sizeof(uint)); + memcpy(buf.ptr,Q,NUM*sizeof(uint)); + sortp_long(&buf,1, &A[0][0],NUM,sizeof(ulong[SI])); + memcpy(Q,buf.ptr,NUM*sizeof(uint)); + for(i=0;i!=NUM;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)A[P[i]][0], + A[P[i]][0]==A[Q[i]][0], + Av[i]==A[P[i]][0]); + + printf("\nsmall integers:\n"); + printf("\n"); + + printf("heap sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv(Q, Q,SMALL,sizeof(uint), &buf); + for(i=0;i!=SMALL;++i) printf("\t%u\n",(unsigned)Q[i]); + + printf("merge sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv(Bv, &B[0][0],SMALL,sizeof(uint[SI]), &buf); + sortp(&buf,0, &B[0][0],SMALL,sizeof(uint[SI])); + memcpy(P,buf.ptr,SMALL*sizeof(uint)); + memcpy(buf.ptr,Q,SMALL*sizeof(uint)); + sortp(&buf,1, &B[0][0],SMALL,sizeof(uint[SI])); + memcpy(Q,buf.ptr,SMALL*sizeof(uint)); + for(i=0;i!=SMALL;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)B[P[i]][0], + B[P[i]][0]==B[Q[i]][0], + B[P[i]][0]==Bv[i]); + printf("\n"); + printf("radix sort:\n"); + for(i=0;i!=NUM;++i) Q[i]=NUM-1-i; + sortv(Bv, &B[0][0],NUM,sizeof(uint[SI]), &buf); + sortp(&buf,0, &B[0][0],NUM,sizeof(uint[SI])); + memcpy(P,buf.ptr,NUM*sizeof(uint)); + memcpy(buf.ptr,Q,NUM*sizeof(uint)); + sortp(&buf,1, &B[0][0],NUM,sizeof(uint[SI])); + memcpy(Q,buf.ptr,NUM*sizeof(uint)); + for(i=0;i!=NUM;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)B[P[i]][0], + B[P[i]][0]==B[Q[i]][0], + B[P[i]][0]==Bv[i]); + buffer_free(&buf); + return 0; +} + diff --git a/src/jl/sort_test2.c b/src/jl/sort_test2.c new file mode 100644 index 0000000..4481a16 --- /dev/null +++ b/src/jl/sort_test2.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" +#include "rdtsc.h" + +#if 1 + +DEFINE_HW_COUNTER() + +#define N (1<<20) + +ulong A[N], out[N]; +uint P[N]; + +int main() +{ + buffer buf = null_buffer; + uint i; + unsigned long long tic, toc; + unsigned r; + #define TIME(t, repeat, what) do { \ + for(r=repeat;r;--r) { what; } \ + tic = getticks(); \ + for(r=repeat;r;--r) { what; } \ + toc = getticks(); \ + t = toc-tic; \ + } while(0) + + for(i=0;i!=N;++i) { + A[i]=rand(); + A[i]<<=CHAR_BIT*sizeof(int)-1; + A[i]^=rand(); + A[i]<<=CHAR_BIT*sizeof(int)-1; + A[i]^=rand(); + if(0) A[i]&=0x000ff00; + } + + for(i=N;i;i>>=1) { + unsigned long long t; + TIME(t, (N/i), + sortv_long(out, A,i,sizeof(ulong), &buf)); + printf("sortv %d : %g cycles per item\n", + (int)i, t/(double)(N/i)/(double)i); + } + + for(i=N;i;i>>=1) { + unsigned long long t; + TIME(t, (N/i), + sortp_long(&buf,0, A,i,sizeof(ulong))); + printf("sortp %d : %g cycles per item\n", + (int)i, t/(double)(N/i)/(double)i); + } + + buffer_free(&buf); + return 0; +} + +#else + +int main() +{ + return 0; +} + +#endif + diff --git a/src/jl/spchol_test.c b/src/jl/spchol_test.c new file mode 100644 index 0000000..5fc6596 --- /dev/null +++ b/src/jl/spchol_test.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sparse_cholesky.h" + +int main() +{ +#define x -1 + + uint i,n=7; + uint Aj [] = {0,2, 1,2,6, 0,1,2, 3,5,6, 4,5, 3,4,5, 1,3,6}; + double A[] = {2,x, 2,x,x, x,x,2, 2,x,x, 2,x, x,x,2, x,x,2}; +#undef x + uint Arp[] = {0,2,5,8,11,13,16,19}; + double x[7], b[7] = {0,0,0,0, 0,0,0}; + uint o[7] = {0,2,1,6,3,5,4}; +/* + uint i,n=10; + uint Aj [] = {0,2,7, 1,4,9, 0,2,6, 3,8,9, 1,4,8,9, 5,6,7, 2,5,6, 0,5,7,8,9, 3,4,7,8, 1,3,4,7,9}; + real A [] = {3,x,x, 2,x,x, x,2,x, 2,x,x, x,3,x,x, 2,x,x, x,x,2, x,x,4,x,x, x,x,x,3, x,x,x,x,4}; +#undef x + uint Arp[] = {0, 3, 6, 9, 12, 16, 19, 22, 27, 31, 36}; + real b[] = {1,2,3,4,5, 6,7,8,9,10}; +*/ + struct sparse_cholesky data; + buffer buf; + buffer_init(&buf,4); + sparse_cholesky_factor(n,Arp,Aj,A,&data,&buf); + + for(i=0;i +# define tensor_dot(a,b,n) cblas_ddot((int)(n),a,1,b,1) +# define tensor_mxv(y,ny,A,x,nx) \ + cblas_dgemv(CblasColMajor,CblasNoTrans,(int)ny,(int)nx, \ + 1.0,A,(int)ny,x,1,0.0,y,1) +# define tensor_mtxv(y,ny,A,x,nx) \ + cblas_dgemv(CblasColMajor,CblasTrans,(int)nx,(int)ny, \ + 1.0,A,(int)nx,x,1,0.0,y,1) +# define tensor_mxm(C,nc,A,na,B,nb) \ + cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans, \ + (int)nc,(int)nb,(int)na,1.0, \ + A,(int)nc,B,(int)na,0.0,C,(int)nc) +# define tensor_mtxm(C,nc,A,na,B,nb) \ + cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans, \ + (int)nc,(int)nb,(int)na,1.0, \ + A,(int)na,B,(int)na,0.0,C,(int)nc) +#else +# define tensor_dot PREFIXED_NAME(tensor_dot ) +# define tensor_mtxm PREFIXED_NAME(tensor_mtxm) +double tensor_dot(const double *a, const double *b, uint n); + +/* C (nc x nb) = [A (na x nc)]^T * B (na x nb); all column-major */ +void tensor_mtxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb); +# if defined(USE_NAIVE_BLAS) +# define tensor_mxv PREFIXED_NAME(tensor_mxv ) +# define tensor_mtxv PREFIXED_NAME(tensor_mtxv) +# define tensor_mxm PREFIXED_NAME(tensor_mxm ) +/* y = A x */ +void tensor_mxv(double *y, uint ny, const double *A, const double *x, uint nx); + +/* y = A^T x */ +void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx); + +/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */ +void tensor_mxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb); +# else +# define nek_mxm FORTRAN_UNPREFIXED(mxm,MXM) +/* C (na x nc) = A (na x nb) * B (nb x nc); all column-major */ +void nek_mxm(const double *A, const uint *na, + const double *B, const uint *nb, + double *C, const uint *nc); +/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */ +static void tensor_mxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb) +{ nek_mxm(A,&nc,B,&na,C,&nb); } + +/* y = A x */ +static void tensor_mxv(double *y, uint ny, + const double *A, const double *x, uint nx) +{ uint one=1; nek_mxm(A,&ny,x,&nx,y,&one); } + +/* y = A^T x */ +static void tensor_mtxv(double *y, uint ny, + const double *A, const double *x, uint nx) +{ uint one=1; nek_mxm(x,&one,A,&nx,y,&ny); } + +# endif +#endif + +/*-------------------------------------------------------------------------- + 1-,2-,3-d Tensor Application of Row Vectors (for Interpolation) + + the 3d case: + v = tensor_i3(Jr,nr, Js,ns, Jt,nt, u, work) + gives v = [ Jr (x) Js (x) Jt ] u + where Jr, Js, Jt are row vectors (interpolation weights) + u is nr x ns x nt in column-major format (inner index is r) + v is a scalar + --------------------------------------------------------------------------*/ + +static double tensor_i1(const double *Jr, uint nr, const double *u) +{ + return tensor_dot(Jr,u,nr); +} + +/* work holds ns doubles */ +static double tensor_i2(const double *Jr, uint nr, + const double *Js, uint ns, + const double *u, double *work) +{ + tensor_mtxv(work,ns, u, Jr,nr); + return tensor_dot(Js,work,ns); +} + +/* work holds ns*nt + nt doubles */ +static double tensor_i3(const double *Jr, uint nr, + const double *Js, uint ns, + const double *Jt, uint nt, + const double *u, double *work) +{ + double *work2 = work+nt; + tensor_mtxv(work2,ns*nt, u, Jr,nr); + tensor_mtxv(work ,nt , work2, Js,ns); + return tensor_dot(Jt,work,nt); +} + +/*-------------------------------------------------------------------------- + 1-,2-,3-d Tensor Application of Row Vectors + for simultaneous Interpolation and Gradient computation + + the 3d case: + v = tensor_ig3(g, wtr,nr, wts,ns, wtt,nt, u, work) + gives v = [ Jr (x) Js (x) Jt ] u + g_0 = [ Dr (x) Js (x) Jt ] u + g_1 = [ Jr (x) Ds (x) Jt ] u + g_2 = [ Jr (x) Js (x) Dt ] u + where Jr,Dr,Js,Ds,Jt,Dt are row vectors, + Jr=wtr, Dr=wtr+nr, etc. + (interpolation & derivative weights) + u is nr x ns x nt in column-major format (inner index is r) + v is a scalar, g is an array of 3 doubles + --------------------------------------------------------------------------*/ + +static double tensor_ig1(double g[1], + const double *wtr, uint nr, + const double *u) +{ + g[0] = tensor_dot(wtr+nr,u,nr); + return tensor_dot(wtr ,u,nr); +} + +/* work holds 2*nr doubles */ +static double tensor_ig2(double g[2], + const double *wtr, uint nr, + const double *wts, uint ns, + const double *u, double *work) +{ + tensor_mxm(work,nr, u,ns, wts,2); + g[0] = tensor_dot(wtr+nr,work ,nr); + g[1] = tensor_dot(wtr ,work+nr,nr); + return tensor_dot(wtr ,work ,nr); +} + +/* work holds 2*nr*ns + 3*nr doubles */ +static double tensor_ig3(double g[3], + const double *wtr, uint nr, + const double *wts, uint ns, + const double *wtt, uint nt, + const double *u, double *work) +{ + const uint nrs = nr*ns; + double *a = work, *b = work+2*nrs, *c=b+2*nr; + tensor_mxm(a,nrs, u,nt, wtt,2); + tensor_mxm(b,nr, a,ns, wts,2); + tensor_mxv(c,nr, a+nrs, wts,ns); + g[0] = tensor_dot(b , wtr+nr, nr); + g[1] = tensor_dot(b+nr, wtr , nr); + g[2] = tensor_dot(c , wtr , nr); + return tensor_dot(b , wtr , nr); +} + +/* + out - nr x ns + u - mr x ms + Jrt - mr x nr, Jst - ms x ns + work - nr x ms +*/ +static void tensor_2t(double *out, + const double *Jrt, uint nr, uint mr, + const double *Jst, uint ns, uint ms, + const double *u, double *work) +{ + tensor_mtxm(work,nr, Jrt,mr, u,ms); + tensor_mxm(out,nr, work,ms, Jst,ns); +} + +/* + out - nr x ns x nt + u - mr x ms x mt + Jrt - mr x nr, Jst - ms x ns, Jtt - mt x nt + work - nr*ms*mt + nr*ns*mt = nr*(ms+ns)*mt +*/ +static void tensor_3t(double *out, + const double *Jrt, uint nr, uint mr, + const double *Jst, uint ns, uint ms, + const double *Jtt, uint nt, uint mt, + const double *u, double *work) +{ + const uint nrs=nr*ns, mst=ms*mt, nrms=nr*ms; + uint k; + double *work2 = work+nr*mst; + double *p; const double *q; + tensor_mtxm(work,nr, Jrt,mr, u,mst); + for(k=0,p=work2,q=work;k/dev/null +if [ $? -ne 0 ]; then + echo "FATAL ERROR: Cannot find $F77!" + exit 1 +fi +\rm test_f77.o 2>/dev/null + +# basic compiler test +cat > test_f77.f << _ACEOF + subroutine test + end +_ACEOF +$F77 -c test_f77.f >/dev/null +if [ ! -f test_f77.o ]; then + echo "FATAL ERROR: Basic compiler test for $F77 failed!" + exit 1 +fi +\rm test_f77.* 2>/dev/null + +# test C compiler +which `echo $CC | awk '{print $1}'` 1>/dev/null +if [ $? -ne 0 ]; then + echo "FATAL ERROR: Cannot find $CC!" + exit 1 +fi +\rm test_cc.o 2>/dev/null + +cat > test_cc.c << _ACEOF + void function(){} +_ACEOF +$CC -c test_cc.c 1>/dev/null +if [ ! -f test_cc.o ]; then + echo "FATAL ERROR: Basic compiler test for $CC failed!" + exit 1 +fi +\rm test_cc.* 2>/dev/null + +# initial clean-up +rm -f nekbone 2>/dev/null + +# Check if the compiler adds an underscore to external functions +UNDERSCORE=false +cat > test_underscore.f << _ACEOF + subroutine underscore_test + call byte_write + end +_ACEOF +$F77 -c test_underscore.f 2>&1 >/dev/null +nm test_underscore.o | grep byte_write_ 1>/dev/null +if [ $? -eq 0 ] +then + UNDERSCORE=true +fi +\rm test_underscore.* 2>/dev/null + +# trying to figure which compiler the wrapper is using +F77ok=0 + +F77comp_=`$F77 -showme 2>/dev/null 1>.tmp || true` +F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` +if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 +fi + +if [ $F77ok -eq 0 ]; then + F77comp_=`$F77 -show 2>/dev/null 1>.tmp || true` + F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +if [ $F77ok -eq 0 ]; then + F77comp_=`$F77 -craype-verbose 2>/dev/null 1>.tmp || true` + F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +if [ $F77ok -eq 0 ]; then + F77comp=`echo $F77 | awk '{print $1}'` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +\rm -f .tmp +if [ $F77ok -eq 0 ]; then + F77comp="unknown" +fi + +CFE_FLAG="" +# assign F77 compiler specific flags +case $F77comp in + *pgf*) P="-r8 -Mpreprocess" + ;; + *gfortran*) P="-fcray-pointer -fdefault-real-8 -x f77-cpp-input" + ;; + *ifort*) P="-r8 -fpconstant -fpp" + CFE_FLAG="-xHost" + ;; + *pathf*) P="-r8 -cpp -fno-second-underscore" + ;; + *xlf*) P="-qrealsize=8 -qdpc=e -qsuffix=cpp=f" + PPPO="-WF," + F77="${F77} -qsuppress=cmpmsg" + ;; + *ftn*) P="-r8 -Mpreprocess" + ;; + *sunf*) P="-r8const -xtypemap=real:64 -fpp" + ;; + *open*) P="-r8 -cpp -fno-second-underscore" + ;; + *) echo "ERROR: Unable to detect compiler!" + echo " - don't know how to promote datatype REAL to 8 bytes" + echo " - don't know how to invoke the C pre-processor (CPP) before compilation" + echo " Please edit the makefile and specify the requested compiler flags using the P variable." + echo "" + P="" + NOCOMPILE=1 + read;; +esac +export PPPO + +# Check ptr size +cat > tmp.c << _ACEOF +#include +#include +int main() +{ + int *p;printf("%li\n",sizeof(p)); +} +_ACEOF +$CC $CFE_FLAG tmp.c 2>&1>/dev/null +ptrSize=`./a.out` +rm tmp.c a.out +if [ "$ptrSize" == "8" ] +then + PPLIST="${PPLIST} PTRSIZE8" +fi + +# set preprocessor symbols +if [ "$IFMPI" == "false" -o "$IFMPI" == "no" ]; then + IFMPI=false +else + # default + IFMPI=true + PPLIST="${PPLIST} MPI" +fi +export IFMPI + +# Check size of long int +cat > tmp.c << _ACEOF +#include +#include +int main() +{ + int i; + i=sizeof(long int); + printf("%i\n",i); +} +_ACEOF +$CC $CFE_FLAG tmp.c 2>&1>/dev/null +longIntTest=`./a.out` +rm tmp.c a.out +if [ "$longIntTest" == "8" ] +then + PPLIST="${PPLIST} LONGINT8" +fi + +if [ "$UNDERSCORE" == "true" ]; then + PPLIST="${PPLIST} UNDERSCORE" +fi + +PPLIST="${PPLIST} GLOBAL_LONG_LONG" + +MXM_USER="mxm_std.o blas.o" +echo $PPLIST | grep 'BGP' >/dev/null +if [ $? -eq 0 ]; then + MXM_USER="mxm_std.o bg_aligned3.o bg_mxm44.o bg_mxm44_uneven.o bg_mxm3.o blas.o" + OPT_FLAGS_STD="-qarch=450 -qtune=450" + OPT_FLAGS_MAG="-O5 -qarch=450d -qtune=450" +fi +echo $PPLIST | grep 'BLAS_MXM' >/dev/null +if [ $? -eq 0 ]; then + MXM_USER="mxm_std.o" +fi + +# set optimization flags +L0="\$(G) -O0" +L2="\$(G) -O2" +L3="\$(G) -O3" +L4="\$(L3)" + +# user specified opt flags +if [ "$OPT_FLAGS_STD" != "" ]; then + echo $OPT_FLAGS_STD | grep "\-O." 1>/dev/null + if [ $? -eq 0 ]; then + L2="\$(G) $OPT_FLAGS_STD" + L3="\$(G) $OPT_FLAGS_STD" + else + L2="\$(G) -O2 $OPT_FLAGS_STD" + L3="\$(G) -O3 $OPT_FLAGS_STD" + fi +fi + +if [ "$OPT_FLAGS_MAG" != "" ]; then + L4="\$(G) $OPT_FLAGS_MAG" +fi + +if [ "$USR_LIB" != "" ]; then + USR_LFLAGS="${USR_LFLAGS} ${USR_LIB}" +fi + +# tweak makefile template +echo "generating makefile ..." +rm -rf makefile 2>/dev/null + +sed -e "s:^F77[ ]*=.*:F77\:=$F77:" \ +-e "s:^CC[ ]*=.*:CC\:=$CC:" \ +-e "s:^G[ ]*=.*:G\:=$G:" \ +-e "s:^OPT_FLAGS[ ]*=.*:OPT_FLAGS\:=$OPT_FLAGS:" \ +-e "s/^P[ ]*=.*/P:=$P/" \ +-e "s/^L0[ ]*=.*/L0=$L0/" \ +-e "s/^L2[ ]*=.*/L2=$L2/" \ +-e "s/^L3[ ]*=.*/L3=$L3/" \ +-e "s/^L4[ ]*=.*/L4=$L4/" \ +-e "s/^PPPO[ ]*=.*/PPPO=$PPPO/" \ +-e "s/^PPS[ ]*=.*/PPS=$PPLIST/" \ +-e "s:^MXM[ ]*=.*:MXM=$MXM_USER:" \ +-e "s/^IFMPI[ ]*=.*/IFMPI:=$IFMPI/" \ +-e "s:^USR[ ]*=.*:USR\:=$USR:" \ +-e "s:^USR_LFLAGS[ ]*=.*:USR_LFLAGS\:=$USR_LFLAGS:" \ +-e "s:^S[ ]*=.*:S\:=${SOURCE_ROOT}:" ./makefile.template >.makefile + +echo $G | grep '\-g' 1>/dev/null +if [ $? -eq 0 ]; then + sed 's/-O[1-4]/-O0/g' .makefile > .makefile.tmp + mv .makefile.tmp .makefile + echo "Activate DEBUG mode" +fi + +if [ "$USR" != "" ]; then + echo "###########################################################" >> makefile + echo "include makefile_usr.inc" >> .makefile +fi + +if [ -f .makefile ]; then + sed -e "1i\\ +### makefile automatically created by makenek `date +"%m/%d/%Y %T"` ###" \ +-e "s:^CASEDIR[ ]*=.*:CASEDIR\:=${CASEDIR}:" \ +-e "s:^CASENAME[ ]*=.*:CASENAME\:=${CASENAME}:" .makefile > makefile +else + echo "ERROR: Nek Makefile could not be created!" + exit 1 +fi +\rm .makefile 2>/dev/null + +# tweak SIZE file +if [ -f "./SIZE" ]; then + cat SIZE | grep -i 'lxo' >/dev/null +else + echo "FATAL ERROR: Cannot find SIZE" + exit 1 +fi + +if [ $NOCOMPILE -eq 1 ]; then + exit 0 +fi diff --git a/src/math.f b/src/math.f new file mode 100644 index 0000000..48a24c3 --- /dev/null +++ b/src/math.f @@ -0,0 +1,1402 @@ +c----------------------------------------------------------------------- + SUBROUTINE BLANK(A,N) + CHARACTER*1 A(1) + CHARACTER*1 BLNK + SAVE BLNK + DATA BLNK /' '/ + + DO 10 I=1,N + A(I)=BLNK + 10 CONTINUE + RETURN + END +c----------------------------------------------------------------------- + SUBROUTINE VSQ (A,N) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = A(I)**2 + RETURN + END +c----------------------------------------------------------------------- + SUBROUTINE VSQRT(A,N) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = SQRT(A(I)) + RETURN + END +c----------------------------------------------------------------------- + subroutine invers2(a,b,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=1./B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol1(a,n) + REAL A(1) + + DO 100 I=1,N + A(I)=1./A(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol2(a,b,n) + + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=A(I)/B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + + DO 100 I=1,N + A(I)=B(I)/C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine col4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine Xaddcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine addcol4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine ascol5 (a,b,c,d,e,n) + REAL A(1),B(1),C(1),D(1),E(1) + + DO 100 I=1,N + A(I) = B(I)*C(I)-D(I)*E(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine sub2(a,b,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=A(I)-B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine sub3(a,b,c,n) + REAL A(1),B(1),C(1) + + DO 100 I=1,N + A(I)=B(I)-C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine subcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + + DO 100 I=1,N + A(I)=A(I)-B(I)*C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine subcol4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + + DO 100 I=1,N + A(I)=A(I)-B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine rzero(a,n) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I ) = 0.0 + return + END +c----------------------------------------------------------------------- + subroutine izero(a,n) + INTEGER A(1) + + DO 100 I = 1, N + 100 A(I ) = 0 + return + END +c----------------------------------------------------------------------- + subroutine ione(a,n) + INTEGER A(1) + DO 100 I = 1, N + 100 A(I ) = 1 + return + END +c----------------------------------------------------------------------- + subroutine rone(a,n) + DIMENSION A(1) + DO 100 I = 1, N + 100 A(I ) = 1.0 + return + END +c----------------------------------------------------------------------- + subroutine cfill(a,b,n) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = B + return + END +c----------------------------------------------------------------------- + subroutine ifill(ia,ib,n) + DIMENSION IA(1) + + DO 100 I = 1, N + 100 IA(I) = IB + return + END +c----------------------------------------------------------------------- + subroutine copy(a,b,n) + real a(1),b(1) + + do i=1,n + a(i)=b(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine chcopy(a,b,n) + CHARACTER*1 A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END + + subroutine icopy(a,b,n) + INTEGER A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END +c----------------------------------------------------------------------- + subroutine i8copy(a,b,n) + INTEGER*8 A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END +c----------------------------------------------------------------------- + subroutine chsign(a,n) + REAL A(1) + + DO 100 I=1,N + A(I) = -A(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine cmult(a,const,n) + REAL A(1) + + DO 100 I=1,N + A(I)=A(I)*CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine cadd(a,const,n) + REAL A(1) + + DO 100 I=1,N + A(I)=A(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine iadd(i1,iscal,n) + DIMENSION I1(1) + + DO 10 I=1,N + I1(I)=I1(I)+ISCAL + 10 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine cadd2(a,b,const,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=B(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + real function vlmin(vec,n) + REAL VEC(1) + TMIN = 99.0E20 + + DO 100 I=1,N + TMIN = MIN(TMIN,VEC(I)) + 100 CONTINUE + VLMIN = TMIN + return + END +c----------------------------------------------------------------------- + integer function ivlmin(vec,n) + integer vec(1),tmin + if (n.eq.0) then + ivlmin=0 + return + endif + tmin = 8888888 + do i=1,n + tmin = min(tmin,vec(i)) + enddo + ivlmin = tmin + return + end +c----------------------------------------------------------------------- + integer function ivlmax(vec,n) + integer vec(1),tmax + if (n.eq.0) then + ivlmax=0 + return + endif + TMAX =-8888888 + do i=1,n + TMAX = MAX(TMAX,VEC(I)) + enddo + Ivlmax = tmax + return + end +c----------------------------------------------------------------------- + real function vlmax(vec,n) + REAL VEC(1) + TMAX =-99.0E20 + do i=1,n + TMAX = MAX(TMAX,VEC(I)) + enddo + VLMAX = TMAX + return + END +c----------------------------------------------------------------------- + real function vlamax(vec,n) + REAL VEC(1) + TAMAX = 0.0 + + DO 100 I=1,N + TAMAX = MAX(TAMAX,ABS(VEC(I))) + 100 CONTINUE + VLAMAX = TAMAX + return + END +c----------------------------------------------------------------------- + real function vlsum(vec,n) + REAL VEC(1) + + SUM = 0. + + DO 100 I=1,N + SUM=SUM+VEC(I) + 100 CONTINUE + VLSUM = SUM + return + END +c----------------------------------------------------------------------- + subroutine vcross (u1,u2,u3,v1,v2,v3,w1,w2,w3,n) +C +C Compute a Cartesian vector cross product. +C + DIMENSION U1(1),U2(1),U3(1) + DIMENSION V1(1),V2(1),V3(1) + DIMENSION W1(1),W2(1),W3(1) + + + DO 100 I=1,N + U1(I) = V2(I)*W3(I) - V3(I)*W2(I) + U2(I) = V3(I)*W1(I) - V1(I)*W3(I) + U3(I) = V1(I)*W2(I) - V2(I)*W1(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine vdot2 (dot,u1,u2,v1,v2,n) +C +C Compute a Cartesian vector dot product. 2-d version +C + DIMENSION DOT(1) + DIMENSION U1(1),U2(1) + DIMENSION V1(1),V2(1) + + + DO 100 I=1,N + DOT(I) = U1(I)*V1(I) + U2(I)*V2(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine vdot3 (dot,u1,u2,u3,v1,v2,v3,n) +C +C Compute a Cartesian vector dot product. 3-d version +C + DIMENSION DOT(1) + DIMENSION U1(1),U2(1),U3(1) + DIMENSION V1(1),V2(1),V3(1) + + + DO 100 I=1,N + DOT(I) = U1(I)*V1(I) + U2(I)*V2(I) + U3(I)*V3(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine addtnsr(s,h1,h2,h3,nx,ny,nz) +C +C Map and add to S a tensor product form of the three functions H1,H2,H3. +C This is a single element routine used for deforming geometry. +C + DIMENSION H1(1),H2(1),H3(1) + DIMENSION S(NX,NY,NZ) + + DO 200 IZ=1,NZ + DO 200 IY=1,NY + HH = H2(IY)*H3(IZ) + DO 100 IX=1,NX + S(IX,IY,IZ)=S(IX,IY,IZ)+HH*H1(IX) + 100 CONTINUE + 200 CONTINUE + return + END + function ltrunc(string,l) + CHARACTER*1 STRING(L) + CHARACTER*1 BLNK + DATA BLNK/' '/ + + DO 100 I=L,1,-1 + L1=I + IF (STRING(I).NE.BLNK) GOTO 200 + 100 CONTINUE + L1=0 + 200 CONTINUE + LTRUNC=L1 + return + END +c----------------------------------------------------------------------- + function mod1(i,n) +C +C Yields MOD(I,N) with the exception that if I=K*N, result is N. +C + MOD1=0 + IF (I.EQ.0) THEN + return + ENDIF + IF (N.EQ.0) THEN + WRITE(6,*) + $ 'WARNING: Attempt to take MOD(I,0) in function mod1.' + return + ENDIF + II = I+N-1 + MOD1 = MOD(II,N)+1 + return + END +c----------------------------------------------------------------------- + integer function log2(k) + RK=(K) + RLOG=LOG10(RK) + RLOG2=LOG10(2.0) + RLOG=RLOG/RLOG2+0.5 + LOG2=INT(RLOG) + return + END +c----------------------------------------------------------------------- + subroutine iflip(i1,n) + DIMENSION I1(1) + N1=N+1 + N2=N/2 + DO 10 I=1,N2 + ILAST=N1-I + ITMP=I1(ILAST) + I1(ILAST)=I1(I) + I1(I)=ITMP + 10 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine iswap(b,ind,n,temp) + INTEGER B(1),IND(1),TEMP(1) +C*** +C*** SORT ASSOCIATED ELEMENTS BY PUTTING ITEM(JJ) +C*** INTO ITEM(I), WHERE JJ=IND(I). +C*** + DO 20 I=1,N + JJ=IND(I) + TEMP(I)=B(JJ) + 20 CONTINUE + DO 30 I=1,N + 30 B(I)=TEMP(I) + return + END +c----------------------------------------------------------------------- + subroutine col2(a,b,n) + real a(1),b(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)*b(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col2c(a,b,c,n) + real a(1),b(1),c + + do i=1,n + a(i)=a(i)*b(i)*c + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2(a,b,n) + real a(1),b(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)+b(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=b(i)+c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine addcol3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)+b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2s1(a,b,c1,n) + real a(1),b(1) + + DO 100 I=1,N + A(I)=C1*A(I)+B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add2s2(a,b,c1,n) + real a(1),b(1) + + DO 100 I=1,N + A(I)=A(I)+C1*B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add3s2(a,b,c,c1,c2,n) + real a(1),b(1),c(1) + + DO 100 I=1,N + A(I)=C1*B(I)+C2*C(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=B(I)+C(I)+D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + real function vlsc2(x,y,n) + REAL X(1),Y(1) + + s = 0. + do i=1,n + s = s + x(i)*y(i) + enddo + vlsc2=s + return + end +c----------------------------------------------------------------------- + real function vlsc21(x,y,n) + real x(1),y(1) + + s = 0. + do i=1,n + s = s + x(i)*x(i)*y(i) + enddo + vlsc21=s + return + end + + +C---------------------------------------------------------------------------- +C +C Vector reduction routines which require communication +C on a parallel machine. These routines must be substituted with +C appropriate routines which take into account the specific architecture. +C +C---------------------------------------------------------------------------- + function glsc3(a,b,mult,n) +C +C Perform inner-product in double precision +C + real a(1),b(1),mult(1) + real tmp,work(1) + + tmp = 0.0 + do 10 i=1,n + tmp = tmp + a(i)*b(i)*mult(i) + 10 continue + call gop(tmp,work,'+ ',1) + glsc3 = tmp + return + end +c----------------------------------------------------------------------- + function glsc2(x,y,n) +C +C Perform inner-product in double precision +C + real x(1), y(1) + real tmp,work(1) + + tmp=0.0 + do 10 i=1,n + tmp = tmp+ x(i)*y(i) + 10 continue + CALL GOP(TMP,WORK,'+ ',1) + GLSC2 = TMP + return + END +c----------------------------------------------------------------------- + function glsc23(x,y,z,n) +c +C Perform inner-product x*x*y*z +c + real x(1), y(1),z(1) + real tmp,work(1) + + ds = 0.0 + do 10 i=1,n + ds=ds+x(i)*x(i)*y(i)*z(i) + 10 continue + tmp=ds + call gop(tmp,work,'+ ',1) + glsc23 = tmp + return + end +c----------------------------------------------------------------------- +c real function gl2norm(a,n) + +c include 'SIZE' +c include 'MASS' + +c real a(1) + +c common /scrsf/ w1 (lx1,ly1,lz1,lelt) + +c call col3 (w1,a,a,n) +c call col2 (w1,bm1,n) +c gl2norm = sqrt(glsum (w1,n)/volvm1) + +c return +c end +c----------------------------------------------------------------------- + function glsum (x,n) + DIMENSION X(1) + DIMENSION TMP(1),WORK(1) + TSUM = 0. + DO 100 I=1,N + TSUM = TSUM+X(I) + 100 CONTINUE + TMP(1)=TSUM + CALL GOP(TMP,WORK,'+ ',1) + GLSUM = TMP(1) + return + END +c----------------------------------------------------------------------- + real function glamax(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMAX = 0.0 + DO 100 I=1,N + TMAX = MAX(TMAX,ABS(A(I))) + 100 CONTINUE + TMP(1)=TMAX + CALL GOP(TMP,WORK,'M ',1) + GLAMAX=ABS(TMP(1)) + return + END +c----------------------------------------------------------------------- + real function glamin(a,n) + real a(1) + dimension tmp(1),work(1) + tmin = 9.e28 + do 100 i=1,n + tmin = min(tmin,abs(a(i))) + 100 continue + tmp(1)=tmin + call gop(tmp,work,'m ',1) + glamin=abs(tmp(1)) + return + end +c----------------------------------------------------------------------- + function iglmin(a,n) + integer a(1),tmin + integer tmp(1),work(1) + tmin= 999999999 + do i=1,n + tmin=min(tmin,a(i)) + enddo + tmp(1)=tmin + call igop(tmp,work,'m ',1) + iglmin=tmp(1) + return + end +c----------------------------------------------------------------------- + function iglmax(a,n) + integer a(1),tmax + integer tmp(1),work(1) + tmax= -999999999 + do i=1,n + tmax=max(tmax,a(i)) + enddo + tmp(1)=tmax + call igop(tmp,work,'M ',1) + iglmax=tmp(1) + return + end +c----------------------------------------------------------------------- + function iglsum(a,n) + integer a(1),tsum + integer tmp(1),work(1) + tsum= 0 + do i=1,n + tsum=tsum+a(i) + enddo + tmp(1)=tsum + call igop(tmp,work,'+ ',1) + iglsum=tmp(1) + return + end +C----------------------------------------------------------------------- + integer*8 function i8glsum(a,n) + integer*8 a(1),tsum + integer*8 tmp(1),work(1) + tsum= 0 + do i=1,n + tsum=tsum+a(i) + enddo + tmp(1)=tsum + call i8gop(tmp,work,'+ ',1) + i8glsum=tmp(1) + return + end +C----------------------------------------------------------------------- + function glmax(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMAX=-99.0e20 + DO 100 I=1,N + TMAX=MAX(TMAX,A(I)) + 100 CONTINUE + TMP(1)=TMAX + CALL GOP(TMP,WORK,'M ',1) + GLMAX=TMP(1) + return + END +c----------------------------------------------------------------------- + function glmin(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMIN=99.0e20 + DO 100 I=1,N + TMIN=MIN(TMIN,A(I)) + 100 CONTINUE + TMP(1)=TMIN + CALL GOP(TMP,WORK,'m ',1) + GLMIN = TMP(1) + return + END +c----------------------------------------------------------------------- + subroutine gllog(la,lb) +C +C If ANY LA=LB, then ALL LA=LB. +C + LOGICAL LA,LB + DIMENSION TMP(1),WORK(1) + + TMP(1)=1.0 + IF (LB) THEN + IF (LA) TMP(1)=0.0 + ELSE + IF (.NOT.LA) TMP(1)=0.0 + ENDIF + CALL GOP(TMP,WORK,'* ',1) + IF (TMP(1).EQ.0.0) LA=LB + return + END +c----------------------------------------------------------------------- + function fmdian(a,n,ifok) +C find the Median of the (global) set A + include 'SIZE' + DIMENSION A(1) + DIMENSION WORK1(5),WORK2(5) + DIMENSION GUES(100) + LOGICAL IFOK + + AMP =1.5 + AFAC =1.5 + GMIN =GLMIN(A,N) + GMAX =GLMAX(A,N) + GMIN0=GLMIN(A,N) + GMAX0=GLMAX(A,N) + GUESS=(GMAX+GMIN)/2.0 + EPS =(GMAX-GMIN) + IF (EPS.EQ.0.0) THEN + FMDIAN=GMAX + return + ENDIF + WORK1(1)=N + CALL GOP(WORK1,WORK2,'+ ',1) + NTOT=WORK1(1) + N2 = (NTOT+1)/2 + IF (.NOT.IFOK) THEN + WRITE(6,8) NID,N,(A(I),I=1,N) + WRITE(6,9) NID,NTOT,N2,N,GMIN,GMAX + 8 FORMAT(I5,'N,A:',I5,10(6F10.5,/)) + 9 FORMAT(I5,'mnx:',3I6,2F10.5) + ENDIF +C +C This is the trial loop +C + ITRY=-1 + 10 CONTINUE + ITRY=ITRY+1 + II=ITRY+1 + IF (II.LE.100) GUES(II)=GUESS +C error check for infinite loop + IF (ITRY.GT.2*NTOT) GOTO 9000 + CALL RZERO(WORK1,5) + NLT=0 + NGT=0 + CLT=GMIN0 + CGT=GMAX0 + DO 100 I=1,N + AA=A(I) + IF (AA.NE.GUESS) THEN + IF (AA.LT.GUESS) THEN + NLT=NLT+1 +C CLT - closest value to GUESS Less Than GUESS + IF (AA.GT.CLT) CLT=AA + ENDIF + IF (AA.GT.GUESS) THEN + NGT=NGT+1 +C CGT - closest value to GUESS Greater Than GUESS + IF (AA.LT.CGT) CGT=AA + ENDIF + DUM=1./(EPS+ABS(AA-GUESS)) + WORK1(1)=WORK1(1)+DUM + WORK1(2)=WORK1(2)+DUM*AA + ELSE +C detected values equaling the guess. + WORK1(5)=WORK1(5)+1.0 + ENDIF + 100 CONTINUE +C Invoke vector reduction across processors: + WORK2(1)=CLT + CLT=GLMAX(WORK2,1) + WORK2(1)=CGT + CGT=GLMIN(WORK2,1) + WORK1(3)=NLT + WORK1(4)=NGT + CALL GOP(WORK1,WORK2,'+ ',5) + NLT=WORK1(3) + NGT=WORK1(4) + IF (.NOT.IFOK) THEN + WRITE(6,101) NID,GUESS,CLT,CGT + WRITE(6,102) NID,(WORK1(I),I=1,5) + 101 FORMAT(I5,'Glg:',3F12.5) + 102 FORMAT(I5,'WORK1:',5F12.5) + ENDIF +C +C Done? +C + IF (NLT.GT.N2.OR.NGT.GT.N2) THEN +C we're not done..... + IF (NGT.GT.NLT) THEN +C guess is too low + GMIN=CGT + G2=CGT+MAX(0.,WORK1(2)/WORK1(1)-GUESS)*AMP + IF (G2.GT.GMAX) G2=0.5*(GUESS+GMAX) + EPS=AFAC*ABS(G2-GUESS) +C see that we move at least as far as the next closest value. + GUESS=MAX(G2,CGT) + GOTO 10 + ELSE IF (NLT.GT.NGT) THEN +C guess is too high + GMAX=CLT + G2=CLT+MIN(0.,WORK1(2)/WORK1(1)-GUESS)*AMP + IF (G2.LT.GMIN) G2=0.5*(GUESS+GMIN) + EPS=AFAC*ABS(G2-GUESS) +C see that we move at least as far as the next closest value. + GUESS=MIN(G2,CLT) + GOTO 10 + ENDIF + ELSE +C +C we're done.... + IF (WORK1(5).NE.0) THEN +C the median is (usually) one of the values + FMDIAN=GUESS + IF (WORK1(5).EQ.1.0) THEN + IF (MOD(NTOT,2).EQ.0) THEN + IF (NGT.GT.NLT) THEN + FMDIAN=0.5*(GUESS+CGT) + ELSE + FMDIAN=0.5*(GUESS+CLT) + ENDIF + ELSE + IF (NGT.EQ.NLT) THEN + FMDIAN=GUESS + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=CGT + ELSE + FMDIAN=CLT + ENDIF + ENDIF + ENDIF + ELSE + IF (MOD(NTOT,2).EQ.0) THEN + IF (NGT.EQ.NLT) THEN + FMDIAN=0.5*(CLT+CGT) + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=0.5*(GUESS+CGT) + ELSE + FMDIAN=0.5*(GUESS+CLT) + ENDIF + ELSE + IF (NGT.EQ.NLT) THEN + FMDIAN=GUESS + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=CGT + ELSE + FMDIAN=CLT + ENDIF + ENDIF + ENDIF + + ENDIF + IF (.NOT.IFOK) WRITE(6,*) NID,'FMDIAN2',FMDIAN,(A(I),I=1,N) + return +C +C Error handling +C + 9000 CONTINUE + WRITE(6,11) NTOT,GMIN0,GMAX0,GUESS + 11 FORMAT('ABORTING IN FMDIAN: N,AMIN,AMAX:',I6,3G14.6) + DO 13 I1=1,N,5 + IN=I1+5 + IN=MIN(IN,N) + WRITE(6,12) NID,(A(I),I=I1,IN) + 12 FORMAT(I4,' FMA:',5G14.6) + 13 CONTINUE + DO 15 I1=1,ITRY,5 + IN=I1+5 + IN=MIN(IN,ITRY) + WRITE(6,14) NID,(GUES(I),I=I1,IN) + 14 FORMAT(I4,' FMG:',5G14.6) + 15 CONTINUE + call exitt + END + +C======================================================================== +C Double precision matrix and vector routines +C======================================================================== + +c----------------------------------------------------------------------- + subroutine dcadd(a,const,n) + real*8 A(1),CONST + + DO 100 I=1,N + A(I)=A(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine dsub2(a,b,n) + real*8 A(1), B(1) + + DO 100 I=1,N + A(I)=A(I)-B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine dadd2(a,b,n) + real*8 A(1), B(1) + + DO 100 I=1,N + A(I)=A(I)+B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine chswapr(b,L,ind,n,temp) + INTEGER IND(1) + CHARACTER*6 B(1),TEMP(1) + +C*** SORT ASSOCIATED ELEMENTS BY PUTTING ITEM(JJ) +C*** INTO ITEM(I), WHERE JJ=IND(I). +C*** + DO 20 I=1,N + JJ=IND(I) + TEMP(I)=B(JJ) + 20 CONTINUE + DO 30 I=1,N + 30 B(I)=TEMP(I) + return + END +c----------------------------------------------------------------------- + subroutine drcopy(r,d,N) + real*8 d(1) + dimension r(1) + do 10 i=1,n + r(i)=d(i) + 10 continue + return + end + subroutine sorts(xout,xin,work,n) + real xout(1),xin(1),work(1) + call copy(xout,xin,n) + call sort(xout,work,n) + return + end +C +c----------------------------------------------------------------------- + function ivlsum(a,n) + INTEGER A(1) + INTEGER TSUM + if (n.eq.0) then + ivlsum = 0 + return + endif + TSUM=A(1) + DO 100 I=2,N + TSUM=TSUM+A(I) + 100 CONTINUE + IVLSUM=TSUM + return + END +c----------------------------------------------------------------------- + subroutine icadd(a,c,n) + INTEGER A(1),C + DO 100 I = 1, N + 100 A(I) = A(I) + C + return + END + subroutine isort(a,ind,n) +C +C Use Heap Sort (p 231 Num. Rec., 1st Ed.) +C + integer a(1),ind(1) + integer aa + + dO 10 j=1,n + ind(j)=j + 10 continue + + if (n.le.1) return + L=n/2+1 + ir=n + 100 continue + if (l.gt.1) then + l=l-1 + aa = a (l) + ii = ind(l) + else + aa = a(ir) + ii = ind(ir) + a(ir) = a( 1) + ind(ir) = ind( 1) + ir=ir-1 + if (ir.eq.1) then + a(1) = aa + ind(1) = ii + return + endif + endif + i=l + j=l+l + 200 continue + if (j.le.ir) then + if (j.lt.ir) then + if ( a(j).lt.a(j+1) ) j=j+1 + endif + if (aa.lt.a(j)) then + a(i) = a(j) + ind(i) = ind(j) + i=j + j=j+j + else + j=ir+1 + endif + GOTO 200 + endif + a(i) = aa + ind(i) = ii + GOTO 100 + end + subroutine sort(a,ind,n) +C +C Use Heap Sort (p 231 Num. Rec., 1st Ed.) +C + real a(1),aa + integer ind(1) + + dO 10 j=1,n + ind(j)=j + 10 continue + + if (n.le.1) return + L=n/2+1 + ir=n + 100 continue + if (l.gt.1) then + l=l-1 + aa = a (l) + ii = ind(l) + else + aa = a(ir) + ii = ind(ir) + a(ir) = a( 1) + ind(ir) = ind( 1) + ir=ir-1 + if (ir.eq.1) then + a(1) = aa + ind(1) = ii + return + endif + endif + i=l + j=l+l + 200 continue + if (j.le.ir) then + if (j.lt.ir) then + if ( a(j).lt.a(j+1) ) j=j+1 + endif + if (aa.lt.a(j)) then + a(i) = a(j) + ind(i) = ind(j) + i=j + j=j+j + else + j=ir+1 + endif + GOTO 200 + endif + a(i) = aa + ind(i) = ii + GOTO 100 + end +c----------------------------------------------------------------------- + subroutine iswap_ip(x,p,n) + integer x(1),xstart + integer p(1) +c +c In-place permutation: x' = x(p) +c + do k=1,n + if (p(k).gt.0) then ! not swapped + xstart = x(k) + loop_start = k + last = k + do j=k,n + next = p(last) + if (next.lt.0) then + write(6,*) 'Hey! iswap_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(last) = xstart + p(last) = -p(last) + goto 10 + else + x(last) = x(next) + p(last) = -p(last) + last = next + endif + enddo + 10 continue + endif + enddo +c + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine iswapt_ip(x,p,n) + integer x(1),t1,t2 + integer p(1) +c +c In-place permutation: x'(p) = x +c + + do k=1,n + if (p(k).gt.0) then ! not swapped + loop_start = k + next = p(loop_start) + t1 = x(loop_start) + do j=1,n + if (next.lt.0) then + write(6,*) 'Hey! iswapt_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(next) = t1 + p(next) = -p(next) + goto 10 + else + t2 = x(next) + x(next) = t1 + t1 = t2 + nextp = p(next) + p(next) = -p(next) + next = nextp + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine swap_ip(x,p,n) + real x(1),xstart + integer p(1) +c +c In-place permutation: x' = x(p) +c + do k=1,n + if (p(k).gt.0) then ! not swapped + xstart = x(k) + loop_start = k + last = k + do j=k,n + next = p(last) + if (next.lt.0) then + write(6,*) 'Hey! swap_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(last) = xstart + p(last) = -p(last) + goto 10 + else + x(last) = x(next) + p(last) = -p(last) + last = next + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine swapt_ip(x,p,n) + real x(1),t1,t2 + integer p(1) +c +c In-place permutation: x'(p) = x +c + + do k=1,n + if (p(k).gt.0) then ! not swapped + loop_start = k + next = p(loop_start) + t1 = x(loop_start) + do j=1,n + if (next.lt.0) then + write(6,*) 'Hey! swapt_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(next) = t1 + p(next) = -p(next) + goto 10 + else + t2 = x(next) + x(next) = t1 + t1 = t2 + nextp = p(next) + p(next) = -p(next) + next = nextp + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine glvadd(x,w,n) + real x(1),w(1) + call gop(x,w,'+ ',1) + return + end +c----------------------------------------------------------------------- + subroutine add3s12(x,y,z,c1,c2,n) + real x(1),y(1),z(1),c1,c2 + do i=1,n + x(i) = c1*y(i)+c2*z(i) + enddo + return + end +c----------------------------------------------------------------------- + integer*8 function i8glmax(a,n) + integer*8 a(1),tmax + integer*8 tmp(1),work(1) + tmax= -999999 + do i=1,n + tmax=max(tmax,a(i)) + enddo + tmp(1)=tmax + call i8gop(tmp,work,'M ',1) + i8glmax=tmp(1) + if (i8glmax .eq. -999999) i8glmax=0 + return + end +c----------------------------------------------------------------------- + subroutine admcol3(a,b,c,d,n) + REAL A(1),B(1),C(1),D +C + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I)*D + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine add2col2(a,b,c,n) + real a(1),b(1),c(1) + + do i=1,n + a(i) = a(i) + b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2sxy(x,a,y,b,n) + real x(1),y(1) + + do i=1,n + x(i) = a*x(i) + b*y(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col2s2(x,y,s,n) + real x(n),y(n) + + do i=1,n + x(i)=s*x(i)*y(i) + enddo + + return + end +c----------------------------------------------------------------------- + INTEGER FUNCTION INDX1(S1,S2,L2) + CHARACTER*132 S1,S2 + + N1=132-L2+1 + INDX1=0 + IF (N1.LT.1) return + + DO 100 I=1,N1 + I2=I+L2-1 + IF (S1(I:I2).EQ.S2(1:L2)) THEN + INDX1=I + return + ENDIF + 100 CONTINUE + + return + END +c----------------------------------------------------------------------- + diff --git a/src/mpi_dummy.f b/src/mpi_dummy.f new file mode 100644 index 0000000..f6257df --- /dev/null +++ b/src/mpi_dummy.f @@ -0,0 +1,1053 @@ +c*********************************************************************72 + subroutine mpi_scan(data1, data2, n, datatype, + & operation, comm, ierror ) + + integer data1,data2 ! currently hardwired only for integer + + data2 = data1 + + return + end + +c*********************************************************************72 + subroutine mpi_abort ( comm, errorcode, ierror ) + +c*********************************************************************72 +c +cc MPI_ABORT shuts down the processes in a given communicator. +c + implicit none + + integer comm + integer errorcode + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_ABORT:' + write ( *, '(a,i12)' ) + & ' Shut down with error code = ', errorcode + + stop + end + subroutine mpi_allgather ( data1, nsend, sendtype, data2, + & nrecv, recvtype, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLGATHER gathers data from all the processes in a communicator. +c + implicit none + + include "mpi_dummy.h" + + integer nsend + + integer comm + integer data1(nsend) + integer data2(nsend) + integer ierror + integer nrecv + integer recvtype + integer sendtype + + ierror = MPI_SUCCESS + + if ( sendtype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, nsend, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_allgatherv ( data1, nsend, sendtype, + & data2, nrecv, ndispls, recvtype, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLGATHERV gathers data from all the processes in a communicator. +c + implicit none + + include "mpi_dummy.h" + + integer nsend + + integer comm + integer data1(nsend) + integer data2(nsend) + integer ierror + integer ndispls + integer nrecv + integer recvtype + integer sendtype + + ierror = MPI_SUCCESS + + if ( sendtype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, nsend, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_allreduce ( data1, data2, n, datatype, + & operation, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLREDUCE carries out a reduction operation. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2(n) + integer datatype + integer ierror + integer operation + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + + call mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer ) then + + call mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer8 ) then + + call mpi_reduce_integer8( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_real ) then + + call mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + + else + + ierror = MPI_FAILURE + + end if + + return + end + + subroutine mpi_barrier ( comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BARRIER forces processes within a communicator to wait together. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + return + end + subroutine mpi_bcast ( data, n, datatype, node, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BCAST broadcasts data from one process to all others. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer node + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_bsend ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BSEND sends data from one process to another, using buffering. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_BSEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_cart_create ( comm, ndims, dims, periods, + & reorder, comm_cart, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_CREATE creates a communicator for a Cartesian topology. +c + implicit none + + integer ndims + + integer comm + integer comm_cart + integer dims(*) + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + logical periods(*) + logical reorder + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_cart_get ( comm, ndims, dims, periods, + & coords, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_GET returns the "Cartesian coordinates" of the calling process. +c + implicit none + + integer ndims + + integer comm + integer coords(*) + integer dims(*) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + logical periods(*) + + ierror = MPI_SUCCESS + + do i = 1, ndims + coords(i) = 0 + end do + + return + end + subroutine mpi_cart_shift ( comm, idir, idisp, isource, + & idest, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_SHIFT finds the destination and source for Cartesian shifts. +c + implicit none + + integer comm + integer idest + integer idir + integer idisp + integer ierror + integer isource + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + isource = 0 + idest = 0 + + return + end + subroutine mpi_comm_dup ( comm, comm_out, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_DUP duplicates a communicator. +c + implicit none + + integer comm + integer comm_out + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + comm_out = comm + + return + end + subroutine mpi_comm_free ( comm, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_FREE "frees" a communicator. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_comm_rank ( comm, me, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_RANK reports the rank of the calling process. +c + implicit none + + integer comm + integer ierror + integer me + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + me = 0 + + return + end + subroutine mpi_comm_size ( comm, nprocs, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_SIZE reports the number of processes in a communicator. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer nprocs + + ierror = MPI_SUCCESS + nprocs = 1 + + return + end + subroutine mpi_comm_split ( comm, icolor, ikey, comm_new, + & ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_SPLIT splits up a communicator based on a key. +c + implicit none + + integer comm + integer comm_new + integer icolor + integer ierror + integer ikey + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_copy_double_precision ( data1, data2, n, ierror ) + +c*********************************************************************72 +c +cc MPI_COPY_DOUBLE copies a double precision vector. +c + implicit none + + integer n + + double precision data1(n) + double precision data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_copy_integer ( data1, data2, n, ierror ) + +c*********************************************************************72 +c +cc MPI_COPY_INTEGER copies an integer vector. +c + implicit none + + integer n + + integer data1(n) + integer data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_copy_real ( data1, data2, n, ierror ) + +c*********************************************************************72 +c + implicit none + + integer n + + real data1(n) + real data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_finalize ( ierror ) + +c*********************************************************************72 +c +cc MPI_FINALIZE shuts down the MPI library. +c + implicit none + + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_get_count ( istatus, datatype, icount, ierror ) + +c*********************************************************************72 +c +cc MPI_GET_COUNT reports the actual number of items transmitted. +c + implicit none + + integer datatype + integer icount + integer ierror + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_GET_COUNT - Error!' + write ( *, '(a)' ) ' Should not query message from self.' + + return + end + subroutine mpi_init ( ierror ) + +c*********************************************************************72 +c +cc MPI_INIT initializes the MPI library. +c + implicit none + + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_irecv ( data, n, datatype, iproc, itag, + & comm, irequest, ierror ) + +c*********************************************************************72 +c +cc MPI_IRECV receives data from another process. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer irequest + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_IRECV - Error!' + write ( *, '(a)' ) ' Should not recv message from self.' + + return + end + subroutine mpi_isend ( data, n, datatype, iproc, itag, + & comm, request, ierror ) + +c*********************************************************************72 +c +cc MPI_ISEND sends data from one process to another using nonblocking transmission. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer request + + request = 0 + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_ISEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_recv ( data, n, datatype, iproc, itag, + & comm, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_RECV receives data from another process within a communicator. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer istatus + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_RECV - Error!' + write ( *, '(a)' ) ' Should not recv message from self.' + + return + end + subroutine mpi_reduce ( data1, data2, n, datatype, operation, + & receiver, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE carries out a reduction operation. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2 + integer datatype + integer ierror + integer operation + integer receiver + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + + call mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer ) then + + call mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_real ) then + + call mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + + else + + ierror = MPI_FAILURE + + end if + + return + end + subroutine mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_DOUBLE_PRECISION carries out a reduction operation on double precision values. +c + implicit none + + include "mpi_dummy.h" + + integer n + + double precision data1(n) + double precision data2(n) + integer i + integer ierror + integer operation + + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + + subroutine mpi_reduce_integer8 ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer*8 data1(n) + integer*8 data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + ierror = MPI_FAILURE + + return + end + + subroutine mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer data1(n) + integer data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + ierror = MPI_FAILURE + + return + end + + subroutine mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_REAL carries out a reduction operation on reals. +c +c Discussion: +c + implicit none + + include "mpi_dummy.h" + + integer n + + real data1(n) + real data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_reduce_scatter ( data1, data2, n, datatype, + & operation, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_SCATTER collects a message of the same length from each process. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2(n) + integer datatype + integer ierror + integer operation + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, n, ierror ) + else if ( datatype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, n, ierror ) + else if ( datatype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, n, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_rsend ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_RSEND "ready sends" data from one process to another. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_RSEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_send ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_SEND sends data from one process to another. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_SEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_wait ( irequest, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAIT waits for an I/O request to complete. +c + implicit none + + integer ierror + integer irequest + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAIT - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + subroutine mpi_waitall ( icount, irequest, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAITALL waits until all I/O requests have completed. +c + implicit none + + integer icount + integer ierror + integer irequest + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAITALL - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + subroutine mpi_waitany ( icount, array_of_requests, index, + & istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAITANY waits until one I/O requests has completed. +c + implicit none + + integer array_of_requests(*) + integer icount + integer ierror + integer index + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAITANY - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + function mpi_wtick ( ) + +c*********************************************************************72 +c +cc MPI_WTICK returns the time between clock ticks. +c + implicit none + + double precision mpi_wtick + + mpi_wtick = 1.0D+00 + + return + end + function mpi_wtime ( ) + +c*********************************************************************72 +c +cc MPI_WTIME returns the elapsed wall clock time. +c + implicit none + + real*8 mpi_wtime + real*8 a + integer*8 countval, countrate, countmax + + call system_clock(countval, countrate, countmax) + a = countval + mpi_wtime = a/countrate + + return + end + + subroutine mpi_initialized(mpi_is_initialized, ierr) + + mpi_is_initialized = 0 + ierr = 0 + + return + end + + subroutine mpi_comm_create(icomm,igroup,icommd,ierr) + + icommd = 1 + + return + end + + subroutine mpi_comm_group(icomm,igroup,ierr) + + igroup = 1 + ierr = 0 + + return + end + + subroutine mpi_group_free + + return + end + + subroutine mpi_attr_get(icomm,ikey,ival,iflag,ierr) + + logical iflag + + ival = 999 999 999 ! dummy + + return + end +c----------------------------------------------------------------------- diff --git a/src/mpi_dummy.h b/src/mpi_dummy.h new file mode 100644 index 0000000..0a92b81 --- /dev/null +++ b/src/mpi_dummy.h @@ -0,0 +1,61 @@ +c +c Dummy parameters for MPI F77 stubs +c + integer mpi_comm_world + parameter ( mpi_comm_world = 0 ) +c +c Return values. +c + integer mpi_failure + parameter ( mpi_failure = 1 ) + integer mpi_success + parameter ( mpi_success = 0 ) +c +c recv message status +c + integer mpi_status_size + parameter ( mpi_status_size = 3 ) + integer mpi_source + parameter ( mpi_source = 1 ) + integer mpi_tag + parameter ( mpi_tag = 2 ) + integer mpi_count + parameter ( mpi_count = 3 ) +c +c recv flags +c + integer mpi_any_source + parameter ( mpi_any_source = -1 ) + integer mpi_any_tag + parameter ( mpi_any_tag = -1 ) +c +c data types and sizes +c + integer mpi_integer + parameter ( mpi_integer = 1 ) + integer mpi_integer8 + parameter ( mpi_integer8 = 6 ) + integer mpi_real + parameter ( mpi_real = 2 ) + integer mpi_double_precision + parameter ( mpi_double_precision = 3 ) + integer mpi_logical + parameter ( mpi_logical = 4 ) + integer mpi_character + parameter ( mpi_character = 5 ) +c +c allreduce operations +c + integer mpi_sum + parameter ( mpi_sum = 1 ) + integer mpi_max + parameter ( mpi_max = 2 ) + integer mpi_min + parameter ( mpi_min = 3 ) + integer mpi_product + parameter ( mpi_product = 4 ) +c +c timer +c + external mpi_wtime + real*8 mpi_wtime diff --git a/src/mxm_std.f b/src/mxm_std.f new file mode 100644 index 0000000..5e21cb3 --- /dev/null +++ b/src/mxm_std.f @@ -0,0 +1,4123 @@ + subroutine mxmf2(A,N1,B,N2,C,N3) +c +c unrolled loop version +c + real a(n1,n2),b(n2,n3),c(n1,n3) + + if (n2.le.8) then + if (n2.eq.1) then + call mxf1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxf2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxf3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxf4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxf5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxf6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxf7(a,n1,b,n2,c,n3) + else + call mxf8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxf9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxf10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxf11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxf12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxf13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxf14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxf15(a,n1,b,n2,c,n3) + else + call mxf16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxf17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxf18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxf19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxf20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxf21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxf22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxf23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxf24(a,n1,b,n2,c,n3) + endif + else + call mxm44_0(a,n1,b,n2,c,n3) + endif +c + return + end +c----------------------------------------------------------------------- + subroutine mxf1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxm44_0(a, m, b, k, c, n) +c +c matrix multiply with a 4x4 pencil +c + real a(m,k), b(k,n), c(m,n) + real s11, s12, s13, s14, s21, s22, s23, s24 + real s31, s32, s33, s34, s41, s42, s43, s44 + + mresid = iand(m,3) + nresid = iand(n,3) + m1 = m - mresid + 1 + n1 = n - nresid + 1 + + do i=1,m-mresid,4 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + s44 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + s14 = s14 + a(i,l)*b(l,j+3) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + s24 = s24 + a(i+1,l)*b(l,j+3) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + s34 = s34 + a(i+2,l)*b(l,j+3) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + s44 = s44 + a(i+3,l)*b(l,j+3) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + c(i,j+2) = s13 + c(i,j+3) = s14 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + + c(i+1,j+3) = s24 + c(i+2,j+3) = s34 + c(i+3,j+3) = s44 + enddo +* Residual when n is not multiple of 4 + if (nresid .ne. 0) then + if (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,n) + s21 = s21 + a(i+1,l)*b(l,n) + s31 = s31 + a(i+2,l)*b(l,n) + s41 = s41 + a(i+3,l)*b(l,n) + enddo + c(i,n) = s11 + c(i+1,n) = s21 + c(i+2,n) = s31 + c(i+3,n) = s41 + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + enddo + c(i,j) = s11 + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + c(i,j+1) = s12 + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + c(i,j+2) = s13 + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + endif + endif + enddo + +* Residual when m is not multiple of 4 + if (mresid .eq. 0) then + return + elseif (mresid .eq. 1) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,j) + s12 = s12 + a(m,l)*b(l,j+1) + s13 = s13 + a(m,l)*b(l,j+2) + s14 = s14 + a(m,l)*b(l,j+3) + enddo + c(m,j) = s11 + c(m,j+1) = s12 + c(m,j+2) = s13 + c(m,j+3) = s14 + enddo +* mresid is 1, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n) + enddo + c(m,n) = s11 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s12 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-1) + s12 = s12 + a(m,l)*b(l,n) + enddo + c(m,n-1) = s11 + c(m,n) = s12 + return + else + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-2) + s12 = s12 + a(m,l)*b(l,n-1) + s13 = s13 + a(m,l)*b(l,n) + enddo + c(m,n-2) = s11 + c(m,n-1) = s12 + c(m,n) = s13 + return + endif + elseif (mresid .eq. 2) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + s21 = 0.0d0 + s22 = 0.0d0 + s23 = 0.0d0 + s24 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,j) + s12 = s12 + a(m-1,l)*b(l,j+1) + s13 = s13 + a(m-1,l)*b(l,j+2) + s14 = s14 + a(m-1,l)*b(l,j+3) + + s21 = s21 + a(m,l)*b(l,j) + s22 = s22 + a(m,l)*b(l,j+1) + s23 = s23 + a(m,l)*b(l,j+2) + s24 = s24 + a(m,l)*b(l,j+3) + enddo + c(m-1,j) = s11 + c(m-1,j+1) = s12 + c(m-1,j+2) = s13 + c(m-1,j+3) = s14 + c(m,j) = s21 + c(m,j+1) = s22 + c(m,j+2) = s23 + c(m,j+3) = s24 + enddo +* mresid is 2, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n) + enddo + c(m-1,n) = s11 + c(m,n) = s21 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-1) + s12 = s12 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-1) + s22 = s22 + a(m,l)*b(l,n) + enddo + c(m-1,n-1) = s11 + c(m-1,n) = s12 + c(m,n-1) = s21 + c(m,n) = s22 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-2) + s12 = s12 + a(m-1,l)*b(l,n-1) + s13 = s13 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-2) + s22 = s22 + a(m,l)*b(l,n-1) + s23 = s23 + a(m,l)*b(l,n) + enddo + c(m-1,n-2) = s11 + c(m-1,n-1) = s12 + c(m-1,n) = s13 + c(m,n-2) = s21 + c(m,n-1) = s22 + c(m,n) = s23 + return + endif + else +* mresid is 3 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + + do l=1,k + s11 = s11 + a(m-2,l)*b(l,j) + s12 = s12 + a(m-2,l)*b(l,j+1) + s13 = s13 + a(m-2,l)*b(l,j+2) + s14 = s14 + a(m-2,l)*b(l,j+3) + + s21 = s21 + a(m-1,l)*b(l,j) + s22 = s22 + a(m-1,l)*b(l,j+1) + s23 = s23 + a(m-1,l)*b(l,j+2) + s24 = s24 + a(m-1,l)*b(l,j+3) + + s31 = s31 + a(m,l)*b(l,j) + s32 = s32 + a(m,l)*b(l,j+1) + s33 = s33 + a(m,l)*b(l,j+2) + s34 = s34 + a(m,l)*b(l,j+3) + enddo + c(m-2,j) = s11 + c(m-2,j+1) = s12 + c(m-2,j+2) = s13 + c(m-2,j+3) = s14 + + c(m-1,j) = s21 + c(m-1,j+1) = s22 + c(m-1,j+2) = s23 + c(m-1,j+3) = s24 + + c(m,j) = s31 + c(m,j+1) = s32 + c(m,j+2) = s33 + c(m,j+3) = s34 + enddo +* mresid is 3, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n) + enddo + c(m-2,n) = s11 + c(m-1,n) = s21 + c(m,n) = s31 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-1) + s12 = s12 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-1) + s22 = s22 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-1) + s32 = s32 + a(m,l)*b(l,n) + enddo + c(m-2,n-1) = s11 + c(m-2,n) = s12 + c(m-1,n-1) = s21 + c(m-1,n) = s22 + c(m,n-1) = s31 + c(m,n) = s32 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-2) + s12 = s12 + a(m-2,l)*b(l,n-1) + s13 = s13 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-2) + s22 = s22 + a(m-1,l)*b(l,n-1) + s23 = s23 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-2) + s32 = s32 + a(m,l)*b(l,n-1) + s33 = s33 + a(m,l)*b(l,n) + enddo + c(m-2,n-2) = s11 + c(m-2,n-1) = s12 + c(m-2,n) = s13 + c(m-1,n-2) = s21 + c(m-1,n-1) = s22 + c(m-1,n) = s23 + c(m,n-2) = s31 + c(m,n-1) = s32 + c(m,n) = s33 + return + endif + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm44_2(a, m, b, k, c, n) + real a(m,2), b(2,n), c(m,n) + + nresid = iand(n,3) + n1 = n - nresid + 1 + + do j=1,n-nresid,4 + do i=1,m + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + c(i,j+1) = a(i,1)*b(1,j+1) + $ + a(i,2)*b(2,j+1) + c(i,j+2) = a(i,1)*b(1,j+2) + $ + a(i,2)*b(2,j+2) + c(i,j+3) = a(i,1)*b(1,j+3) + $ + a(i,2)*b(2,j+3) + enddo + enddo + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + do i=1,m + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + elseif (nresid .eq. 2) then + do i=1,m + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + else + do i=1,m + c(i,n-2) = a(i,1)*b(1,n-2) + $ + a(i,2)*b(2,n-2) + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm_test_all(nid,ivb) +c +c Collect matrix-matrix product statistics +c + external mxms,mxmur2,mxmur3,mxmd,mxmfb,mxmf3,mxmu4,mxmn2 + external mxmk2,mxmtr,mxmrg,madd,mxm,mxm44 +c + parameter (nn=24) + parameter (nt=10) + character*5 c(3,nt) + real s(nn,2,nt,3) + real a(nn,2,nt,3) + + call nekgsync + + do k=1,3 ! 3 tests: N^2 x N, NxN, NxN^2 + call mxmtest(s(1,1, 1,k),nn,c(k, 1),mxm44 ,'mxm44',k,ivb) + call mxmtest(s(1,1, 2,k),nn,c(k, 2),mxms ,' std ',k,ivb) + call mxmtest(s(1,1, 3,k),nn,c(k, 3),mxmur2,'mxmu2',k,ivb) + call mxmtest(s(1,1, 4,k),nn,c(k, 4),mxmur3,'mxmu3',k,ivb) + call mxmtest(s(1,1, 5,k),nn,c(k, 5),mxmd ,'mxmd ',k,ivb) + call mxmtest(s(1,1, 6,k),nn,c(k, 6),mxmfb ,'mxmfb',k,ivb) + call mxmtest(s(1,1, 7,k),nn,c(k, 7),mxmu4 ,'mxmu4',k,ivb) + call mxmtest(s(1,1, 8,k),nn,c(k, 8),mxmf3 ,'mxmf3',k,ivb) + if (k.eq.2) ! Add works only for NxN case + $ call mxmtest(s(1,1, 9,k),nn,c(k, 9),madd ,'madd ',k,ivb) + call mxmtest(s(1,1,10,k),nn,c(k,10),mxm ,'mxm ',k,ivb) + enddo + + call nekgsync + if (nid.eq.0) call mxm_analyze(s,a,nn,c,nt,ivb) + call nekgsync + + return + end +c----------------------------------------------------------------------- + subroutine initab(a,b,n) + real a(1),b(1) + do i=1,n-1 + x = i + k = mod(i,19) + 2 + l = mod(i,17) + 5 + m = mod(i,31) + 3 + a(i) = -.25*(a(i)+a(i+1)) + (x*x + k + l)/(x*x+m) + b(i) = -.25*(b(i)+b(i+1)) + (x*x + k + m)/(x*x+l) + enddo + a(n) = -.25*(a(n)+a(n)) + (x*x + k + l)/(x*x+m) + b(n) = -.25*(b(n)+b(n)) + (x*x + k + m)/(x*x+l) + return + end +c----------------------------------------------------------------------- + subroutine mxms(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + return + end +c----------------------------------------------------------------------- + subroutine mxmu4(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + i1 = n1 - mod(n1,4) + 1 + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO I=1,N1-3,4 + C(I ,J)=C(I ,J)+A(I ,K)*BB + C(I+1,J)=C(I+1,J)+A(I+1,K)*BB + C(I+2,J)=C(I+2,J)+A(I+2,K)*BB + C(I+3,J)=C(I+3,J)+A(I+3,K)*BB + enddo + DO i=i1,N1 + C(I ,J)=C(I ,J)+A(I ,K)*BB + enddo + 100 CONTINUE + return + end +c----------------------------------------------------------------------- + subroutine madd (a,n1,b,n2,c,n3) +c + real a(n1,n2),b(n2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,j)+b(i,j) + enddo + enddo +c + return + end +c----------------------------------------------------------------------- + subroutine mxmUR2(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + if (n2.le.8) then + if (n2.eq.1) then + call mxmur2_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmur2_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmur2_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmur2_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmur2_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmur2_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmur2_7(a,n1,b,n2,c,n3) + else + call mxmur2_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmur2_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmur2_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmur2_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmur2_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmur2_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmur2_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmur2_15(a,n1,b,n2,c,n3) + else + call mxmur2_16(a,n1,b,n2,c,n3) + endif + else + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + endif + return + end +c + subroutine mxmur2_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end + subroutine mxmur2_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end + subroutine mxmur2_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end + subroutine mxmur2_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end + subroutine mxmur2_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end + subroutine mxmur2_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end + subroutine mxmur2_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end + subroutine mxmur2_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end + subroutine mxmur2_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end + subroutine mxmur2_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end + subroutine mxmur2_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end + subroutine mxmur2_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end + subroutine mxmur2_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end + subroutine mxmur2_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end + subroutine mxmur2_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end + subroutine mxmur2_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmUR3(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + if (n3.le.8) then + if (n3.eq.1) then + call mxmur3_1(a,n1,b,n2,c,n3) + elseif (n3.eq.2) then + call mxmur3_2(a,n1,b,n2,c,n3) + elseif (n3.eq.3) then + call mxmur3_3(a,n1,b,n2,c,n3) + elseif (n3.eq.4) then + call mxmur3_4(a,n1,b,n2,c,n3) + elseif (n3.eq.5) then + call mxmur3_5(a,n1,b,n2,c,n3) + elseif (n3.eq.6) then + call mxmur3_6(a,n1,b,n2,c,n3) + elseif (n3.eq.7) then + call mxmur3_7(a,n1,b,n2,c,n3) + else + call mxmur3_8(a,n1,b,n2,c,n3) + endif + elseif (n3.le.16) then + if (n3.eq.9) then + call mxmur3_9(a,n1,b,n2,c,n3) + elseif (n3.eq.10) then + call mxmur3_10(a,n1,b,n2,c,n3) + elseif (n3.eq.11) then + call mxmur3_11(a,n1,b,n2,c,n3) + elseif (n3.eq.12) then + call mxmur3_12(a,n1,b,n2,c,n3) + elseif (n3.eq.13) then + call mxmur3_13(a,n1,b,n2,c,n3) + elseif (n3.eq.14) then + call mxmur3_14(a,n1,b,n2,c,n3) + elseif (n3.eq.15) then + call mxmur3_15(a,n1,b,n2,c,n3) + else + call mxmur3_16(a,n1,b,n2,c,n3) + endif + else + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + endif + return + end +c + subroutine mxmur3_16(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,16),c(n1,16) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + tmp15 = b(k,15) + tmp16 = b(k,16) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + c(i,15) = c(i,15) + a(i,k) * tmp15 + c(i,16) = c(i,16) + a(i,k) * tmp16 + enddo +c + enddo +c + return + end + subroutine mxmur3_15(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,15),c(n1,15) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + tmp15 = b(k,15) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + c(i,15) = c(i,15) + a(i,k) * tmp15 + enddo +c + enddo +c + return + end + subroutine mxmur3_14(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,14),c(n1,14) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + enddo +c + enddo +c + return + end + subroutine mxmur3_13(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,13),c(n1,13) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + enddo +c + enddo +c + return + end + subroutine mxmur3_12(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,12),c(n1,12) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + enddo +c + enddo +c + return + end + subroutine mxmur3_11(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,11),c(n1,11) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + enddo +c + enddo +c + return + end + subroutine mxmur3_10(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,10),c(n1,10) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + enddo +c + enddo +c + return + end + subroutine mxmur3_9(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,9),c(n1,9) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + enddo +c + enddo +c + return + end + subroutine mxmur3_8(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,8),c(n1,8) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + enddo +c + enddo +c + return + end + subroutine mxmur3_7(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,7),c(n1,7) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + enddo +c + enddo +c + return + end + subroutine mxmur3_6(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,6),c(n1,6) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + enddo +c + enddo +c + return + end + subroutine mxmur3_5(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,5),c(n1,5) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + enddo +c + enddo +c + return + end + subroutine mxmur3_4(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,4),c(n1,4) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + enddo +c + enddo +c + return + end + subroutine mxmur3_3(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,3),c(n1,3) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + enddo +c + enddo +c + return + end + subroutine mxmur3_2(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,2),c(n1,2) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + enddo +c + enddo +c + return + end + subroutine mxmur3_1(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,1),c(n1,1) +c + do k=1,n2 + tmp1 = b(k, 1) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + enddo + enddo +c + return + end +C---------------------------------------------------------------------- + subroutine mxmd(a,n1,b,n2,c,n3) +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) + REAL ONE,ZERO,EPS +C +C +C + one=1.0 + zero=0.0 + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + return + end +c----------------------------------------------------------------------- + subroutine mxmfb(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + integer wdsize + save wdsize + data wdsize/0/ +c +c First call: determine word size for dgemm/sgemm discrimination, below. +c + if (wdsize.eq.0) then + one = 1.0 + eps = 1.e-12 + wdsize = 8 + if (one+eps.eq.1.0) wdsize = 4 + endif +c + if (n2.le.8) then + if (n2.eq.1) then + call mxmfb_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmfb_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmfb_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmfb_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmfb_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmfb_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmfb_7(a,n1,b,n2,c,n3) + else + call mxmfb_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmfb_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmfb_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmfb_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmfb_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmfb_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmfb_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmfb_15(a,n1,b,n2,c,n3) + else + call mxmfb_16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxmfb_17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxmfb_18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxmfb_19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxmfb_20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxmfb_21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxmfb_22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxmfb_23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxmfb_24(a,n1,b,n2,c,n3) + endif + else +c + one=1.0 + zero=0.0 + if (wdsize.eq.4) then + call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + else + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + endif + + endif + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + integer wdsize + save wdsize + data wdsize/0/ +c +c First call: determine word size for dgemm/sgemm discrimination, below. +c + if (wdsize.eq.0) then + one = 1.0 + eps = 1.e-12 + wdsize = 8 + if (one+eps.eq.1.0) wdsize = 4 + endif +c + if (n2.le.8) then + if (n2.eq.1) then + call mxmf3_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmf3_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmf3_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmf3_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmf3_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmf3_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmf3_7(a,n1,b,n2,c,n3) + else + call mxmf3_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmf3_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmf3_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmf3_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmf3_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmf3_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmf3_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmf3_15(a,n1,b,n2,c,n3) + else + call mxmf3_16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxmf3_17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxmf3_18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxmf3_19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxmf3_20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxmf3_21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxmf3_22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxmf3_23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxmf3_24(a,n1,b,n2,c,n3) + endif + else +c + one=1.0 + zero=0.0 + if (wdsize.eq.4) then + call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + else + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + endif +c +c N0=N1*N3 +c DO 10 I=1,N0 +c C(I,1)=0. +c 10 CONTINUE +c DO 100 J=1,N3 +c DO 100 K=1,N2 +c BB=B(K,J) +c DO 100 I=1,N1 +c C(I,J)=C(I,J)+A(I,K)*BB +c 100 CONTINUE + + endif + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxm44(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C NOTE -- this code has been set up with the "mxmf3" routine +c referenced in memtime.f. On most machines, the f2 +c and f3 versions give the same performance (f2 is the +c nekton standard). On the t3e, f3 is noticeably faster. +c pff 10/5/98 +C +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +c + if (n2.eq.1) then + call mxm44_2_t(a,n1,b,2,c,n3) + elseif (n2.eq.2) then + call mxm44_2_t(a,n1,b,n2,c,n3) + else + call mxm44_0_t(a,n1,b,n2,c,n3) + endif +c + return + end +c +c----------------------------------------------------------------------- + subroutine mxm44_0_t(a, m, b, k, c, n) +* subroutine matmul44(m, n, k, a, lda, b, ldb, c, ldc) +* real*8 a(lda,k), b(ldb,n), c(ldc,n) + real a(m,k), b(k,n), c(m,n) + real s11, s12, s13, s14, s21, s22, s23, s24 + real s31, s32, s33, s34, s41, s42, s43, s44 +c +c matrix multiply with a 4x4 pencil +c + + mresid = iand(m,3) + nresid = iand(n,3) + m1 = m - mresid + 1 + n1 = n - nresid + 1 + + do i=1,m-mresid,4 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + s44 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + s14 = s14 + a(i,l)*b(l,j+3) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + s24 = s24 + a(i+1,l)*b(l,j+3) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + s34 = s34 + a(i+2,l)*b(l,j+3) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + s44 = s44 + a(i+3,l)*b(l,j+3) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + c(i,j+2) = s13 + c(i,j+3) = s14 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + + c(i+1,j+3) = s24 + c(i+2,j+3) = s34 + c(i+3,j+3) = s44 + enddo +* Residual when n is not multiple of 4 + if (nresid .ne. 0) then + if (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,n) + s21 = s21 + a(i+1,l)*b(l,n) + s31 = s31 + a(i+2,l)*b(l,n) + s41 = s41 + a(i+3,l)*b(l,n) + enddo + c(i,n) = s11 + c(i+1,n) = s21 + c(i+2,n) = s31 + c(i+3,n) = s41 + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + enddo + c(i,j) = s11 + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + c(i,j+1) = s12 + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + c(i,j+2) = s13 + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + endif + endif + enddo + +* Residual when m is not multiple of 4 + if (mresid .eq. 0) then + return + elseif (mresid .eq. 1) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,j) + s12 = s12 + a(m,l)*b(l,j+1) + s13 = s13 + a(m,l)*b(l,j+2) + s14 = s14 + a(m,l)*b(l,j+3) + enddo + c(m,j) = s11 + c(m,j+1) = s12 + c(m,j+2) = s13 + c(m,j+3) = s14 + enddo +* mresid is 1, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n) + enddo + c(m,n) = s11 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s12 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-1) + s12 = s12 + a(m,l)*b(l,n) + enddo + c(m,n-1) = s11 + c(m,n) = s12 + return + else + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-2) + s12 = s12 + a(m,l)*b(l,n-1) + s13 = s13 + a(m,l)*b(l,n) + enddo + c(m,n-2) = s11 + c(m,n-1) = s12 + c(m,n) = s13 + return + endif + elseif (mresid .eq. 2) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + s21 = 0.0d0 + s22 = 0.0d0 + s23 = 0.0d0 + s24 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,j) + s12 = s12 + a(m-1,l)*b(l,j+1) + s13 = s13 + a(m-1,l)*b(l,j+2) + s14 = s14 + a(m-1,l)*b(l,j+3) + + s21 = s21 + a(m,l)*b(l,j) + s22 = s22 + a(m,l)*b(l,j+1) + s23 = s23 + a(m,l)*b(l,j+2) + s24 = s24 + a(m,l)*b(l,j+3) + enddo + c(m-1,j) = s11 + c(m-1,j+1) = s12 + c(m-1,j+2) = s13 + c(m-1,j+3) = s14 + c(m,j) = s21 + c(m,j+1) = s22 + c(m,j+2) = s23 + c(m,j+3) = s24 + enddo +* mresid is 2, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n) + enddo + c(m-1,n) = s11 + c(m,n) = s21 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-1) + s12 = s12 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-1) + s22 = s22 + a(m,l)*b(l,n) + enddo + c(m-1,n-1) = s11 + c(m-1,n) = s12 + c(m,n-1) = s21 + c(m,n) = s22 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-2) + s12 = s12 + a(m-1,l)*b(l,n-1) + s13 = s13 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-2) + s22 = s22 + a(m,l)*b(l,n-1) + s23 = s23 + a(m,l)*b(l,n) + enddo + c(m-1,n-2) = s11 + c(m-1,n-1) = s12 + c(m-1,n) = s13 + c(m,n-2) = s21 + c(m,n-1) = s22 + c(m,n) = s23 + return + endif + else +* mresid is 3 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + + do l=1,k + s11 = s11 + a(m-2,l)*b(l,j) + s12 = s12 + a(m-2,l)*b(l,j+1) + s13 = s13 + a(m-2,l)*b(l,j+2) + s14 = s14 + a(m-2,l)*b(l,j+3) + + s21 = s21 + a(m-1,l)*b(l,j) + s22 = s22 + a(m-1,l)*b(l,j+1) + s23 = s23 + a(m-1,l)*b(l,j+2) + s24 = s24 + a(m-1,l)*b(l,j+3) + + s31 = s31 + a(m,l)*b(l,j) + s32 = s32 + a(m,l)*b(l,j+1) + s33 = s33 + a(m,l)*b(l,j+2) + s34 = s34 + a(m,l)*b(l,j+3) + enddo + c(m-2,j) = s11 + c(m-2,j+1) = s12 + c(m-2,j+2) = s13 + c(m-2,j+3) = s14 + + c(m-1,j) = s21 + c(m-1,j+1) = s22 + c(m-1,j+2) = s23 + c(m-1,j+3) = s24 + + c(m,j) = s31 + c(m,j+1) = s32 + c(m,j+2) = s33 + c(m,j+3) = s34 + enddo +* mresid is 3, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n) + enddo + c(m-2,n) = s11 + c(m-1,n) = s21 + c(m,n) = s31 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-1) + s12 = s12 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-1) + s22 = s22 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-1) + s32 = s32 + a(m,l)*b(l,n) + enddo + c(m-2,n-1) = s11 + c(m-2,n) = s12 + c(m-1,n-1) = s21 + c(m-1,n) = s22 + c(m,n-1) = s31 + c(m,n) = s32 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-2) + s12 = s12 + a(m-2,l)*b(l,n-1) + s13 = s13 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-2) + s22 = s22 + a(m-1,l)*b(l,n-1) + s23 = s23 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-2) + s32 = s32 + a(m,l)*b(l,n-1) + s33 = s33 + a(m,l)*b(l,n) + enddo + c(m-2,n-2) = s11 + c(m-2,n-1) = s12 + c(m-2,n) = s13 + c(m-1,n-2) = s21 + c(m-1,n-1) = s22 + c(m-1,n) = s23 + c(m,n-2) = s31 + c(m,n-1) = s32 + c(m,n) = s33 + return + endif + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm44_2_t(a, m, b, k, c, n) + real a(m,2), b(2,n), c(m,n) + + nresid = iand(n,3) + n1 = n - nresid + 1 + + do j=1,n-nresid,4 + do i=1,m + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + c(i,j+1) = a(i,1)*b(1,j+1) + $ + a(i,2)*b(2,j+1) + c(i,j+2) = a(i,1)*b(1,j+2) + $ + a(i,2)*b(2,j+2) + c(i,j+3) = a(i,1)*b(1,j+3) + $ + a(i,2)*b(2,j+3) + enddo + enddo + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + do i=1,m + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + elseif (nresid .eq. 2) then + do i=1,m + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + else + do i=1,m + c(i,n-2) = a(i,1)*b(1,n-2) + $ + a(i,2)*b(2,n-2) + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxmtest(s,nn,cn,mxmt,name,k,ivb) + + real s(nn,2) ! MFLOPS + character*5 cn ! name + character*5 name + external mxmt + + include 'SIZE' + parameter (lt=4*lx1*ly1*lz1*lelt) + common /scrns/ a(lt) + common /scruz/ b(lt) + common /scrmg/ c(lt) + + integer ll,icalld + save ll,icalld + data ll,icalld /1,0/ + + if (icalld.eq.0) then ! Initialize matrices: + icalld = icalld + 1 + time1 = dnekclock() + call initab(a,b,lt) + time2 = dnekclock()-time1 + if (nid.eq.0) write(6,*) 'mxm test init:',lt,time2,name + endif + + + cn = name + +c Rectangular matrix tests + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + m = k + do n=nn0,nn1 + n1 = n + n2 = n + n3 = n + if (m.eq.1) n1 = n*n + if (m.eq.3) n3 = n*n + if (lt.gt.n1*n3) then + n13 = max(n1,n3) + loop = 250000/(n1*n2*n3) + 500 + if (name.eq.'madd ') loop = 200000/(n1*n3) + 5000 + +c------------------------------------------------------- +c mem test +c------------------------------------------------------- + + t0 = dnekclock() + overh = dnekclock()-t0 + time1 = dnekclock() + do l=1,loop + if (ll.ge.lt-n1*n3) ll = 1 + call mxmt(a(ll),n1,b(ll),n2,c(ll),n3) + ll = ll+n1*n3 + enddo + time2 = dnekclock() + time = time2-time1 - overh + iops=loop*n1*n3*(2*n2-1) + if (name.eq.'madd ') iops = loop*n1*n3 +c write(6,*) loop,time,time2,time1,overh + flops=iops/(1.0e6*time) + s(n,1) = flops +c + timel = time/loop + if (nid.eq.0) write(6,199) n,n1,n2,n3,flops,timel,name + 199 format(i3,'m',1x,3i6,f10.4,e16.5,3x,a5,' mem') +c +c------------------------------------------------------- +c fast test +c------------------------------------------------------- +c + call mxmt(a,n1,b,n2,c,n3) + t0 = dnekclock() + overh = dnekclock()-t0 + time1 = dnekclock() + do l=1,loop + call mxmt(a,n1,b,n2,c,n3) + enddo + time2 = dnekclock() + time = time2-time1 - overh + iops=loop*n1*n3*(2*n2-1) + if (name.eq.'madd ') iops = loop*n1*n3 + flops=iops/(1.0e6*time) + s(n,2) = flops + timel = time/loop +c + if (nid.eq.0) write(6,198) n,n1,n2,n3,flops,timel,name + 198 format(i3,'f',1x,3i6,f10.4,e16.5,3x,a5,' fast') +c + endif + enddo +c + return + end +c----------------------------------------------------------------------- + subroutine mxm_analyze(s,a,nn,c,nt,ivb) + include 'SIZE' + + character*5 c(3,nt) + real s(nn,2,nt,3) ! Measured Mflops, 3 cases + real a(nn,2,nt,3) +c ^ ^ ^ |__ N^2xN, NxN, NxN^2 +c matrix order N __| | |__________which mxm +c | +c |__cached vs. noncached data + + + integer itmax(200) + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + do n = nn0,nn1 + fmax = 0. ! Peak mflops + do it=1,nt + ai = 0. + di = 0. + do k=1,3 + if (s(n,1,it,k).gt.0) then ! Take harmonic means of + ai = ai + 1./s(n,1,it,k) ! case I II and III for + di = di + 1. ! mem test, s(n,1...). + endif + enddo + if (ai.gt.0) ai = di/ai + a(n,1,it,1) = di/ai + if (ai.gt.fmax.and.c(2,it).ne.'madd ') then + fmax = ai + itmax(n) = it + endif + enddo + it = itmax(n) + if (nid.eq.0) write(6,3) n,it,c(2,it),(s(n,1,it,k),k=1,3),fmax + 3 format(i3,i2,1x,a5,4f12.0,' Peak harmonic') + enddo + call out_anal(s,a,nn,c,nt,itmax,'Harmonic',1,ivb) +c +c Case by case +c + do k=1,3 + do n = nn0,nn1 + fmax = 0. ! Peak mflops + do it=1,nt + ai = s(n,1,it,k) + if (ai.gt.fmax.and.c(2,it).ne.'madd ') then + fmax = ai + itmax(n) = it + endif + enddo + enddo + if (k.eq.1) call out_anal(s,a,nn,c,nt,itmax,'Case N2N',k,ivb) + if (k.eq.2) call out_anal(s,a,nn,c,nt,itmax,'Case NxN',k,ivb) + if (k.eq.3) call out_anal(s,a,nn,c,nt,itmax,'Case NN2',k,ivb) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine out_anal(s,a,nn,c,nt,itmax,name8,k,ivb) + include 'SIZE' + + character*5 c(3,nt) + real s(nn,2,nt,3) + real a(nn,2,nt,3) + integer itmax(200) + character*8 name8 + + if (nid.ne.0) return + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + + do n=nn0,nn1 + it = itmax(n) + write(6,1) n,s(n,1,it,k),c(2,it),name8 + 1 format(i4,f14.0,4x,a5,4x,a8,' MxM MFLOPS') + enddo + + return + end +c----------------------------------------------------------------------- diff --git a/src/mxm_wrapper.f b/src/mxm_wrapper.f new file mode 100644 index 0000000..7c3d111 --- /dev/null +++ b/src/mxm_wrapper.f @@ -0,0 +1,165 @@ + subroutine mxm(a,n1,b,n2,c,n3) + +#if defined(XSMM_DISPATCH) + USE :: LIBXSMM +#endif + +#define LIBXSMM_DMM1(N, a, b, c) LIBXSMM_DMM1_str(N, a, b, c) +#define LIBXSMM_DMM1_str(N, a, b, c) libxsmm_dmm_##N##x##N##_##N##_##N(a, b, c) +#define LIBXSMM_DMM2(N, a, b, c) LIBXSMM_DMM2_str(N, a, b, c) +#define LIBXSMM_DMM2_str(N, a, b, c) libxsmm_dmm_##N##_##N##x##N##_##N(a, b, c) +#define LIBXSMM_DMM3(N, a, b, c) LIBXSMM_DMM3_str(N, a, b, c) +#define LIBXSMM_DMM3_str(N, a, b, c) libxsmm_dmm_##N##_##N##_##N(a, b, c) + +c +c Compute matrix-matrix product C = A*B +c for contiguously packed matrices A,B, and C. +c +#if defined (MKL) +# include "mkl_direct_call.fi" +#endif + real a(n1,n2),b(n2,n3),c(n1,n3) + real alpha, beta +c + include 'SIZE' + include 'TOTAL' +c + integer aligned + integer K10_mxm + integer init, prevn2 + +#if defined(XSMM_DISPATCH) + TYPE(LIBXSMM_DMMFUNCTION) :: xmm1,xmm2,xmm3 +#endif + + data init /0/, prevn2 /0/ + save init, prevn2 +#if defined(XSMM_DISPATCH) + save xsmm1, xsmm2, xsmm3 +#endif + +c write(*,*) "in", init, prevn2, LOC(xsmm1), LOC(xsmm2), LOC(xsmm3) + +#if defined (MKL) + alpha = 1.0 + beta = 0.0 + call dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1) +#elif defined (BLAS_MXM) + alpha = 1.0 + beta = 0.0 + call dgemm('N','N',n1,n3,n2,alpha,a,n1,b,n2,beta,c,n1) +#elif defined (BG) + call bg_aligned3(a,b,c,aligned) + if (n2.eq.2) then + call mxm44_2(a,n1,b,n2,c,n3) + else if ((aligned.eq.1) .and. + $ (n1.ge.8) .and. (n2.ge.8) .and. (n3.ge.8) .and. + $ (modulo(n1,2).eq.0) .and. (modulo(n2,2).eq.0) ) then + if (modulo(n3,4).eq.0) then + call bg_mxm44(a,n1,b,n2,c,n3) + else + call bg_mxm44_uneven(a,n1,b,n2,c,n3) + endif + else if((aligned.eq.1) .and. + $ (modulo(n1,6).eq.0) .and. (modulo(n3,6).eq.0) .and. + $ (n2.ge.4) .and. (modulo(n2,2).eq.0) ) then + call bg_mxm3(a,n1,b,n2,c,n3) + else + call mxm44_0(a,n1,b,n2,c,n3) + endif +#elif defined (K10_MXM) + ! fow now only supported for lx1=8 + ! tuned for AMD K10 + ierr = K10_mxm(a,n1,b,n2,c,n3) + if (ierr.gt.0) call mxmf2(a,n1,b,n2,c,n3) +#elif defined (XSMM_DISPATCH) + if (init == 0) then + CALL libxsmm_init() + init = 1 + write(*,*) "initializing libxsmm" + end if + + if (prevn2 /= n2) then + prevn2 = n2 + + CALL libxsmm_dispatch(xmm1, n2, n2, n2*n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm1" + IF (.NOT. libxsmm_available(xmm1)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + + CALL libxsmm_dispatch(xmm2, n2, n2, n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm2" + IF (.NOT. libxsmm_available(xmm2)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + + CALL libxsmm_dispatch(xmm3, n2*n2, n2, n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm3" + IF (.NOT. libxsmm_available(xmm3)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + end if + + if (n1 .eq. n2*n2) then +c write(*,*) "call to xmm3", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm3)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm3, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_256_16_16(a, b, c) + else if (n3 .eq. n2*n2) then +c write(*,*) "call to xmm1", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm1)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm1, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_16_256_16(a, b, c) + else +c write(*,*) "call to xmm2", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm2)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm2, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_16_16_16(a, b, c) + end if +#elif defined (XSMM_FIXED) + if (n2 .eq. NPOLY) then + if (n1 .eq. n2*n2) then + call LIBXSMM_DMM1(NPOLY, a, b, c) + else if (n3 .eq. n2*n2) then + call LIBXSMM_DMM2(NPOLY, a, b, c) + else + call LIBXSMM_DMM3(NPOLY, a, b, c) + end if + else + write(*,*) "Invalid matrix size" + stop + end if +#elif defined (XSMM) + alpha = 1.0 + beta = 0.0 + CALL libxsmm_dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1) +#elif defined (MXMBASIC) + do j=1,n3 + do i=1,n1 + c(i,j) = 0.0 + do k=1,n2 + c(i,j) = c(i,j) + a(i,k)*b(k,j) + enddo + enddo + enddo +#else + call mxmf2(a,n1,b,n2,c,n3) +#endif + +c write(*,*) "out", init, prevn2, xsmm1, xsmm2, xsmm3 + + return + end diff --git a/src/omp.f b/src/omp.f new file mode 100644 index 0000000..1591214 --- /dev/null +++ b/src/omp.f @@ -0,0 +1,128 @@ +#ifdef TIMERS +#define NBTIMER(a) a = dnekclock() +#define STIMER(a) a = dnekclock_sync() +#define ACCUMTIMER(b,a) b = b + (dnekclock()- a) +#else +#define NBTIMER(a) +#define STIMER(a) +#define ACCUMTIMER(a,b) +#endif + + + subroutine rzeroi(a,n,start,fin) + implicit none + + real a(n) + integer n, i, start, fin + + do i = start, fin + a(i) = 0.0 + end do + + return + end subroutine + +c---------------------------------------------------------- + + subroutine copyi(a,b,n, start, fin) + implicit none + + real a(n),b(n) + integer n, i, start, fin + + do i=start,fin + a(i)=b(i) + enddo + + return + end subroutine + +c---------------------------------------------------------- + + subroutine glsc3i(val,a,b,mult,n,find,lind) + implicit none + + include 'TIMER' + + real val,a(n),b(n),mult(n) + real tsum,psum,work(1) + integer n,find,lind + integer i, tmt, thread + integer omp_get_thread_num + + save psum + data psum /0.0/ + + thread = 0 +#ifdef _OPENMP + thread = omp_get_thread_num() +#endif + tmt = thread + 1 + + tsum = 0.0 + do i=find, lind + tsum = tsum + a(i)*b(i)*mult(i) + end do + +c$OMP ATOMIC update + psum = psum + tsum +c$OMP END ATOMIC + +c$OMP BARRIER + NBTIMER(ttemp4) +c$OMP MASTER + call gop(psum,work,'+ ',1) + val = psum + psum = 0.0 +c$OMP END MASTER +c$OMP BARRIER + ACCUMTIMER(tgop(gopi(tmt),tmt), ttemp4) + + + return + end subroutine + +c---------------------------------------------------------- + + subroutine solveMi(z,r,n,start,fin) + implicit none + + real z(n),r(n) + integer n,start,fin + + call copyi(z,r,n,start,fin) + + return + end + +c---------------------------------------------------------- + + subroutine add2s1i(a,b,c1,n,start,fin) + implicit none + + real a(n),b(n),c1 + integer n,start,fin + integer i + + do i= start, fin + a(i)=c1*a(i)+b(i) + end do + + return + end subroutine + +c---------------------------------------------------------- + + subroutine add2s2i(a,b,c1,n,start,fin) + implicit none + + real a(n),b(n),c1 + integer n,start,fin + integer i + + do i= start,fin + a(i)=a(i)+c1*b(i) + end do + + return + end subroutine diff --git a/src/prox_dssum.f b/src/prox_dssum.f new file mode 100644 index 0000000..c3c0402 --- /dev/null +++ b/src/prox_dssum.f @@ -0,0 +1,174 @@ +c----------------------------------------------------------------------- + subroutine dssum(f) + include 'SIZE' + include 'TOTAL' + real f(1) + +c call nekgsync() + call gs_op(gsh,f,1,1,0) ! Gather-scatter operation ! w = QQ w + + return + end +c----------------------------------------------------------------------- + subroutine proxy_setupds(gs_handle) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer gs_handle,dof + integer*8 glo_num(lx1*ly1*lz1*lelt) + + common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal + + t0 = dnekclock() + + call set_vert_box(glo_num) ! Set global-to-local map + + ntot = nx1*ny1*nz1*nelt + call gs_setup(gs_handle,glo_num,ntot,nekcomm,mp) ! Initialize gather-scatter + dof = ntot *mp + t1 = dnekclock() - t0 + if (nid.eq.0) then + write(6,1) t1,gs_handle,nx1,dof + 1 format(' setupds time',1pe11.4,' seconds ',2i3,i12) + endif + + return + end +c----------------------------------------------------------------------- + subroutine set_vert_box(glo_num) + +c Set up global numbering for elements in a box + + include 'SIZE' + include 'PARALLEL' + + integer*8 glo_num(1),ii,kg,jg,ig ! The latter 3 for proper promotion + + integer e,ex,ey,ez,eg + + nn = nx1-1 ! nn := polynomial order + + do e=1,nelt + eg = lglel(e) + call get_exyz(ex,ey,ez,eg,nelx,nely,nelz) + do k=0,nn + do j=0,nn + do i=0,nn + kg = nn*(ez-1) + k + jg = nn*(ey-1) + j + ig = nn*(ex-1) + i + ii = 1 + ig + jg*(nn*nelx+1) + kg*(nn*nelx+1)*(nn*nely+1) + ll = 1 + i + nx1*j + nx1*ny1*k + nx1*ny1*nz1*(e-1) + glo_num(ll) = ii + enddo + enddo + enddo + enddo + + return + end +c----------------------------------------------------------------------- + subroutine get_exyz(ex,ey,ez,eg,nelx,nely,nelz) + integer ex,ey,ez,eg + + nelxy = nelx*nely + + ez = 1 + (eg-1)/nelxy + ey = mod1 (eg,nelxy) + ey = 1 + (ey-1)/nelx + ex = mod1 (eg,nelx) + + return + end +c----------------------------------------------------------------------- + subroutine outmat_glo_num(glo_num) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer*8 glo_num(lx1*ly1*lz1,lelt) + + integer e + + do e=1,nelt + call outmat_e_i8(glo_num(1,e),e) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine outmat_e_i8(gn,e) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer*8 gn(lx1,ly1,lz1) + + integer e + + write(6,*) + write(6,2) e + write(6,*) + + do k0=3,1,-2 + + k1=k0+1 + do j=ny1,1,-1 + write(6,1) ((gn(i,j,k),i=1,4),k=k0,k1) + enddo + write(6,*) + + enddo + 1 format('gn: ',4i8,3x,4i8) + 2 format('gn: element: ',i4) + + return + end +c----------------------------------------------------------------------- + subroutine outmat_r(x,name5) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + character*5 name5 + + real x(lx1*ly1*lz1,lelt) + + integer e + + do e=1,nelt + call outmat_e_r(x(1,e),name5,e) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine outmat_e_r(x,name5,e) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + character*5 name5 + + real x(lx1,ly1,lz1) + + integer e + + write(6,*) + write(6,2) e,name5 + write(6,*) + + do k0=3,1,-2 + + k1=k0+1 + do j=ny1,1,-1 + write(6,1) ((x(i,j,k),i=1,4),k=k0,k1) + enddo + write(6,*) + + enddo + 1 format('mat: ',4f8.3,3x,4f8.3) + 2 format('mat: element: ',i4,2x,a5) + + return + end +c----------------------------------------------------------------------- diff --git a/src/prox_setup.f b/src/prox_setup.f new file mode 100644 index 0000000..fabe8c5 --- /dev/null +++ b/src/prox_setup.f @@ -0,0 +1,113 @@ +c----------------------------------------------------------------------- + subroutine proxy_setup(a,b,c,d,z,w,g) + + include 'SIZE' + include 'TOTAL' + + real a(lx1*lx1),b(lx1),c(lx1*lx1),d(lx1*lx1),z(lx1) + $ , w(lx1*2),g(6,lx1*ly1*lz1*lelt) + + call semhat(a,b,c,d,z,w,nx1-1) + + n = nx1*nx1 + call copy(dxm1,d,n) + call transpose(dxtm1,nx1,dxm1,nx1) + + call copy(zgm1,z,nx1) ! GLL points + call copy(wxm1,b,nx1) ! GLL weights + + call setup_g(g) + +c m = nx1*ny1*nz1*nelt +c call outmat(g,6,m,'gxyz 1',m) + + return + end +c------------------------------------------------------------------------- + subroutine setup_g(g) + + include 'SIZE' + include 'TOTAL' + real g(6,nx1,ny1,nz1,nelt) + integer e + + n = nx1*ny1*nz1*nelt + + + do e=1,nelt + do k=1,nz1 + do j=1,ny1 + do i=1,nx1 + call rzero(g(1,i,j,k,e),6) + g(1,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(4,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(6,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(6,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + enddo + enddo + enddo + enddo + + return + end +c------------------------------------------------------------------------- + subroutine transpose(a,lda,b,ldb) + real a(lda,1),b(ldb,1) +c + do j=1,ldb + do i=1,lda + a(i,j) = b(j,i) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine outmat(a,m,n,name6,ie) + real a(m,n) + character*6 name6 +c + n10 = min(n,10) + write(6,*) + write(6,*) ie,' matrix: ',name6,m,n + do i=1,m + write(6,6) ie,name6,(a(i,j),j=1,n10) + enddo + 6 format(i3,1x,a6,1p10e12.4) + write(6,*) + return + end +c----------------------------------------------------------------------- + subroutine outmat1(a,m,n,name6,ie) + real a(m,n) + character*6 name6 +c + n10 = min(n,10) + write(ie,*) + write(ie,*) ie,' matrix: ',name6,m,n + do i=1,m + write(ie,6) ie,name6,(a(i,j),j=1,n10) + enddo + 6 format(i3,1x,a6,1p10e12.4) + write(ie,*) + return + end +c----------------------------------------------------------------------- + function randx(seed) + +#ifdef BGQ +#define M_SIN(X) _sin((X)) +#define M_COS(X) _cos((X)) +#else +#define M_SIN(X) sin((X)) +#define M_COS(X) cos((X)) +#endif + + arg = 1.e9*seed + arg = 1.e9*M_COS(arg) + randx = M_SIN(arg) + seed = randx + seed = randx + + return + end +c----------------------------------------------------------------------- diff --git a/src/semhat.f b/src/semhat.f new file mode 100644 index 0000000..1ad8d23 --- /dev/null +++ b/src/semhat.f @@ -0,0 +1,94 @@ +c----------------------------------------------------------------------- + subroutine semhat(a,b,c,d,z,w,n) +c +c Generate matrices for single element, 1D operators: +c +c a = Laplacian +c b = diagonal mass matrix (GLL weights) +c c = convection operator b*d +c d = derivative matrix +c z = GLL points + + real a(0:n,0:n),b(0:n),c(0:n,0:n),d(0:n,0:n),z(0:n) + real w(0:2*n) + + np = n+1 + + call zwgll (z,b,np) + + do i=0,n + call fd_weights_full(z(i),z,n,1,w) + do j=0,n + d(i,j) = w(j+np) ! Derivative matrix + enddo + enddo + + call rzero(a,np*np) + do j=0,n + do i=0,n + do k=0,n + a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j) + enddo + c(i,j) = b(i)*d(i,j) + enddo + enddo + + return + end +c----------------------------------------------------------------------- + subroutine fd_weights_full(xx,x,n,m,c) +c +c This routine evaluates the derivative based on all points +c in the stencils. It is more memory efficient than "fd_weights" +c +c This set of routines comes from the appendix of +c A Practical Guide to Pseudospectral Methods, B. Fornberg +c Cambridge Univ. Press, 1996. (pff) +c +c Input parameters: +c xx -- point at wich the approximations are to be accurate +c x -- array of x-ordinates: x(0:n) +c n -- polynomial degree of interpolant (# of points := n+1) +c m -- highest order of derivative to be approxxmated at xi +c +c Output: +c c -- set of coefficients c(0:n,0:m). +c c(j,k) is to be applied at x(j) when +c the kth derivative is approxxmated by a +c stencil extending over x(0),x(1),...x(n). +c +c + real x(0:n),c(0:n,0:m) + + c1 = 1. + c4 = x(0) - xx + + do k=0,m + do j=0,n + c(j,k) = 0. + enddo + enddo + c(0,0) = 1. + + do i=1,n + mn = min(i,m) + c2 = 1. + c5 = c4 + c4 = x(i)-xx + do j=0,i-1 + c3 = x(i)-x(j) + c2 = c2*c3 + do k=mn,1,-1 + c(i,k) = c1*(k*c(i-1,k-1)-c5*c(i-1,k))/c2 + enddo + c(i,0) = -c1*c5*c(i-1,0)/c2 + do k=mn,1,-1 + c(j,k) = (c4*c(j,k)-k*c(j,k-1))/c3 + enddo + c(j,0) = c4*c(j,0)/c3 + enddo + c1 = c2 + enddo + return + end +c----------------------------------------------------------------------- diff --git a/src/speclib.f b/src/speclib.f new file mode 100644 index 0000000..6c86b64 --- /dev/null +++ b/src/speclib.f @@ -0,0 +1,1176 @@ +C============================================================================== +C +C LIBRARY ROUTINES FOR SPECTRAL METHODS +C +C March 1989 +C +C For questions, comments or suggestions, please contact: +C +C Einar Malvin Ronquist +C Room 3-243 +C Department of Mechanical Engineering +C Massachusetts Institute of Technology +C 77 Massachusetts Avenue +C Cambridge, MA 0299 +C U.S.A. +C +C------------------------------------------------------------------------------ +C +C ABBRIVIATIONS: +C +C M - Set of mesh points +C Z - Set of collocation/quadrature points +C W - Set of quadrature weights +C H - Lagrangian interpolant +C D - Derivative operator +C I - Interpolation operator +C GL - Gauss Legendre +C GLL - Gauss-Lobatto Legendre +C GJ - Gauss Jacobi +C GLJ - Gauss-Lobatto Jacobi +C +C +C MAIN ROUTINES: +C +C Points and weights: +C +C ZWGL Compute Gauss Legendre points and weights +C ZWGLL Compute Gauss-Lobatto Legendre points and weights +C ZWGJ Compute Gauss Jacobi points and weights (general) +C ZWGLJ Compute Gauss-Lobatto Jacobi points and weights (general) +C +C Lagrangian interpolants: +C +C HGL Compute Gauss Legendre Lagrangian interpolant +C HGLL Compute Gauss-Lobatto Legendre Lagrangian interpolant +C HGJ Compute Gauss Jacobi Lagrangian interpolant (general) +C HGLJ Compute Gauss-Lobatto Jacobi Lagrangian interpolant (general) +C +C Derivative operators: +C +C DGLL Compute Gauss-Lobatto Legendre derivative matrix +C DGLLGL Compute derivative matrix for a staggered mesh (GLL->GL) +C DGJ Compute Gauss Jacobi derivative matrix (general) +C DGLJ Compute Gauss-Lobatto Jacobi derivative matrix (general) +C DGLJGJ Compute derivative matrix for a staggered mesh (GLJ->GJ) (general) +C +C Interpolation operators: +C +C IGLM Compute interpolation operator GL -> M +C IGLLM Compute interpolation operator GLL -> M +C IGJM Compute interpolation operator GJ -> M (general) +C IGLJM Compute interpolation operator GLJ -> M (general) +C +C Other: +C +C PNLEG Compute Legendre polynomial of degree N +C PNDLEG Compute derivative of Legendre polynomial of degree N +C +C Comments: +C +C Note that many of the above routines exist in both single and +C double precision. If the name of the single precision routine is +C SUB, the double precision version is called SUBD. In most cases +C all the "low-level" arithmetic is done in double precision, even +C for the single precsion versions. +C +C Useful references: +C +C [1] Gabor Szego: Orthogonal Polynomials, American Mathematical Society, +C Providence, Rhode Island, 1939. +C [2] Abramowitz & Stegun: Handbook of Mathematical Functions, +C Dover, New York, 1972. +C [3] Canuto, Hussaini, Quarteroni & Zang: Spectral Methods in Fluid +C Dynamics, Springer-Verlag, 1988. +C +C +C============================================================================== +C +C-------------------------------------------------------------------- + SUBROUTINE ZWGL (Z,W,NP) +C-------------------------------------------------------------------- +C +C Generate NP Gauss Legendre points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha=0,beta=0). +C The polynomial degree N=NP-1. +C Z and W are in single precision, but all the arithmetic +C operations are done in double precision. +C +C-------------------------------------------------------------------- + REAL Z(1),W(1) + ALPHA = 0. + BETA = 0. + CALL ZWGJ (Z,W,NP,ALPHA,BETA) + RETURN + END +C + SUBROUTINE ZWGLL (Z,W,NP) +C-------------------------------------------------------------------- +C +C Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha=0,beta=0). +C The polynomial degree N=NP-1. +C Z and W are in single precision, but all the arithmetic +C operations are done in double precision. +C +C-------------------------------------------------------------------- + REAL Z(1),W(1) + ALPHA = 0. + BETA = 0. + CALL ZWGLJ (Z,W,NP,ALPHA,BETA) + RETURN + END +C + SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Single precision version. +C +C-------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD(NZD),WD(NZD) + REAL Z(1),W(1),ALPHA,BETA +C + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Double precision version. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z(1),W(1),ALPHA,BETA +C + N = NP-1 + DN = ((N)) + ONE = 1. + TWO = 2. + APB = ALPHA+BETA +C + IF (NP.LE.0) THEN + WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + IF (NP.EQ.1) THEN + Z(1) = (BETA-ALPHA)/(APB+TWO) + W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) + $ * TWO**(APB+ONE) + RETURN + ENDIF +C + CALL JACG (Z,NP,ALPHA,BETA) +C + NP1 = N+1 + NP2 = N+2 + DNP1 = ((NP1)) + DNP2 = ((NP2)) + FAC1 = DNP1+ALPHA+BETA+ONE + FAC2 = FAC1+DNP1 + FAC3 = FAC2+ONE + FNORM = PNORMJ(NP1,ALPHA,BETA) + RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2) + DO 100 I=1,NP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I)) + W(I) = -RCOEF/(P*PDM1) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Single precision version. +C +C-------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD(NZD),WD(NZD) + REAL Z(1),W(1),ALPHA,BETA +C + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Double precision version. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z(NP),W(NP),ALPHA,BETA +C + N = NP-1 + NM1 = N-1 + ONE = 1. + TWO = 2. +C + IF (NP.LE.1) THEN + WRITE (6,*) 'ZWGLJD: Minimum number of Gauss-Lobatto points is 2' + WRITE (6,*) 'ZWGLJD: alpha,beta:',alpha,beta,np + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + IF (NM1.GT.0) THEN + ALPG = ALPHA+ONE + BETG = BETA+ONE + CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG) + ENDIF + Z(1) = -ONE + Z(NP) = ONE + DO 100 I=2,NP-1 + W(I) = W(I)/(ONE-Z(I)**2) + 100 CONTINUE + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1)) + W(1) = ENDW1 (N,ALPHA,BETA)/(TWO*PD) + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP)) + W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD) +C + RETURN + END +C + REAL*8 FUNCTION ENDW1 (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW1 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW1 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) + $ * (APB+THREE)/FOUR + IF (N.EQ.2) THEN + ENDW1 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 + 100 CONTINUE + ENDW1 = F3 + RETURN + END +C + REAL*8 FUNCTION ENDW2 (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW2 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW2 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) + $ * (APB+THREE)/FOUR + IF (N.EQ.2) THEN + ENDW2 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 + 100 CONTINUE + ENDW2 = F3 + RETURN + END +C + REAL*8 FUNCTION GAMMAF (X) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 X + ZERO = 0.0 + HALF = 0.5 + ONE = 1.0 + TWO = 2.0 + FOUR = 4.0 + PI = FOUR*ATAN(ONE) + GAMMAF = ONE + IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI) + IF (X.EQ. HALF) GAMMAF = SQRT(PI) + IF (X.EQ. ONE ) GAMMAF = ONE + IF (X.EQ. TWO ) GAMMAF = ONE + IF (X.EQ. 1.5 ) GAMMAF = SQRT(PI)/2. + IF (X.EQ. 2.5) GAMMAF = 1.5*SQRT(PI)/2. + IF (X.EQ. 3.5) GAMMAF = 0.5*(2.5*(1.5*SQRT(PI))) + IF (X.EQ. 3. ) GAMMAF = 2. + IF (X.EQ. 4. ) GAMMAF = 6. + IF (X.EQ. 5. ) GAMMAF = 24. + IF (X.EQ. 6. ) GAMMAF = 120. + RETURN + END +C + REAL*8 FUNCTION PNORMJ (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ONE = 1. + TWO = 2. + DN = ((N)) + CONST = ALPHA+BETA+ONE + IF (N.LE.1) THEN + PROD = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA) + PROD = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA)) + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + ENDIF + PROD = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE) + PROD = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE)) + PROD = PROD*(ONE+ALPHA)*(TWO+ALPHA) + PROD = PROD*(ONE+BETA)*(TWO+BETA) + DO 100 I=3,N + DINDX = ((I)) + FRAC = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA)) + PROD = PROD*FRAC + 100 CONTINUE + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + END +C + SUBROUTINE JACG (XJAC,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Compute NP Gauss points XJAC, which are the zeros of the +C Jacobi polynomial J(NP) with parameters ALPHA and BETA. +C ALPHA and BETA determines the specific type of Gauss points. +C Examples: +C ALPHA = BETA = 0.0 -> Legendre points +C ALPHA = BETA = -0.5 -> Chebyshev points +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 XJAC(1) + DATA KSTOP /10/ + DATA EPS/1.0e-12/ + N = NP-1 + one = 1. + DTH = 4.*ATAN(one)/(2.*((N))+2.) + DO 40 J=1,NP + IF (J.EQ.1) THEN + X = COS((2.*(((J))-1.)+1.)*DTH) + ELSE + X1 = COS((2.*(((J))-1.)+1.)*DTH) + X2 = XLAST + X = (X1+X2)/2. + ENDIF + DO 30 K=1,KSTOP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X) + RECSUM = 0. + JM = J-1 + DO 29 I=1,JM + RECSUM = RECSUM+1./(X-XJAC(NP-I+1)) + 29 CONTINUE + DELX = -P/(PD-RECSUM*P) + X = X+DELX + IF (ABS(DELX) .LT. EPS) GOTO 31 + 30 CONTINUE + 31 CONTINUE + XJAC(NP-J+1) = X + XLAST = X + 40 CONTINUE + DO 200 I=1,NP + XMIN = 2. + DO 100 J=I,NP + IF (XJAC(J).LT.XMIN) THEN + XMIN = XJAC(J) + JMIN = J + ENDIF + 100 CONTINUE + IF (JMIN.NE.I) THEN + SWAP = XJAC(I) + XJAC(I) = XJAC(JMIN) + XJAC(JMIN) = SWAP + ENDIF + 200 CONTINUE + RETURN + END +C + SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2, + $ N,ALP,BET,X) +C-------------------------------------------------------------------- +C +C Computes the Jacobi polynomial (POLY) and its derivative (PDER) +C of degree N at X. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + APB = ALP+BET + POLY = 1. + PDER = 0. + IF (N .EQ. 0) RETURN + POLYL = POLY + PDERL = PDER + POLY = (ALP-BET+(APB+2.)*X)/2. + PDER = (APB+2.)/2. + IF (N .EQ. 1) RETURN + DO 20 K=2,N + DK = ((K)) + A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.) + A2 = (2.*DK+APB-1.)*(ALP**2-BET**2) + B3 = (2.*DK+APB-2.) + A3 = B3*(B3+1.)*(B3+2.) + A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB) + POLYN = ((A2+A3*X)*POLY-A4*POLYL)/A1 + PDERN = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1 + PSAVE = POLYL + PDSAVE = PDERL + POLYL = POLY + POLY = POLYN + PDERL = PDER + PDER = PDERN + 20 CONTINUE + POLYM1 = POLYL + PDERM1 = PDERL + POLYM2 = PSAVE + PDERM2 = PDSAVE + RETURN + END +C + REAL FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGJ through +C the NP Gauss Jacobi points ZGJ at the point Z. +C Single precision version. +C +C--------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD,ZGJD(NZD) + REAL Z,ZGJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ZD = Z + DO 100 I=1,NP + ZGJD(I) = ZGJ(I) + 100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGJ = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD) + RETURN + END +C + REAL*8 FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGJD through +C the NZ Gauss-Lobatto Jacobi points ZGJ at the point Z. +C Double precision version. +C +C--------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z,ZGJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGJ(II) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGJD = ONE + RETURN + ENDIF + CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI) + CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z) + HGJD = PZ/(PDZI*(Z-ZI)) + RETURN + END +C + REAL FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGLJ through +C the NZ Gauss-Lobatto Jacobi points ZGLJ at the point Z. +C Single precision version. +C +C--------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD,ZGLJD(NZD) + REAL Z,ZGLJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ZD = Z + DO 100 I=1,NP + ZGLJD(I) = ZGLJ(I) + 100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGLJ = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD) + RETURN + END +C + REAL*8 FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGLJD through +C the NZ Gauss-Lobatto Jacobi points ZJACL at the point Z. +C Double precision version. +C +C--------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z,ZGLJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGLJ(I) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGLJD = ONE + RETURN + ENDIF + N = NP-1 + DN = ((N)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI) + CONST = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z) + HGLJD = (ONE-Z**2)*PD/(CONST*(Z-ZI)) + RETURN + END +C + SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss Jacobi points Z. +C Note: D and DT are square matrices. +C Single precision version. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL*8 DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA +C + IF (NZ.LE.0) THEN + WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1' + call exitt + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here Nz=',Nz + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) + 100 CONTINUE + CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss Jacobi points Z. +C Note: D and DT are square matrices. +C Double precision version. +C +C----------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + DO 200 I=1,NZ + DO 200 J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J)) + IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J))) + IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ + $ (TWO*(ONE-Z(I)**2)) + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Jacobi points Z. +C Note: D and DT are square matrices. +C Single precision version. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL*8 DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGLJ: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NZ=',NZ + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) + 100 CONTINUE + CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Jacobi points Z. +C Note: D and DT are square matrices. +C Double precision version. +C +C----------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGLJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + DO 200 I=1,NZ + DO 200 J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J)) + CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI + CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ + IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J))) + IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) + $ D(I,J) = (ALPHA*(ONE+Z(I))-BETA*(ONE-Z(I)))/ + $ (TWO*(ONE-Z(I)**2)) + IF ((I.EQ.J).AND.(I.EQ.1)) + $ D(I,J) = (EIGVAL+ALPHA)/(TWO*(BETA+TWO)) + IF ((I.EQ.J).AND.(I.EQ.NZ)) + $ D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO)) + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLL (D,DT,Z,NZ,NZD) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Legendre points Z. +C Note: D and DT are square matrices. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1) + N = NZ-1 + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Subroutine DGLL' + WRITE (6,*) 'Maximum polynomial degree =',NMAX + WRITE (6,*) 'Polynomial degree =',NZ + ENDIF + IF (NZ .EQ. 1) THEN + D(1,1) = 0. + RETURN + ENDIF + FN = (N) + d0 = FN*(FN+1.)/4. + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = 0. + IF (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ + $ (PNLEG(Z(J),N)*(Z(I)-Z(J))) + IF ((I.EQ.J).AND.(I.EQ.1)) D(I,J) = -d0 + IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) = d0 + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + REAL FUNCTION HGLL (I,Z,ZGLL,NZ) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant L through +C the NZ Gauss-Lobatto Legendre points ZGLL at the point Z. +C +C--------------------------------------------------------------------- + REAL ZGLL(1) + EPS = 1.E-5 + DZ = Z - ZGLL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGLL = 1. + RETURN + ENDIF + N = NZ - 1 + ALFAN = (N)*((N)+1.) + HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ + $ (ALFAN*PNLEG(ZGLL(I),N)*(Z-ZGLL(I))) + RETURN + END +C + REAL FUNCTION HGL (I,Z,ZGL,NZ) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGL through +C the NZ Gauss Legendre points ZGL at the point Z. +C +C--------------------------------------------------------------------- + REAL ZGL(1) + EPS = 1.E-5 + DZ = Z - ZGL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGL = 1. + RETURN + ENDIF + N = NZ-1 + HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I))) + RETURN + END +C + REAL FUNCTION PNLEG (Z,N) +C--------------------------------------------------------------------- +C +C Compute the value of the Nth order Legendre polynomial at Z. +C (Simpler than JACOBF) +C Based on the recursion formula for the Legendre polynomials. +C +C--------------------------------------------------------------------- +C +C This next statement is to overcome the underflow bug in the i860. +C It can be removed at a later date. 11 Aug 1990 pff. +C + IF(ABS(Z) .LT. 1.0E-25) Z = 0.0 +C + P1 = 1. + IF (N.EQ.0) THEN + PNLEG = P1 + RETURN + ENDIF + P2 = Z + P3 = P2 + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P1 = P2 + P2 = P3 + 10 CONTINUE + PNLEG = P3 + if (n.eq.0) pnleg = 1. + RETURN + END +C + REAL FUNCTION PNDLEG (Z,N) +C---------------------------------------------------------------------- +C +C Compute the derivative of the Nth order Legendre polynomial at Z. +C (Simpler than JACOBF) +C Based on the recursion formula for the Legendre polynomials. +C +C---------------------------------------------------------------------- + P1 = 1. + P2 = Z + P1D = 0. + P2D = 1. + P3D = 1. + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.) + P1 = P2 + P2 = P3 + P1D = P2D + P2D = P3D + 10 CONTINUE + PNDLEG = P3D + IF (N.eq.0) pndleg = 0. + RETURN + END +C + SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Legendre mesh (M1), and evaluate its +C derivative on a Guass Legendre mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLLGL). +C Note: D and DT are rectangular matrices. +C +C----------------------------------------------------------------------- + REAL D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1) + IF (NZM1.EQ.1) THEN + D (1,1) = 0. + DT(1,1) = 0. + RETURN + ENDIF + EPS = 1.E-6 + NM1 = NZM1-1 + DO 10 IP = 1, NZM2 + DO 10 JQ = 1, NZM1 + ZP = ZM2(IP) + ZQ = ZM1(JQ) + IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN + D(IP,JQ) = 0. + ELSE + D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) + $ -IM12(IP,JQ))/(ZP-ZQ) + ENDIF + DT(JQ,IP) = D(IP,JQ) + 10 CONTINUE + RETURN + END +C + SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its +C derivative on a Guass Jacobi mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLJGJ). +C Note: D and DT are rectangular matrices. +C Single precision version. +C +C----------------------------------------------------------------------- + REAL D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1) + PARAMETER (NMAX=84) + PARAMETER (NDD = NMAX) + REAL*8 DD(NDD,NDD), DTD(NDD,NDD) + REAL*8 ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD) + REAL*8 ALPHAD, BETAD +C + IF (NPGL.LE.1) THEN + WRITE(6,*) 'DGLJGJ: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF (NPGL.GT.NMAX) THEN + WRITE(6,*) 'Polynomial degree too high in DGLJGJ' + WRITE(6,*) 'Maximum polynomial degree is',NMAX + WRITE(6,*) 'Here NPGL=',NPGL + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NPG + ZGD(I) = ZG(I) + DO 100 J=1,NPGL + IGLGD(I,J) = IGLG(I,J) + 100 CONTINUE + DO 200 I=1,NPGL + ZGLD(I) = ZGL(I) + 200 CONTINUE + CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD) + DO 300 I=1,NPG + DO 300 J=1,NPGL + D(I,J) = DD(I,J) + DT(J,I) = DTD(J,I) + 300 CONTINUE + RETURN + END +C + SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its +C derivative on a Guass Jacobi mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLJGJ). +C Note: D and DT are rectangular matrices. +C Double precision version. +C +C----------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2) + REAL*8 IGLG(ND2,ND1), ALPHA, BETA +C + IF (NPGL.LE.1) THEN + WRITE(6,*) 'DGLJGJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + EPS = 1.e-6 + ONE = 1. + TWO = 2. + NGL = NPGL-1 + DN = ((NGL)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) +C + DO 100 I=1,NPG + DO 100 J=1,NPGL + DZ = ABS(ZG(I)-ZGL(J)) + IF (DZ.LT.EPS) THEN + D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ + $ (TWO*(ONE-ZG(I)**2)) + ELSE + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J)) + FACI = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)) + FACJ = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J)) + CONST = EIGVAL*PJ+FACJ*PDJ + D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) + $ -(ONE-ZG(I)**2)*PDI)/(CONST*(ZG(I)-ZGL(J))**2) + ENDIF + DT(J,I) = D(I,J) + 100 CONTINUE + RETURN + END +C + SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss Legendre mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss Legendre points. +C Z2 : NZ2 points on mesh M. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +c + SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss-Lobatto Legendre mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss-Lobatto Legendre points. +C Z2 : NZ2 points on mesh M. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGLL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +C + SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss Jacobi mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss Jacobi points. +C Z2 : NZ2 points on mesh M. +C Single precision version. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +c + SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss-Lobatto Jacobi mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss-Lobatto Jacobi points. +C Z2 : NZ2 points on mesh M. +C Single precision version. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END diff --git a/src/timers.c b/src/timers.c new file mode 100644 index 0000000..f40bf91 --- /dev/null +++ b/src/timers.c @@ -0,0 +1,24 @@ + +#include /* for struct timespec */ + +double fclock_gettime(void) { + + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + double timeval = (double) (ts.tv_sec + ts.tv_nsec*1.0E-9); + + return timeval; +} + +double fclock_gettime_( void ) __attribute__((alias("fclock_gettime"))); + +#ifdef BGQTIMER +#include + +double ReadTimeBase_Double( void ) { + return (double) GetTimeBase(); +} + +double readtimebase_double( void ) __attribute__((alias("ReadTimeBase_Double"))); +double readtimebase_double_( void ) __attribute__((alias("ReadTimeBase_Double"))); +#endif diff --git a/test/example1/SIZE b/test/example1/SIZE new file mode 100644 index 0000000..941aedc --- /dev/null +++ b/test/example1/SIZE @@ -0,0 +1,17 @@ +C Dimension file to be included + + parameter (ldim=3) ! dimension + parameter (lx1=12,ly1=lx1,lz1=lx1) ! polynomial order + + parameter (lp =49152) ! max number of processors + parameter (lelt= 512) ! max number of elements, per proc + + parameter (lelg=lelt*lp) ! max total elements in a test + parameter (lelx=lelg,lely=1,lelz=1) ! max elements in each direction + + parameter (ldimt=1,ldimt1=ldimt+1) ! used in 'include' files + + + common /dimn/ nelx,nely,nelz,nelt ! local element common block + $ , nx1,ny1,nz1,ndim,nfield,nid + diff --git a/test/example1/data.rea b/test/example1/data.rea new file mode 100644 index 0000000..36077f4 --- /dev/null +++ b/test/example1/data.rea @@ -0,0 +1,5 @@ +.true. = ifbrick ! brick or linear geometry +512 512 1 = iel0,ielN,ielD (per processor) ! range of number of elements per proc. + 9 12 3 = nx0,nxN,nxD ! poly. order range for nx1 + 1 1 1 = npx, npy, npz ! processor distribution in x,y,z + 1 1 1 = mx, my, mz ! local element distribution in x,y,z diff --git a/test/example1/makefile.template b/test/example1/makefile.template new file mode 100644 index 0000000..81d8805 --- /dev/null +++ b/test/example1/makefile.template @@ -0,0 +1,140 @@ +BINNAME=nekbone +CASENAME= +CASEDIR= +S= +J:=$S/jl +OPT_INCDIR:=./ +OBJDIR=obj +IFMPI= +F77= +CC= +P= +PPPO= +PPS= +G= +OPT_FLAGS_STD= +USR= +USR_LFLAGS= + +################################################################################ + +lFLAGS = $(USR_LFLAGS) + +PPS_F = $(patsubst %,$(PPPO)-D%,$(PPS)) +PPS_C = $(patsubst %,-D%,$(PPS)) + +#NEW ######################################################################### +EXTRA = cg.o driver.o math.o omp.o mxm_wrapper.o prox_dssum.o prox_setup.o semhat.o \ +speclib.o timers.o + +################################################################################ +# MXM +MXM = mxm_std.o + +# JL Routines ################################################################### +JO = jl_ +JL := -DPREFIX=jl_ + +JLCORE = $(JO)gs.o $(JO)sort.o $(JO)sarray_transfer.o $(JO)sarray_sort.o \ +$(JO)gs_local.o $(JO)crystal.o $(JO)comm.o $(JO)tensor.o $(JO)fail.o \ +$(JO)fcrystal.o + +COMM_MPI := comm_mpi.o +ifeq ($(IFMPI),false) + COMM_MPI := ${COMM_MPI} mpi_dummy.o +endif + +ifeq ($(IFMPI),false) + DUMMY:= $(shell cp $S/mpi_dummy.h $S/mpif.h) +else + DUMMY:= $(shell rm -rf $S/mpif.h) +endif + +##################################################################################### +TMP0 = $(EXTRA) $(COMM_MPI) $(MXM) +NOBJS_F0 = $(patsubst %,$(OBJDIR)/%,$(TMP0)) +TMP0c = $(JLCORE) +NOBJS_C0 = $(patsubst %,$(OBJDIR)/%,$(TMP0c)) + +NOBJS0 = $(NOBJS_F0) $(NOBJS_C0) +############################################################################## + +L0 = $(G) -O0 +L2 = $(G) $(OPT_FLAGS_STD) +L3 = $(G) $(OPT_FLAGS_STD) +L4 = $(L3) + +FL0 = $(L0) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL2i4 = $(L2) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL2 = $(L2) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL3 = $(L3) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL4 = $(L4) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) + +cFL0 = $(L0) $(PPS_C) +cFL2 = $(L2) $(PPS_C) +cFL3 = $(L3) $(PPS_C) +cFL4 = $(L4) $(PPS_C) +################################################################################ +all : nekbone + +objdir: + @mkdir $(OBJDIR) 2>/dev/null; cat /dev/null + +nekbone: objdir $(NOBJS0) + $(F77) -o ${BINNAME} $G $(NOBJS0) $(lFLAGS) + @if test -f ${BINNAME}; then \ + echo "#############################################################"; \ + echo "# Compilation successful! #"; \ + echo "#############################################################"; \ + size ${BINNAME}; \ + echo ""; \ + else \ + echo -e "\033[1;31;38m" "ERROR: Compilation failed!"; \ + echo -e "\033[0m"; \ + fi +ifeq ($(IFMPI),false) + @rm -rf $S/mpif.h +endif + +clean: + rm -rf ./obj ${BINNAME} +ifeq ($(IFMPI),false) + @rm -rf $S/mpif.h +endif + +$(NOBJS_F0) : SIZE +# CORE ############################################################################ +$(OBJDIR)/cg.o :$S/cg.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/driver.o :$S/driver.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/math.o :$S/math.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/omp.o :$S/omp.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/prox_dssum.o :$S/prox_dssum.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/prox_setup.o :$S/prox_setup.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/semhat.o :$S/semhat.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/speclib.o :$S/speclib.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/blas.o :$S/blas.f; $(F77) -c $(FL2i4) $< -o $@ +$(OBJDIR)/byte_mpi.o :$S/byte_mpi.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/comm_mpi.o :$S/comm_mpi.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/mpi_dummy.o :$S/mpi_dummy.f; $(F77) -c $(FL2) $< -o $@ +# MXM ############################################################################ +$(OBJDIR)/mxm_wrapper.o :$S/mxm_wrapper.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/mxm_std.o :$S/mxm_std.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/bg_aligned3.o :$S/bg_aligned3.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm3.o :$S/bg_mxm3.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm44.o :$S/bg_mxm44.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm44_uneven.o :$S/bg_mxm44_uneven.s; $(CC) -c $< -o $@ +$(OBJDIR)/k10_mxm.o :$S/k10_mxm.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +# C Files ################################################################################## +$(OBJDIR)/byte.o :$S/byte.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/chelpers.o :$S/chelpers.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/timers.o :$S/timers.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)fail.o :$(J)/fail.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)tensor.o :$(J)/tensor.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sort.o :$(J)/sort.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sarray_sort.o :$(J)/sarray_sort.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)comm.o :$(J)/comm.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)crystal.o :$(J)/crystal.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sarray_transfer.o :$(J)/sarray_transfer.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)fcrystal.o :$(J)/fcrystal.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)gs.o :$(J)/gs.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)gs_local.o :$(J)/gs_local.c; $(CC) -c $(cFL2) $(JL) $< -o $@ diff --git a/test/example1/makenek b/test/example1/makenek new file mode 100755 index 0000000..228bcd1 --- /dev/null +++ b/test/example1/makenek @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="$HOME/Nekbone/src" + +# Fortran compiler +F77="mpif77" + +# C compiler +CC="mpicc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +# NEKCOMM, NEKDLAY, BG, MGRID +#PPLIST="?" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +#USR_LFLAGS="-L/usr/lib -lfoo" + +# generic compiler flags +#G="-g" + +# optimization flags +#OPT_FLAGS_STD="" +#OPT_FLAGS_MAG="" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-bgq b/test/example1/makenek-bgq new file mode 100755 index 0000000..5210815 --- /dev/null +++ b/test/example1/makenek-bgq @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="mpixlf77_r -qsmp=omp -qnosave" + +# C compiler +CC="mpixlc_r -qsmp=omp" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS BGQTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +#USR_LFLAGS="-L/usr/lib/ -lfoo" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-O3" +OPT_FLAGS_MAG="-O3" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-cray-knl b/test/example1/makenek-cray-knl new file mode 100755 index 0000000..c0658f2 --- /dev/null +++ b/test/example1/makenek-cray-knl @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="ftn" + +# C compiler +CC="cc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS CGTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +USR_LFLAGS="-qopenmp -dynamic -mcmodel=medium -shared-intel" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel " +OPT_FLAGS_MAG="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-intel b/test/example1/makenek-intel new file mode 100755 index 0000000..209dfc3 --- /dev/null +++ b/test/example1/makenek-intel @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="mpiifort" + +# C compiler +CC="mpiicc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS CGTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +USR_LFLAGS="-qopenmp -mcmodel=medium -shared-intel" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel" +OPT_FLAGS_MAG="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/nekpmpi b/test/example1/nekpmpi new file mode 100755 index 0000000..1c4cc8a --- /dev/null +++ b/test/example1/nekpmpi @@ -0,0 +1,4 @@ +rm -f logfile +mv $1.log.$2 $1.log1.$2 +mpiexec -np $2 ./nekbone $1 > $1.log.$2 +ln $1.log.$2 logfile