diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..28b5f6b --- /dev/null +++ b/CHANGES @@ -0,0 +1,41 @@ +*********************************************************** +* Changes in 2.0 * +*********************************************************** + -Subroutine gsync() has changed to nekgsync() to avoid + possible conflict on certain architectures + + -Executable is renamed 'nekbone' to replace 'nekproxy' and + other naming changes. + + -iel0 and ielN set in data.rea file are now used to control the + range of tests ran. Test range in size from iel0 + to ielN elements per process. (prevoiusly tests + were ran from 1 to lelt elements per process) The + maximum value of ielN is lelt. + + -nx0 and nxN set in data.rea file are now used to control the + range of polynomial orders. Ranging from nx0 to + nxN, where nxN<=lx1 (which is set in SIZE). Previously + tests only ran with nx1=lx1. The default is set to + reflect this, but nekbone now supports a range of + polynomial orders without recompiling the code. +*********************************************************** +* Changes in 2.1 * +*********************************************************** + -Fixed nx0 and nxN control of polynomial order. Default is + now to use lx1 until further notice. Variable + nx1 caused memory unstabilities and needs further + development. + -Fixed a memory copy bug in the jl/ array transfer code. + sarray_trasfer, used for the tuple transfer, should + be fixed now. + +*********************************************************** +* Changes in 2.3 * +*********************************************************** + - added OpenMP parallelism, MPITHREADS preprocessor macro + controls if MPI is called from one or multiple threads + - added timers controlled by TIMERS preprocessor macro + - fixed gather-scatter operation gsop() to always use pairwise + method + - switched to using system_clock routine in dummy mpi_wtime() diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..9f84af5 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,34 @@ + COPYRIGHT + +The following is a notice of limited availability of the code, and disclaimer +which must be included in the prologue of the code and in all source listings +of the code. + +Copyright Notice + + 2012 University of Chicago + +Permission is hereby granted to use, reproduce, prepare derivative works, and +to redistribute to others. This software was authored by: + +P. Fischer: (630) 252-6018; FAX: (630) 252-5986; email: fischer@mcs.anl.gov +Mathematics and Computer Science Division +Argonne National Laboratory, Argonne IL 60439 + + GOVERNMENT LICENSE + +Portions of this material resulted from work developed under a U.S. +Government Contract and are subject to the following license: the Government +is granted for itself and others acting on its behalf a paid-up, nonexclusive, +irrevocable worldwide license in this computer software to reproduce, prepare +derivative works, and perform publicly and display publicly. + + DISCLAIMER + +This computer code material was prepared, in part, as an account of work +sponsored by an agency of the United States Government. Neither the United +States, nor the University of Chicago, nor any of their employees, makes any +warranty express or implied, or assumes any legal liability or responsibility +for the accuracy, completeness, or usefulness of any information, apparatus, +product, or process disclosed, or represents that its use would not infringe +privately owned rights. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..fab3aad --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# nekbone_2_3_5 diff --git a/readme.pdf b/readme.pdf new file mode 100644 index 0000000..8c8aa70 Binary files /dev/null and b/readme.pdf differ diff --git a/src/DXYZ b/src/DXYZ new file mode 100644 index 0000000..e5eaca0 --- /dev/null +++ b/src/DXYZ @@ -0,0 +1,5 @@ +C +C Elemental derivative operators +C + common /dxyz/ dxm1(lx1,lx1), dxtm1(lx1,lx1) + diff --git a/src/INPUT b/src/INPUT new file mode 100644 index 0000000..124d572 --- /dev/null +++ b/src/INPUT @@ -0,0 +1,19 @@ +C +C Input parameters from preprocessors. +C +C Note that in parallel implementations, we distinguish between +C distributed data (LELT) and uniformly distributed data. +C + + common /input5/ xc(8,lelt),yc(8,lelt),zc(8,lelt) + $ ,bc(5,6,lelt,0:ldimt1) + + + common /input8/ cbc(6,lelt,0:ldimt1),ccurve(12,lelt) + character*1 ccurve + character*3 cbc + + real mflops + integer*8 flop_a, flop_cg + common /cflops/ flop_a,flop_cg,mflops + diff --git a/src/MASS b/src/MASS new file mode 100644 index 0000000..16cde4d --- /dev/null +++ b/src/MASS @@ -0,0 +1,4 @@ + common /mass/ + $ bm1 (lx1,ly1,lz1,lelt) + $ ,binvm1(lx1,ly1,lz1,lelt) + $ ,volvm1 diff --git a/src/PARALLEL b/src/PARALLEL new file mode 100644 index 0000000..67372b4 --- /dev/null +++ b/src/PARALLEL @@ -0,0 +1,31 @@ +C +C Communication information +C NOTE: NID is stored in 'SIZE' for greater accessibility + common /cube1/ node,pid,np,nullpid,node0 + integer node,pid,np,nullpid,node0 + + +c Maximum number of elements (limited to 2**31/12, at least for now) + parameter(nelgt_max = 178956970) + + common /hcglb/ nvtot,nelgf(0:ldimt1) + $ ,lglel(lelt) +c $ ,gllel(lelg) +c $ ,gllnid(lelg) + $ ,nelgv,nelgt + + integer lglel +c integer gllel,gllnid + integer*8 nvtot + + common /diagl/ ifgprnt + logical ifgprnt + common/precsn/ wdsize,isize,lsize,csize + common/precsl/ ifdblas + integer wdsize,isize,lsize,csize + logical ifdblas +C +C crystal-router, gather-scatter, and xxt handles (xxt=csr grid solve) +C + common /comm_handles/ cr_h, gsh, gsh_fld(0:ldimt1), xxth(ldimt1) + integer cr_h, gsh, gsh_fld , xxth diff --git a/src/README b/src/README new file mode 100644 index 0000000..8996aa9 --- /dev/null +++ b/src/README @@ -0,0 +1,66 @@ + Nek_comm-1.0 + +This is the communication testing kernel for the MPI +all reduce and point to point communication used within +the nekbone mini-app and standard NEK5000. This kernel +runs a battery of platform timers using MPI standard. + +To Run: + + NOTE - Unlike the other nek codes, a data.rea file + is not needed, since there is no geometry + being set up. + + After untarring nek_comm-1.0.tgz, change working + directory to the nek_comm/test/ directory: + cd ~/nek_comm/test/ + + Edit the makenek script to specify the compiler and + appropriate compiler flags. + + Compile and link the code using the makenek script: + ./makenek n + where n is the chosen name of test run. + + A successful compilation will result with: + + ############################################################# + # Compilation successful! # + ############################################################# + + And a nekcomm executable. + + Run the code in parallel using the provided nekpmpi script + by specifying the test name (for logfile naming purposes) + and the number of processors. For example, to run a test + called 'n' on 4 processes: + ./nekpmpi n 4 + + This will produce a logfile, n.log.4. + + +Interpreting results: + + The logfile will have a header describing the parameters the + test was ran with and the output of the timing tests. + + All reduce tests are output with the a 'gop' tag: + np nwds time1 time2 + where, + np - number of processors + nwds - number of words + tmsg - time per message + tpwd - time per word + + + Point to point tests are output with the 'pg' tag: + nodeb np nloop nwds tmsg tpwd + where, + nodeb - the second processor node 0 is + testing with + np - number of processors + nloop - number of tests ran with these nodes + nwds - number of words per message + tmsg - time per message + tpwd - time per word + diff --git a/src/TIMER b/src/TIMER new file mode 100644 index 0000000..60a761c --- /dev/null +++ b/src/TIMER @@ -0,0 +1,19 @@ + + integer tmax + parameter (tmax = 1024) + + integer gopi(tmax) + + real ttemp1, ttemp2, ttemp3, ttemp4 + + real trzero(tmax), tcopy(tmax), tsolvem(tmax) + real tglsc3a(tmax), tglsc3b(tmax), tglsc3c(tmax), tglsc3d(tmax) + real tadd2s1(tmax), tadd2s2a(tmax), tadd2s2b(tmax), tadd2s2c(tmax) + real tlocalgrad3(tmax), twrwswt(tmax), tlocalgrad3t(tmax) + real tgsop(tmax), tgop(4,tmax) + + real*8 dnekclock + + common /timer/ trzero, tcopy, tsolvem, tglsc3a, tglsc3b, tglsc3c, + +tglsc3d, tadd2s1, tadd2s2a, tadd2s2b, tadd2s2c, tlocalgrad3, + +twrwswt, tlocalgrad3t, tgsop, tgop, gopi diff --git a/src/TOTAL b/src/TOTAL new file mode 100644 index 0000000..814bc18 --- /dev/null +++ b/src/TOTAL @@ -0,0 +1,5 @@ + include 'DXYZ' + include 'INPUT' + include 'MASS' + include 'PARALLEL' + include 'WZ' diff --git a/src/WZ b/src/WZ new file mode 100644 index 0000000..502e87c --- /dev/null +++ b/src/WZ @@ -0,0 +1,7 @@ + + +c Gauss-Labotto and Gauss points + common /gauss/ zgm1(lx1,3) + +c Weights + common /wxyz/ wxm1(lx1), wym1(ly1), wzm1(lz1), w3m1(lx1,ly1,lz1) diff --git a/src/bg_aligned3.s b/src/bg_aligned3.s new file mode 100644 index 0000000..8a5ab9a --- /dev/null +++ b/src/bg_aligned3.s @@ -0,0 +1,41 @@ +.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4 +.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9 +.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14 +.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19 +.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24 +.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29 +.set r30,30; .set r31,31 +.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4 +.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9 +.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14 +.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19 +.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24 +.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29 +.set f30,30; .set f31,31 + +.file "bg_aligned3.s" + +.globl bg_aligned3 +.type bg_aligned3, @function +.size bg_aligned3, 48 + +.section ".text" +.align 2 + +bg_aligned3: + andi. r0,r3,15 + clrlwi r9,r4,28 + cmpwi cr7,r9,0 + li r3,0 + li r0,0 + bne- .L.das_label.58 + andi. r9,r5,15 + bne- cr7,.L.das_label.58 + bne- .L.das_label.58 + li r0,1 + .L.das_label.58: + stw r0,0(r6) + blr + + +.ident "GCC: (GNU) 3.2" diff --git a/src/bg_mxm3.s b/src/bg_mxm3.s new file mode 100644 index 0000000..ac8ffe5 --- /dev/null +++ b/src/bg_mxm3.s @@ -0,0 +1,406 @@ +.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4 +.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9 +.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14 +.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19 +.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24 +.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29 +.set r30,30; .set r31,31 +.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4 +.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9 +.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14 +.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19 +.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24 +.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29 +.set f30,30; .set f31,31 + +.file "bg_mxm3.s" + +.globl bg_mxm3 +.type bg_mxm3, @function +.size bg_mxm3, 1412 + +.section ".text" +.align 2 + +bg_mxm3: + stwu r1,-96(r1) + mflr r0 + stw r0,100(r1) + andi. r0,r7,15 + stw r15,28(r1) + mr r15,r8 + stw r16,32(r1) + mr r16,r6 + stw r25,68(r1) + mr r25,r5 + stw r28,80(r1) + mr r28,r4 + stw r30,88(r1) + mr r30,r7 + stw r31,92(r1) + mr r31,r3 + stw r14,24(r1) + stw r17,36(r1) + stw r18,40(r1) + stw r19,44(r1) + stw r20,48(r1) + stw r21,52(r1) + stw r22,56(r1) + stw r23,60(r1) + stw r26,72(r1) + stw r27,76(r1) + stw r29,84(r1) + bne- .L.das_label.15 + .L.das_label.3: + lis r23,dummy@ha + li r12,0 + addi r23,r23,dummy@l + li r13,0 + li r27,16 + addi r23,r23,-16 + stfpdux f13,r23,r27 + stfpdux f14,r23,r27 + stfpdux f15,r23,r27 + stfpdux f16,r23,r27 + stfpdux f17,r23,r27 + stfpdux f18,r23,r27 + stfpdux f19,r23,r27 + stfpdux f20,r23,r27 + stfpdux f21,r23,r27 + stfpdux f22,r23,r27 + stfpdux f23,r23,r27 + stfpdux f24,r23,r27 + stfpdux f25,r23,r27 + stfpdux f26,r23,r27 + stfpdux f27,r23,r27 + stfpdux f28,r23,r27 + stfpdux f29,r23,r27 + stfpdux f30,r23,r27 + stfpdux f31,r23,r27 + lis r9,10922 + lwz r4,0(r28) + ori r9,r9,43691 + lwz r15,0(r15) + lwz r6,0(r16) + mulhw r14,r4,r9 + srawi r11,r15,31 + srawi r16,r6,1 + addze r16,r16 + srawi r0,r4,31 + mulhw r15,r15,r9 + subf r14,r0,r14 + rlwinm r26,r16,4,0,27 + mulli r22,r16,80 + subf r15,r11,r15 + subfic r22,r22,16 + mfctr r29 + li r18,0 + rlwinm r28,r4,3,0,28 + cmpw r18,r15 + addi r28,r28,-32 + addi r19,r16,-2 + cmpwi cr7,r12,0 + bge- .L.das_label.10 + .L.das_label.4: + mullw r23,r4,r18 + li r17,0 + cmpw r17,r14 + mulli r23,r23,48 + add r23,r30,r23 + addi r23,r23,-16 + bge- .L.das_label.9 + mullw r9,r16,r18 + cmpwi cr6,r19,0 + mulli r9,r9,96 + mulli r0,r6,40 + add r9,r9,r0 + add r9,r9,r25 + addi r11,r9,-16 + .L.das_label.5: + mulli r20,r17,48 + rlwinm r0,r4,3,0,28 + add r20,r31,r20 + subf r20,r0,r20 + addi r20,r20,32 + lfpdux f1,r20,r28 + mr r21,r11 + lfpdux f7,r21,r22 + lfpdux f2,r20,r27 + lfpdux f3,r20,r27 + lfpdux f8,r21,r26 + lfpdux f9,r21,r26 + lfpdux f4,r20,r28 + lfpdux f10,r21,r26 + lfpdux f5,r20,r27 + lfpdux f11,r21,r26 + lfpdux f12,r21,r26 + fxpmul f13,f7,f1 + fxpmul f16,f8,f1 + lfpdux f6,r20,r27 + fxpmul f19,f9,f1 + fxpmul f22,f10,f1 + fxpmul f25,f11,f1 + fxpmul f28,f12,f1 + fxpmul f14,f7,f2 + lfpdux f1,r20,r28 + fxpmul f17,f8,f2 + fxpmul f20,f9,f2 + fxpmul f23,f10,f2 + fxpmul f26,f11,f2 + fxpmul f29,f12,f2 + fxpmul f15,f7,f3 + lfpdux f2,r20,r27 + fxpmul f18,f8,f3 + fxpmul f21,f9,f3 + fxpmul f24,f10,f3 + fxpmul f27,f11,f3 + fxpmul f30,f12,f3 + fxcsmadd f13,f7,f4,f13 + lfpdux f3,r20,r27 + fxcsmadd f16,f8,f4,f16 + fxcsmadd f14,f7,f5,f14 + fxcsmadd f17,f8,f5,f17 + fxcsmadd f19,f9,f4,f19 + fxcsmadd f22,f10,f4,f22 + fxcsmadd f25,f11,f4,f25 + fxcsmadd f28,f12,f4,f28 + fxcsmadd f20,f9,f5,f20 + fxcsmadd f15,f7,f6,f15 + lfpdux f4,r20,r28 + fxcsmadd f18,f8,f6,f18 + lfpdux f7,r21,r22 + fxcsmadd f23,f10,f5,f23 + fxcsmadd f26,f11,f5,f26 + fxcsmadd f29,f12,f5,f29 + lfpdux f8,r21,r26 + fxcsmadd f21,f9,f6,f21 + fxcsmadd f24,f10,f6,f24 + lfpdux f5,r20,r27 + lfpdux f9,r21,r26 + lfpdux f10,r21,r26 + fxcsmadd f27,f11,f6,f27 + fxcsmadd f30,f12,f6,f30 + lfpdux f11,r21,r26 + beq- cr6,.L.das_label.6 + mtctr r19 + +.__loopk: + lfpdux f12,r21,r26 + fxcpmadd f13,f7,f1,f13 + fxcpmadd f16,f8,f1,f16 + lfpdux f6,r20,r27 + fxcpmadd f19,f9,f1,f19 + fxcpmadd f22,f10,f1,f22 + fxcpmadd f25,f11,f1,f25 + fxcpmadd f28,f12,f1,f28 + fxcpmadd f14,f7,f2,f14 + lfpdux f1,r20,r28 + fxcpmadd f17,f8,f2,f17 + fxcpmadd f20,f9,f2,f20 + fxcpmadd f23,f10,f2,f23 + fxcpmadd f26,f11,f2,f26 + fxcpmadd f29,f12,f2,f29 + fxcpmadd f15,f7,f3,f15 + lfpdux f2,r20,r27 + fxcpmadd f18,f8,f3,f18 + fxcpmadd f21,f9,f3,f21 + fxcpmadd f24,f10,f3,f24 + fxcpmadd f27,f11,f3,f27 + fxcpmadd f30,f12,f3,f30 + fxcsmadd f13,f7,f4,f13 + lfpdux f3,r20,r27 + fxcsmadd f16,f8,f4,f16 + fxcsmadd f14,f7,f5,f14 + fxcsmadd f17,f8,f5,f17 + fxcsmadd f19,f9,f4,f19 + fxcsmadd f22,f10,f4,f22 + fxcsmadd f25,f11,f4,f25 + fxcsmadd f28,f12,f4,f28 + fxcsmadd f20,f9,f5,f20 + fxcsmadd f15,f7,f6,f15 + lfpdux f4,r20,r28 + fxcsmadd f18,f8,f6,f18 + lfpdux f7,r21,r22 + fxcsmadd f23,f10,f5,f23 + fxcsmadd f26,f11,f5,f26 + fxcsmadd f29,f12,f5,f29 + lfpdux f8,r21,r26 + fxcsmadd f21,f9,f6,f21 + fxcsmadd f24,f10,f6,f24 + lfpdux f5,r20,r27 + lfpdux f9,r21,r26 + lfpdux f10,r21,r26 + fxcsmadd f27,f11,f6,f27 + fxcsmadd f30,f12,f6,f30 + lfpdux f11,r21,r26 + bdnz+ .__loopk + .L.das_label.6: + lfpdux f12,r21,r26 + fxcpmadd f13,f7,f1,f13 + fxcpmadd f14,f7,f2,f14 + fxcpmadd f15,f7,f3,f15 + lfpdux f6,r20,r27 + fxcpmadd f16,f8,f1,f16 + fxcpmadd f17,f8,f2,f17 + fxcpmadd f18,f8,f3,f18 + fxcsmadd f13,f7,f4,f13 + fxcsmadd f14,f7,f5,f14 + fxcsmadd f15,f7,f6,f15 + fxcsmadd f16,f8,f4,f16 + fxcsmadd f17,f8,f5,f17 + fxcsmadd f18,f8,f6,f18 + stfpdux f13,r23,r27 + fxcpmadd f19,f9,f1,f19 + fxcpmadd f20,f9,f2,f20 + stfpdux f14,r23,r27 + fxcpmadd f21,f9,f3,f21 + fxcpmadd f22,f10,f1,f22 + stfpdux f15,r23,r27 + fxcpmadd f23,f10,f2,f23 + fxcpmadd f24,f10,f3,f24 + stfpdux f16,r23,r28 + fxcsmadd f19,f9,f4,f19 + fxcsmadd f20,f9,f5,f20 + stfpdux f17,r23,r27 + fxcsmadd f21,f9,f6,f21 + fxcsmadd f22,f10,f4,f22 + stfpdux f18,r23,r27 + fxcsmadd f23,f10,f5,f23 + fxcsmadd f24,f10,f6,f24 + stfpdux f19,r23,r28 + fxcpmadd f25,f11,f1,f25 + fxcpmadd f26,f11,f2,f26 + stfpdux f20,r23,r27 + fxcpmadd f27,f11,f3,f27 + fxcpmadd f28,f12,f1,f28 + stfpdux f21,r23,r27 + fxcpmadd f29,f12,f2,f29 + fxcpmadd f30,f12,f3,f30 + stfpdux f22,r23,r28 + clrlwi r0,r23,28 + subfic r10,r13,0 + adde r9,r10,r13 + neg r0,r0 + rlwinm r0,r0,1,31,31 + and. r10,r0,r9 + beq- .L.das_label.7 + mr r13,r23 + .L.das_label.7: + fxcsmadd f25,f11,f4,f25 + fxcsmadd f26,f11,f5,f26 + stfpdux f23,r23,r27 + clrlwi r0,r23,28 + mfcr r9 + rlwinm r9,r9,31,31,31 + neg r0,r0 + rlwinm r0,r0,1,31,31 + and. r10,r0,r9 + beq- .L.das_label.8 + mr r12,r23 + cmpwi cr7,r23,0 + .L.das_label.8: + fxcsmadd f27,f11,f6,f27 + fxcsmadd f28,f12,f4,f28 + stfpdux f24,r23,r27 + fxcsmadd f29,f12,f5,f29 + fxcsmadd f30,f12,f6,f30 + stfpdux f25,r23,r28 + stfpdux f26,r23,r27 + stfpdux f27,r23,r27 + stfpdux f28,r23,r28 + stfpdux f29,r23,r27 + stfpdux f30,r23,r27 + addi r17,r17,1 + mulli r0,r4,40 + cmpw r17,r14 + subf r23,r0,r23 + blt+ .L.das_label.5 + .L.das_label.9: + addi r18,r18,1 + cmpw r18,r15 + blt+ .L.das_label.4 + .L.das_label.10: + mtctr r29 + lis r23,dummy@ha + addi r23,r23,dummy@l + addi r23,r23,-16 + lfpdux f13,r23,r27 + lfpdux f14,r23,r27 + lfpdux f15,r23,r27 + lfpdux f16,r23,r27 + lfpdux f17,r23,r27 + lfpdux f18,r23,r27 + lfpdux f19,r23,r27 + lfpdux f20,r23,r27 + lfpdux f21,r23,r27 + lfpdux f22,r23,r27 + lfpdux f23,r23,r27 + lfpdux f24,r23,r27 + lfpdux f25,r23,r27 + lfpdux f26,r23,r27 + lfpdux f27,r23,r27 + lfpdux f28,r23,r27 + lfpdux f29,r23,r27 + lfpdux f30,r23,r27 + lfpdux f31,r23,r27 + bne- cr7,.L.das_label.14 + .L.das_label.11: + cmpwi r13,0 + bne- .L.das_label.13 + .L.das_label.12: + lwz r0,100(r1) + li r3,3 + lwz r14,24(r1) + lwz r15,28(r1) + mtlr r0 + lwz r16,32(r1) + lwz r17,36(r1) + lwz r18,40(r1) + lwz r19,44(r1) + lwz r20,48(r1) + lwz r21,52(r1) + lwz r22,56(r1) + lwz r23,60(r1) + lwz r25,68(r1) + lwz r26,72(r1) + lwz r27,76(r1) + lwz r28,80(r1) + lwz r29,84(r1) + lwz r30,88(r1) + lwz r31,92(r1) + addi r1,r1,96 + blr + .L.das_label.13: + lis r3,.rodata@ha + mr r4,r13 + addi r3,r3,.rodata@l + crclr 4*cr1+eq + bl printf + b .L.das_label.12 + .L.das_label.14: + lis r3,.rodata+0x00000018@ha + mr r4,r12 + addi r3,r3,.rodata+0x00000018@l + crclr 4*cr1+eq + bl printf + b .L.das_label.11 + .L.das_label.15: + lis r3,.rodata+0x00000030@ha + mr r4,r7 + addi r3,r3,.rodata+0x00000030@l + crclr 4*cr1+eq + bl printf + b .L.das_label.3 + +.comm dummy,512,16 + + +.section ".rodata","a" +.align 2 + .ascii "ALIGNMENT PROBS1: %x\n" +.align 3 + .ascii "ALIGNMENT PROBS: %x\n" +.align 3 + .ascii "C PROB: %x\n" + +.ident "GCC: (GNU) 3.2" diff --git a/src/bg_mxm44.s b/src/bg_mxm44.s new file mode 100644 index 0000000..60ab4e3 --- /dev/null +++ b/src/bg_mxm44.s @@ -0,0 +1,497 @@ +.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4 +.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9 +.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14 +.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19 +.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24 +.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29 +.set r30,30; .set r31,31 +.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4 +.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9 +.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14 +.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19 +.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24 +.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29 +.set f30,30; .set f31,31 + +.file "bg_mxm44.s" + +.globl bg_mxm44 +.type bg_mxm44, @function +.size bg_mxm44, 1756 + + + +.section ".text" +.align 2 + + +bg_mxm44: + stwu r1,-576(r1) + stw r14,360(r1) + mr r12,r1 + stfd f14,432(r1) + li r14,16 + stfd f15,440(r1) + stfd f16,448(r1) + stfd f17,456(r1) + stfd f18,464(r1) + stfd f19,472(r1) + stfd f20,480(r1) + stfd f21,488(r1) + stfd f22,496(r1) + stfd f23,504(r1) + stfd f24,512(r1) + stfd f25,520(r1) + stfd f26,528(r1) + stfd f27,536(r1) + stfd f28,544(r1) + stfd f29,552(r1) + stfd f30,560(r1) + stfd f31,568(r1) + stw r15,364(r1) + stw r16,368(r1) + stw r17,372(r1) + stw r18,376(r1) + stw r19,380(r1) + stw r20,384(r1) + stw r21,388(r1) + stw r22,392(r1) + stw r23,396(r1) + stw r24,400(r1) + stw r25,404(r1) + stw r26,408(r1) + stw r28,416(r1) + stw r29,420(r1) + stw r30,424(r1) + stfpdux f14,r12,r14 + stfpdux f15,r12,r14 + stfpdux f16,r12,r14 + stfpdux f17,r12,r14 + stfpdux f18,r12,r14 + stfpdux f19,r12,r14 + stfpdux f20,r12,r14 + stfpdux f21,r12,r14 + stfpdux f22,r12,r14 + stfpdux f23,r12,r14 + stfpdux f24,r12,r14 + stfpdux f25,r12,r14 + stfpdux f26,r12,r14 + stfpdux f27,r12,r14 + stfpdux f28,r12,r14 + stfpdux f29,r12,r14 + stfpdux f30,r12,r14 + stfpdux f31,r12,r14 + lis r11,.rodata@ha + addi r10,r11,.rodata@l + lwz r4,0(r4) + lfd f23,0(r10) + lwz r6,0(r6) + fmr f0,f23 + lwz r8,0(r8) + fmr f1,f23 + fmr f2,f23 + fmr f3,f23 + fmr f8,f23 + fmr f9,f23 + fmr f10,f23 + fmr f11,f23 + fmr f16,f23 + fmr f17,f23 + fmr f18,f23 + fmr f19,f23 + fmr f20,f23 + fmr f21,f23 + fmr f22,f23 + li r13,16 + rlwinm r9,r4,5,0,26 + rlwinm r19,r6,5,0,26 + rlwinm r14,r4,3,0,28 + mr r25,r14 + rlwinm r12,r6,3,0,28 + mulli r11,r6,-3 + srawi r26,r4,2 + srawi r28,r8,2 + rlwinm r29,r26,2,0,29 + cmpw cr6,r29,r4 + mulli r18,r6,-4 + addi r18,r18,2 + addi r11,r11,2 + li r10,0 + rlwinm r11,r11,3,0,28 + rlwinm r18,r18,3,0,28 + mullw r23,r4,r6 + addi r23,r23,-4 + rlwinm r23,r23,3,0,28 + neg r23,r23 + subf r14,r13,r14 + add r23,r23,r14 + srawi r16,r6,2 + mr r22,r7 + cmpw r6,r6 + rlwinm r29,r16,2,0,29 + cmpw cr7,r29,r6 + mr r24,r5 + addi r16,r16,-1 + mr r15,r7 + li r29,0 + +.grabNgo_jloop: + subf r20,r23,r3 + subf r21,r18,r24 + addi r30,r15,-32 + li r17,0 + +.grabNgo_iloop: + fxcpmadd f16,f8,f0,f16 + lfpdux f4,r20,r23 + fxcpmadd f17,f8,f1,f17 + lfpdux f5,r20,r13 + fxcpmadd f18,f9,f0,f18 + lfpdux f12,r21,r18 + fxcpmadd f19,f9,f1,f19 + lfpdux f13,r21,r12 + fxcpmadd f20,f10,f0,f20 + lfpdux f14,r21,r12 + fxcpmadd f21,f10,f1,f21 + lfpdux f15,r21,r12 + fxcpmadd f22,f11,f0,f22 + lfpdux f6,r20,r14 + fxcpmadd f23,f11,f1,f23 + lfpdux f7,r20,r13 + fxcsmadd f24,f8,f2,f16 + addi r17,r17,1 + fxcsmadd f25,f8,f3,f17 + mtctr r16 + fxcsmadd f26,f9,f2,f18 + addi r30,r30,32 + fxcsmadd f27,f9,f3,f19 + cmpw cr1,r17,r26 + fxcsmadd f28,f10,f2,f20 + lfpdux f0,r20,r14 + fxcsmadd f29,f10,f3,f21 + lfpdux f1,r20,r13 + fxcsmadd f30,f11,f2,f22 + fxcsmadd f31,f11,f3,f23 + fxpmul f16,f12,f4 + stfpdux f24,r22,r10 + fxpmul f17,f12,f5 + stfpdux f25,r22,r13 + fxpmul f18,f13,f4 + stfpdux f26,r22,r14 + fxpmul f19,f13,f5 + stfpdux f27,r22,r13 + fxpmul f20,f14,f4 + stfpdux f28,r22,r14 + fxpmul f21,f14,f5 + stfpdux f29,r22,r13 + fxpmul f22,f15,f4 + stfpdux f30,r22,r14 + fxpmul f23,f15,f5 + stfpdux f31,r22,r13 + fxcsmadd f16,f12,f6,f16 + lfpdux f2,r20,r14 + fxcsmadd f17,f12,f7,f17 + lfpdux f3,r20,r13 + fxcsmadd f18,f13,f6,f18 + lfpdux f8,r21,r11 + fxcsmadd f19,f13,f7,f19 + lfpdux f9,r21,r12 + fxcsmadd f20,f14,f6,f20 + lfpdux f10,r21,r12 + fxcsmadd f21,f14,f7,f21 + lfpdux f11,r21,r12 + fxcsmadd f22,f15,f6,f22 + fxcsmadd f23,f15,f7,f23 + beq- cr7,.grabNgo_k_even4 + fxcpmadd f16,f8,f0,f16 + lfpdux f24,r20,r14 + fxcpmadd f17,f8,f1,f17 + lfpdux f25,r20,r13 + fxcpmadd f18,f9,f0,f18 + lfpdux f26,r21,r11 + fxcpmadd f19,f9,f1,f19 + lfpdux f27,r21,r12 + fxcpmadd f20,f10,f0,f20 + lfpdux f28,r21,r12 + fxcpmadd f21,f10,f1,f21 + lfpdux f29,r21,r12 + fxcpmadd f22,f11,f0,f22 + lfpdux f30,r20,r14 + fxcpmadd f23,f11,f1,f23 + lfpdux f31,r20,r13 + fxcsmadd f16,f8,f2,f16 + fxcsmadd f17,f8,f3,f17 + fxcsmadd f18,f9,f2,f18 + fxcsmadd f19,f9,f3,f19 + fxcsmadd f20,f10,f2,f20 + fxcsmadd f21,f10,f3,f21 + fxcsmadd f22,f11,f2,f22 + fxcsmadd f23,f11,f3,f23 + fxcpmadd f16,f26,f24,f16 + lfpdux f4,r20,r14 + fxcpmadd f17,f26,f25,f17 + lfpdux f5,r20,r13 + fxcpmadd f18,f27,f24,f18 + lfpdux f6,r20,r14 + fxcpmadd f19,f27,f25,f19 + lfpdux f7,r20,r13 + fxcpmadd f20,f28,f24,f20 + lfpdux f12,r21,r11 + fxcpmadd f21,f28,f25,f21 + lfpdux f13,r21,r12 + fxcpmadd f22,f29,f24,f22 + lfpdux f14,r21,r12 + fxcpmadd f23,f29,f25,f23 + lfpdux f15,r21,r12 + fxcsmadd f16,f26,f30,f16 + mr r22,r30 + fxcsmadd f17,f26,f31,f17 + fxcsmadd f18,f27,f30,f18 + fxcsmadd f19,f27,f31,f19 + fxcsmadd f20,f28,f30,f20 + fxcsmadd f21,f28,f31,f21 + fxcsmadd f22,f29,f30,f22 + fxcsmadd f23,f29,f31,f23 + b .grabNgo_kloop_k4 + +.grabNgo_k_even4: +.grabNgo_kloop: + fxcpmadd f16,f8,f0,f16 + lfpdux f4,r20,r14 + fxcpmadd f17,f8,f1,f17 + lfpdux f5,r20,r13 + fxcpmadd f18,f9,f0,f18 + lfpdux f6,r20,r14 + fxcpmadd f19,f9,f1,f19 + lfpdux f7,r20,r13 + fxcpmadd f20,f10,f0,f20 + lfpdux f12,r21,r11 + fxcpmadd f21,f10,f1,f21 + lfpdux f13,r21,r12 + fxcpmadd f22,f11,f0,f22 + lfpdux f14,r21,r12 + fxcpmadd f23,f11,f1,f23 + lfpdux f15,r21,r12 + fxcsmadd f16,f8,f2,f16 + mr r22,r30 + fxcsmadd f17,f8,f3,f17 + fxcsmadd f18,f9,f2,f18 + fxcsmadd f19,f9,f3,f19 + fxcsmadd f20,f10,f2,f20 + fxcsmadd f21,f10,f3,f21 + fxcsmadd f22,f11,f2,f22 + fxcsmadd f23,f11,f3,f23 + +.grabNgo_kloop_k4: + fxcpmadd f16,f12,f4,f16 + lfpdux f0,r20,r14 + fxcpmadd f17,f12,f5,f17 + lfpdux f1,r20,r13 + fxcpmadd f18,f13,f4,f18 + lfpdux f2,r20,r14 + fxcpmadd f19,f13,f5,f19 + lfpdux f3,r20,r13 + fxcpmadd f20,f14,f4,f20 + lfpdux f8,r21,r11 + fxcpmadd f21,f14,f5,f21 + lfpdux f9,r21,r12 + fxcpmadd f22,f15,f4,f22 + lfpdux f10,r21,r12 + fxcpmadd f23,f15,f5,f23 + lfpdux f11,r21,r12 + fxcsmadd f16,f12,f6,f16 + fxcsmadd f17,f12,f7,f17 + fxcsmadd f18,f13,f6,f18 + fxcsmadd f19,f13,f7,f19 + fxcsmadd f20,f14,f6,f20 + fxcsmadd f21,f14,f7,f21 + fxcsmadd f22,f15,f6,f22 + fxcsmadd f23,f15,f7,f23 + bdnz+ .grabNgo_k_even4 + blt+ cr1,.grabNgo_iloop + add r24,r24,r19 + add r15,r15,r9 + beq- cr6,.grabNgo_n1even4 + lfpdux f4,r20,r23 + lfpdux f5,r20,r25 + lfpdux f12,r21,r18 + lfpdux f13,r21,r12 + lfpdux f14,r21,r12 + lfpdux f15,r21,r12 + mtctr r16 + fxpmul f24,f12,f4 + fxpmul f25,f13,f4 + fxpmul f26,f14,f4 + fxpmul f27,f15,f4 + fxcsmadd f24,f12,f5,f24 + fxcsmadd f25,f13,f5,f25 + fxcsmadd f26,f14,f5,f26 + fxcsmadd f27,f15,f5,f27 + lfpdux f6,r20,r25 + lfpdux f7,r20,r25 + lfpdux f28,r21,r11 + lfpdux f29,r21,r12 + lfpdux f30,r21,r12 + lfpdux f31,r21,r12 + beq- cr7,.grabNgo_k_even4_2 + fxcpmadd f24,f28,f6,f24 + fxcpmadd f25,f29,f6,f25 + fxcpmadd f26,f30,f6,f26 + fxcpmadd f27,f31,f6,f27 + fxcsmadd f24,f28,f7,f24 + fxcsmadd f25,f29,f7,f25 + fxcsmadd f26,f30,f7,f26 + fxcsmadd f27,f31,f7,f27 + lfpdux f6,r20,r25 + lfpdux f7,r20,r25 + lfpdux f28,r21,r11 + lfpdux f29,r21,r12 + lfpdux f30,r21,r12 + lfpdux f31,r21,r12 + +.grabNgo_k_even4_2: + fxcpmadd f24,f28,f6,f24 + lfpdux f4,r20,r25 + fxcpmadd f25,f29,f6,f25 + lfpdux f5,r20,r25 + fxcpmadd f26,f30,f6,f26 + lfpdux f12,r21,r11 + fxcpmadd f27,f31,f6,f27 + lfpdux f13,r21,r12 + fxcsmadd f24,f28,f7,f24 + lfpdux f14,r21,r12 + fxcsmadd f25,f29,f7,f25 + lfpdux f15,r21,r12 + fxcsmadd f26,f30,f7,f26 + fxcsmadd f27,f31,f7,f27 + fxcpmadd f24,f12,f4,f24 + lfpdux f6,r20,r25 + fxcpmadd f25,f13,f4,f25 + lfpdux f7,r20,r25 + fxcpmadd f26,f14,f4,f26 + lfpdux f28,r21,r11 + fxcpmadd f27,f15,f4,f27 + lfpdux f29,r21,r12 + fxcsmadd f24,f12,f5,f24 + lfpdux f30,r21,r12 + fxcsmadd f25,f13,f5,f25 + lfpdux f31,r21,r12 + fxcsmadd f26,f14,f5,f26 + fxcsmadd f27,f15,f5,f27 + bdnz+ .grabNgo_k_even4_2 + fxcpmadd f24,f28,f6,f24 + fxcpmadd f25,f29,f6,f25 + fxcpmadd f26,f30,f6,f26 + fxcpmadd f27,f31,f6,f27 + fxcsmadd f24,f28,f7,f24 + fxcsmadd f25,f29,f7,f25 + fxcsmadd f26,f30,f7,f26 + fxcsmadd f27,f31,f7,f27 + addi r30,r30,32 + stfpdux f24,r30,r10 + stfpdux f25,r30,r25 + stfpdux f26,r30,r25 + stfpdux f27,r30,r25 + +.grabNgo_n1even4: + addi r29,r29,1 + cmpw cr5,r29,r28 + blt+ cr5,.grabNgo_jloop + fxcpmadd f16,f8,f0,f16 + fxcpmadd f17,f8,f1,f17 + fxcpmadd f18,f9,f0,f18 + fxcpmadd f19,f9,f1,f19 + fxcpmadd f20,f10,f0,f20 + fxcpmadd f21,f10,f1,f21 + fxcpmadd f22,f11,f0,f22 + fxcpmadd f23,f11,f1,f23 + fxcsmadd f16,f8,f2,f16 + fxcsmadd f17,f8,f3,f17 + fxcsmadd f18,f9,f2,f18 + fxcsmadd f19,f9,f3,f19 + fxcsmadd f20,f10,f2,f20 + fxcsmadd f21,f10,f3,f21 + fxcsmadd f22,f11,f2,f22 + fxcsmadd f23,f11,f3,f23 + stfpdux f16,r22,r10 + stfpdux f17,r22,r13 + stfpdux f18,r22,r14 + stfpdux f19,r22,r13 + stfpdux f20,r22,r14 + stfpdux f21,r22,r13 + stfpdux f22,r22,r14 + stfpdux f23,r22,r13 + mr r3,r1 + li r0,16 + lfpdux f14,r3,r0 + lfpdux f15,r3,r0 + lfpdux f16,r3,r0 + lfpdux f17,r3,r0 + lfpdux f18,r3,r0 + lfpdux f19,r3,r0 + lfpdux f20,r3,r0 + lfpdux f21,r3,r0 + lfpdux f22,r3,r0 + lfpdux f23,r3,r0 + lfpdux f24,r3,r0 + lfpdux f25,r3,r0 + lfpdux f26,r3,r0 + lfpdux f27,r3,r0 + lfpdux f28,r3,r0 + lfpdux f29,r3,r0 + lfpdux f30,r3,r0 + lfpdux f31,r3,r0 + lwz r14,360(r1) + li r3,0 + lwz r15,364(r1) + lwz r16,368(r1) + lwz r17,372(r1) + lwz r18,376(r1) + lwz r19,380(r1) + lwz r20,384(r1) + lwz r21,388(r1) + lwz r22,392(r1) + lwz r23,396(r1) + lwz r24,400(r1) + lwz r25,404(r1) + lwz r26,408(r1) + lwz r28,416(r1) + lwz r29,420(r1) + lwz r30,424(r1) + lfd f14,432(r1) + lfd f15,440(r1) + lfd f16,448(r1) + lfd f17,456(r1) + lfd f18,464(r1) + lfd f19,472(r1) + lfd f20,480(r1) + lfd f21,488(r1) + lfd f22,496(r1) + lfd f23,504(r1) + lfd f24,512(r1) + lfd f25,520(r1) + lfd f26,528(r1) + lfd f27,536(r1) + lfd f28,544(r1) + lfd f29,552(r1) + lfd f30,560(r1) + lfd f31,568(r1) + addi r1,r1,576 + blr + +.section ".rodata","a" +.align 3 + .long 0x00000000 + .long 0x00000000 + + +.section ".data","wa" +.align 3 +.type seconds_per_cycle, @object +.size seconds_per_cycle, 8 +seconds_per_cycle: + .long 0x3e188aec + .long 0x70377bb0 + + +.ident "GCC: (GNU) 3.2" diff --git a/src/bg_mxm44_uneven.s b/src/bg_mxm44_uneven.s new file mode 100644 index 0000000..15bf7b9 --- /dev/null +++ b/src/bg_mxm44_uneven.s @@ -0,0 +1,82 @@ +.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4 +.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9 +.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14 +.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19 +.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24 +.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29 +.set r30,30; .set r31,31 +.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4 +.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9 +.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14 +.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19 +.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24 +.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29 +.set f30,30; .set f31,31 + +.file "bg_mxm44_uneven.s" + +.globl bg_mxm44_uneven +.type bg_mxm44_uneven, @function +.size bg_mxm44_uneven, 220 + +.section ".text" +.align 2 + +bg_mxm44_uneven: + stwu r1,-64(r1) + mflr r0 + stw r0,68(r1) + stw r28,48(r1) + lwz r9,0(r8) + addi r8,r1,8 + lwz r28,0(r4) + srawi r0,r9,2 + addze r0,r0 + stw r23,28(r1) + rlwinm r0,r0,2,0,29 + stw r24,32(r1) + subf r0,r0,r9 + stw r25,36(r1) + subf r9,r0,r9 + stw r0,12(r1) + stw r9,8(r1) + mr r23,r4 + stw r26,40(r1) + mr r24,r6 + stw r27,44(r1) + mr r26,r5 + stw r29,52(r1) + mr r27,r7 + mr r25,r3 + lwz r29,0(r6) + crclr 4*cr1+eq + bl bg_mxm44 + addi r8,r1,12 + lwz r0,8(r1) + mr r3,r25 + mr r4,r23 + mr r6,r24 + mullw r28,r28,r0 + mullw r29,r29,r0 + rlwinm r28,r28,3,0,28 + add r27,r27,r28 + mr r7,r27 + rlwinm r29,r29,3,0,28 + add r26,r26,r29 + mr r5,r26 + crclr 4*cr1+eq + bl mxm44_0 + lwz r29,52(r1) + lwz r23,28(r1) + mr r3,r0 + lwz r24,32(r1) + lwz r0,68(r1) + lwz r25,36(r1) + lwz r26,40(r1) + mtlr r0 + lwz r27,44(r1) + lwz r28,48(r1) + addi r1,r1,64 + blr + +.ident "GCC: (GNU) 3.2" diff --git a/src/blas.f b/src/blas.f new file mode 100644 index 0000000..e0129cc --- /dev/null +++ b/src/blas.f @@ -0,0 +1,30886 @@ + subroutine caxpy(n,ca,cx,incx,cy,incy) +c +c constant times a vector plus a vector. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ca + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if (abs(real(ca)) + abs(aimag(ca)) .eq. 0.0 ) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + cy(iy) = cy(iy) + ca*cx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + cy(i) = cy(i) + ca*cx(i) + 30 continue + return + end + subroutine ccopy(n,cx,incx,cy,incy) +c +c copies a vector, x, to a vector, y. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*) + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + cy(iy) = cx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + cy(i) = cx(i) + 30 continue + return + end + complex function cdotc(n,cx,incx,cy,incy) +c +c forms the dot product of two vectors, conjugating the first +c vector. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + ctemp = (0.0,0.0) + cdotc = (0.0,0.0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = ctemp + conjg(cx(ix))*cy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + cdotc = ctemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ctemp = ctemp + conjg(cx(i))*cy(i) + 30 continue + cdotc = ctemp + return + end + complex function cdotu(n,cx,incx,cy,incy) +c +c forms the dot product of two vectors. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + ctemp = (0.0,0.0) + cdotu = (0.0,0.0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = ctemp + cx(ix)*cy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + cdotu = ctemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ctemp = ctemp + cx(i)*cy(i) + 30 continue + cdotu = ctemp + return + end + SUBROUTINE CGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + K = KUP1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + CONJG( A( K + I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + CONJG( A( K + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGBMV . +* + END + SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = conjg( B' ). +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL CONJA, CONJB, NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + CONJA = LSAME( TRANSA, 'C' ) + CONJB = LSAME( TRANSB, 'C' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.CONJA ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.CONJB ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF( CONJA )THEN +* +* Form C := alpha*conjg( A' )*B + beta*C. +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 150, J = 1, N + DO 140, I = 1, M + TEMP = ZERO + DO 130, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 130 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF( NOTA )THEN + IF( CONJB )THEN +* +* Form C := alpha*A*conjg( B' ) + beta*C. +* + DO 200, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 160, I = 1, M + C( I, J ) = ZERO + 160 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 170, I = 1, M + C( I, J ) = BETA*C( I, J ) + 170 CONTINUE + END IF + DO 190, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( B( J, L ) ) + DO 180, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B' + beta*C +* + DO 250, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 210, I = 1, M + C( I, J ) = ZERO + 210 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 220, I = 1, M + C( I, J ) = BETA*C( I, J ) + 220 CONTINUE + END IF + DO 240, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 230, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF( CONJA )THEN + IF( CONJB )THEN +* +* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. +* + DO 280, J = 1, N + DO 270, I = 1, M + TEMP = ZERO + DO 260, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*CONJG( B( J, L ) ) + 260 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*conjg( A' )*B' + beta*C +* + DO 310, J = 1, N + DO 300, I = 1, M + TEMP = ZERO + DO 290, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*B( J, L ) + 290 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF( CONJB )THEN +* +* Form C := alpha*A'*conjg( B' ) + beta*C +* + DO 340, J = 1, N + DO 330, I = 1, M + TEMP = ZERO + DO 320, L = 1, K + TEMP = TEMP + A( L, I )*CONJG( B( J, L ) ) + 320 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 370, J = 1, N + DO 360, I = 1, M + TEMP = ZERO + DO 350, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 350 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMM . +* + END + SUBROUTINE CGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + IF( NOCONJ )THEN + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = 1, M + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + IF( NOCONJ )THEN + DO 120, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = 1, M + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMV . +* + END + SUBROUTINE CGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGERC performs the rank 1 operation +* +* A := alpha*x*conjg( y' ) + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGERC ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( Y( JY ) ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( Y( JY ) ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERC . +* + END + SUBROUTINE CGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGERU performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGERU ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERU . +* + END + SUBROUTINE CHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( A( 1, J ) ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( A( 1, J ) ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHBMV . +* + END + SUBROUTINE CHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHEMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is an hermitian matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the hermitian matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the hermitian matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* hermitian matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* hermitian matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*CONJG( A( K, I ) ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*CONJG( A( K, I ) ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*REAL( A( J, J ) ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*CONJG( A( J, K ) ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*CONJG( A( J, K ) ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of CHEMM . +* + END + SUBROUTINE CHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHEMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHEMV . +* + END + SUBROUTINE CHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHER2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2 . +* + END + SUBROUTINE CHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + REAL BETA + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHER2K performs one of the hermitian rank 2k operations +* +* C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, +* +* where alpha and beta are scalars with beta real, C is an n by n +* hermitian matrix and A and B are n by k matrices in the first case +* and k by n matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + +* conjg( alpha )*B*conjg( A' ) + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + +* conjg( alpha )*conjg( B' )*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 70, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + +* C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( B( J, L ) ) + TEMP2 = CONJG( ALPHA*A( J, L ) ) + DO 110, I = 1, J - 1 + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( A( J, L )*TEMP1 + + $ B( J, L )*TEMP2 ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( B( J, L ) ) + TEMP2 = CONJG( ALPHA*A( J, L ) ) + DO 160, I = J + 1, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( A( J, L )*TEMP1 + + $ B( J, L )*TEMP2 ) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + +* C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J ) + 190 CONTINUE + IF( I.EQ.J )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + C( J, J ) = REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + ELSE + C( J, J ) = BETA*REAL( C( J, J ) ) + + $ REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J ) + 220 CONTINUE + IF( I.EQ.J )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + C( J, J ) = REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + ELSE + C( J, J ) = BETA*REAL( C( J, J ) ) + + $ REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2K. +* + END + SUBROUTINE CHER ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CHER performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + REAL( X( J )*TEMP ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + IX = KX + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + REAL( X( JX )*TEMP ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( J ) ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( JX ) ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + A( I, J ) = A( I, J ) + X( IX )*TEMP + 70 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER . +* + END + SUBROUTINE CHERK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHERK performs one of the hermitian rank k operations +* +* C := alpha*A*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*A + beta*C, +* +* where alpha and beta are real scalars, C is an n by n hermitian +* matrix and A is an n by k matrix in the first case and a k by n +* matrix in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CMPLX, CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL RTEMP + COMPLEX TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHERK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 70, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*conjg( A' ) + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.CMPLX( ZERO ) )THEN + TEMP = ALPHA*CONJG( A( J, L ) ) + DO 110, I = 1, J - 1 + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( TEMP*A( I, L ) ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 150, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.CMPLX( ZERO ) )THEN + TEMP = ALPHA*CONJG( A( J, L ) ) + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( TEMP*A( J, L ) ) + DO 160, I = J + 1, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*A + beta*C. +* + IF( UPPER )THEN + DO 220, J = 1, N + DO 200, I = 1, J - 1 + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + RTEMP = ZERO + DO 210, L = 1, K + RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J ) + 210 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) ) + END IF + 220 CONTINUE + ELSE + DO 260, J = 1, N + RTEMP = ZERO + DO 230, L = 1, K + RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J ) + 230 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) ) + END IF + DO 250, I = J + 1, N + TEMP = ZERO + DO 240, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*A( L, J ) + 240 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 250 CONTINUE + 260 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHERK . +* + END + SUBROUTINE CHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( AP( KK ) ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK ) ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPMV . +* + END + SUBROUTINE CHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHPR2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + + $ REAL( X( JX )*TEMP1 + + $ Y( JY )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + AP( KK ) = REAL( AP( KK ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + AP( KK ) = REAL( AP( KK ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPR2 . +* + END + SUBROUTINE CHPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CHPR performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + $ + REAL( X( J )*TEMP ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + IX = KX + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + $ + REAL( X( JX )*TEMP ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( J ) ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( JX ) ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPR . +* + END + subroutine crotg(ca,cb,c,s) + complex ca,cb,s + real c + real norm,scale + complex alpha + if (cabs(ca) .ne. 0.) go to 10 + c = 0. + s = (1.,0.) + ca = cb + go to 20 + 10 continue + scale = cabs(ca) + cabs(cb) + norm = scale * sqrt((cabs(ca/scale))**2 + (cabs(cb/scale))**2) + alpha = ca /cabs(ca) + c = cabs(ca) / norm + s = alpha * conjg(cb) / norm + ca = alpha * norm + 20 continue + return + end + subroutine cscal(n,ca,cx,incx) +c +c scales a vector by a constant. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex ca,cx(*) + integer i,incx,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + cx(i) = ca*cx(i) + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + cx(i) = ca*cx(i) + 30 continue + return + end + subroutine csrot (n,cx,incx,cy,incy,c,s) +c +c applies a plane rotation, where the cos and sin (c and s) are real +c and the vectors cx and cy are complex. +c jack dongarra, linpack, 3/11/78. +c + complex cx(1),cy(1),ctemp + real c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = c*cx(ix) + s*cy(iy) + cy(iy) = c*cy(iy) - s*cx(ix) + cx(ix) = ctemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ctemp = c*cx(i) + s*cy(i) + cy(i) = c*cy(i) - s*cx(i) + cx(i) = ctemp + 30 continue + return + end + subroutine csscal(n,sa,cx,incx) +c +c scales a complex vector by a real constant. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real sa + integer i,incx,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i))) + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i))) + 30 continue + return + end + subroutine cswap (n,cx,incx,cy,incy) +c +c interchanges two vectors. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = cx(ix) + cx(ix) = cy(iy) + cy(iy) = ctemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 + 20 do 30 i = 1,n + ctemp = cx(i) + cx(i) = cy(i) + cy(i) = ctemp + 30 continue + return + end + SUBROUTINE CSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of CSYMM . +* + END + SUBROUTINE CSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYR2K. +* + END + SUBROUTINE CSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYRK . +* + END + SUBROUTINE CTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( KPLUS1, J ) ) + DO 100, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + CONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 120, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( KPLUS1, J ) ) + DO 130, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( 1, J ) ) + DO 160, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + CONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 180, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( 1, J ) ) + DO 190, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTBMV . +* + END + SUBROUTINE CTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A') )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 100, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( KPLUS1, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 130, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( KPLUS1, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 160, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( 1, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + IF( NOCONJ )THEN + DO 180, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 190, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( 1, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTBSV . +* + END + SUBROUTINE CTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + K = KK - 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + CONJG( AP( K ) )*X( I ) + K = K - 1 + 100 CONTINUE + END IF + X( J ) = TEMP + KK = KK - J + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 120, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 130, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + CONJG( AP( K ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 140 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + K = KK + 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 160, I = J + 1, N + TEMP = TEMP + CONJG( AP( K ) )*X( I ) + K = K + 1 + 160 CONTINUE + END IF + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 180, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 190, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + CONJG( AP( K ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTPMV . +* + END + SUBROUTINE CTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - CONJG( AP( K ) )*X( I ) + K = K + 1 + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK + J - 1 ) ) + END IF + X( J ) = TEMP + KK = KK + J + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 120, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 130, K = KK, KK + J - 2 + TEMP = TEMP - CONJG( AP( K ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK + J - 1 ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 140 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - CONJG( AP( K ) )*X( I ) + K = K - 1 + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK - N + J ) ) + END IF + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - CONJG( AP( K ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK - N + J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTPSV . +* + END + SUBROUTINE CTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ) +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. +* + IF( UPPER )THEN + DO 120, J = 1, N + DO 110, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( I, I ) ) + DO 100, K = 1, I - 1 + TEMP = TEMP + CONJG( A( K, I ) )*B( K, J ) + 100 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = 1, M + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 130, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 130 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( I, I ) ) + DO 140, K = I + 1, M + TEMP = TEMP + CONJG( A( K, I ) )*B( K, J ) + 140 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 200, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 170, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 170 CONTINUE + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 180, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 210, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 210 CONTINUE + DO 230, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 220, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). +* + IF( UPPER )THEN + DO 280, K = 1, N + DO 260, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*CONJG( A( J, K ) ) + END IF + DO 250, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*CONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320, K = N, 1, -1 + DO 300, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*CONJG( A( J, K ) ) + END IF + DO 290, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*CONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 310, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMM . +* + END + SUBROUTINE CTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 120, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 130, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 160, I = J + 1, N + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 180, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 190, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMV . +* + END + SUBROUTINE CTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B +* or B := alpha*inv( conjg( A' ) )*B. +* + IF( UPPER )THEN + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 120, K = 1, I - 1 + TEMP = TEMP - CONJG( A( K, I ) )*B( K, J ) + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 130 CONTINUE + 140 CONTINUE + ELSE + DO 180, J = 1, N + DO 170, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 150, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 160, K = I + 1, M + TEMP = TEMP - CONJG( A( K, I ) )*B( K, J ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 170 CONTINUE + 180 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 230, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 190, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 190 CONTINUE + END IF + DO 210, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 200, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 220, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 220 CONTINUE + END IF + 230 CONTINUE + ELSE + DO 280, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 240, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 240 CONTINUE + END IF + DO 260, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 250, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 270, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 270 CONTINUE + END IF + 280 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ) +* or B := alpha*B*inv( conjg( A' ) ). +* + IF( UPPER )THEN + DO 330, K = N, 1, -1 + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/CONJG( A( K, K ) ) + END IF + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + DO 310, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = CONJG( A( J, K ) ) + END IF + DO 300, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 320, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 320 CONTINUE + END IF + 330 CONTINUE + ELSE + DO 380, K = 1, N + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/CONJG( A( K, K ) ) + END IF + DO 340, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 340 CONTINUE + END IF + DO 360, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = CONJG( A( J, K ) ) + END IF + DO 350, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 370, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 370 CONTINUE + END IF + 380 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSM . +* + END + SUBROUTINE CTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 120, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 130, I = 1, J - 1 + TEMP = TEMP - CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - CONJG( A( I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 180, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 190, I = N, J + 1, -1 + TEMP = TEMP - CONJG( A( I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSV . +* + END + double precision function dasum(n,dx,incx) +c +c takes the sum of the absolute values. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dtemp + integer i,incx,m,mp1,n,nincx +c + dasum = 0.0d0 + dtemp = 0.0d0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + dtemp = dtemp + dabs(dx(i)) + 10 continue + dasum = dtemp + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,6) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dtemp + dabs(dx(i)) + 30 continue + if( n .lt. 6 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,6 + dtemp = dtemp + dabs(dx(i)) + dabs(dx(i + 1)) + dabs(dx(i + 2)) + * + dabs(dx(i + 3)) + dabs(dx(i + 4)) + dabs(dx(i + 5)) + 50 continue + 60 dasum = dtemp + return + end + subroutine daxpy(n,da,dx,incx,dy,incy) +c +c constant times a vector plus a vector. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),da + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if (da .eq. 0.0d0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,4) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dy(i) + da*dx(i) + 30 continue + if( n .lt. 4 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,4 + dy(i) = dy(i) + da*dx(i) + dy(i + 1) = dy(i + 1) + da*dx(i + 1) + dy(i + 2) = dy(i + 2) + da*dx(i + 2) + dy(i + 3) = dy(i + 3) + da*dx(i + 3) + 50 continue + return + end + double precision function dcabs1(z) + double complex z,zz + double precision t(2) + equivalence (zz,t(1)) + zz = z + dcabs1 = dabs(t(1)) + dabs(t(2)) + return + end + subroutine dcopy(n,dx,incx,dy,incy) +c +c copies a vector, x, to a vector, y. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*) + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,7) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dx(i) + 30 continue + if( n .lt. 7 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,7 + dy(i) = dx(i) + dy(i + 1) = dx(i + 1) + dy(i + 2) = dx(i + 2) + dy(i + 3) = dx(i + 3) + dy(i + 4) = dx(i + 4) + dy(i + 5) = dx(i + 5) + dy(i + 6) = dx(i + 6) + 50 continue + return + end + double precision function ddot(n,dx,incx,dy,incy) +c +c forms the dot product of two vectors. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp + integer i,incx,incy,ix,iy,m,mp1,n +c + ddot = 0.0d0 + dtemp = 0.0d0 + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = dtemp + dx(ix)*dy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + ddot = dtemp + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dtemp + dx(i)*dy(i) + 30 continue + if( n .lt. 5 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + + * dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) + 50 continue + 60 ddot = dtemp + return + end + SUBROUTINE DGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + K = KUP1 - J + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGBMV . +* + END + SUBROUTINE DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X', +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = A'. +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = B'. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA, NCOLA and NROWB as the number of rows +* and columns of A and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.LSAME( TRANSB, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And if alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF( NOTA )THEN +* +* Form C := alpha*A*B' + beta*C +* + DO 170, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 130, I = 1, M + C( I, J ) = ZERO + 130 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 140, I = 1, M + C( I, J ) = BETA*C( I, J ) + 140 CONTINUE + END IF + DO 160, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 200, J = 1, N + DO 190, I = 1, M + TEMP = ZERO + DO 180, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 180 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMM . +* + END + SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + DO 110, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMV . +* + END + SUBROUTINE DGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of DGER . +* + END + DOUBLE PRECISION FUNCTION DNRM2 ( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* DNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* DNRM2 := sqrt( x'*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to DLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION ABSXI, NORM, SCALE, SSQ +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE IF( N.EQ.1 )THEN + NORM = ABS( X( 1 ) ) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( X( IX ).NE.ZERO )THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI )THEN + SSQ = ONE + SSQ*( SCALE/ABSXI )**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + ( ABSXI/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + DNRM2 = NORM + RETURN +* +* End of DNRM2. +* + END + subroutine drot (n,dx,incx,dy,incy,c,s) +c +c applies a plane rotation. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp,c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = c*dx(ix) + s*dy(iy) + dy(iy) = c*dy(iy) - s*dx(ix) + dx(ix) = dtemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + dtemp = c*dx(i) + s*dy(i) + dy(i) = c*dy(i) - s*dx(i) + dx(i) = dtemp + 30 continue + return + end + subroutine drotg(da,db,c,s) +c +c construct givens plane rotation. +c jack dongarra, linpack, 3/11/78. +c + double precision da,db,c,s,roe,scale,r,z +c + roe = db + if( dabs(da) .gt. dabs(db) ) roe = da + scale = dabs(da) + dabs(db) + if( scale .ne. 0.0d0 ) go to 10 + c = 1.0d0 + s = 0.0d0 + r = 0.0d0 + z = 0.0d0 + go to 20 + 10 r = scale*dsqrt((da/scale)**2 + (db/scale)**2) + r = dsign(1.0d0,roe)*r + c = da/r + s = db/r + z = 1.0d0 + if( dabs(da) .gt. dabs(db) ) z = s + if( dabs(db) .ge. dabs(da) .and. c .ne. 0.0d0 ) z = 1.0d0/c + 20 da = r + db = z + return + end + SUBROUTINE DROTM (N,DX,INCX,DY,INCY,DPARAM) +C +C APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX +C +C (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN +C (DY**T) +C +C DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE +C LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. +C WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. +C +C DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 +C +C (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) +C H=( ) ( ) ( ) ( ) +C (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). +C SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. +C + DOUBLE PRECISION DFLAG,DH12,DH22,DX,TWO,Z,DH11,DH21, + 1 DPARAM,DY,W,ZERO + DIMENSION DX(1),DY(1),DPARAM(5) + DATA ZERO,TWO/0.D0,2.D0/ +C + DFLAG=DPARAM(1) + IF(N .LE. 0 .OR.(DFLAG+TWO.EQ.ZERO)) GO TO 140 + IF(.NOT.(INCX.EQ.INCY.AND. INCX .GT.0)) GO TO 70 +C + NSTEPS=N*INCX + IF(DFLAG) 50,10,30 + 10 CONTINUE + DH12=DPARAM(4) + DH21=DPARAM(3) + DO 20 I=1,NSTEPS,INCX + W=DX(I) + Z=DY(I) + DX(I)=W+Z*DH12 + DY(I)=W*DH21+Z + 20 CONTINUE + GO TO 140 + 30 CONTINUE + DH11=DPARAM(2) + DH22=DPARAM(5) + DO 40 I=1,NSTEPS,INCX + W=DX(I) + Z=DY(I) + DX(I)=W*DH11+Z + DY(I)=-W+DH22*Z + 40 CONTINUE + GO TO 140 + 50 CONTINUE + DH11=DPARAM(2) + DH12=DPARAM(4) + DH21=DPARAM(3) + DH22=DPARAM(5) + DO 60 I=1,NSTEPS,INCX + W=DX(I) + Z=DY(I) + DX(I)=W*DH11+Z*DH12 + DY(I)=W*DH21+Z*DH22 + 60 CONTINUE + GO TO 140 + 70 CONTINUE + KX=1 + KY=1 + IF(INCX .LT. 0) KX=1+(1-N)*INCX + IF(INCY .LT. 0) KY=1+(1-N)*INCY +C + IF(DFLAG)120,80,100 + 80 CONTINUE + DH12=DPARAM(4) + DH21=DPARAM(3) + DO 90 I=1,N + W=DX(KX) + Z=DY(KY) + DX(KX)=W+Z*DH12 + DY(KY)=W*DH21+Z + KX=KX+INCX + KY=KY+INCY + 90 CONTINUE + GO TO 140 + 100 CONTINUE + DH11=DPARAM(2) + DH22=DPARAM(5) + DO 110 I=1,N + W=DX(KX) + Z=DY(KY) + DX(KX)=W*DH11+Z + DY(KY)=-W+DH22*Z + KX=KX+INCX + KY=KY+INCY + 110 CONTINUE + GO TO 140 + 120 CONTINUE + DH11=DPARAM(2) + DH12=DPARAM(4) + DH21=DPARAM(3) + DH22=DPARAM(5) + DO 130 I=1,N + W=DX(KX) + Z=DY(KY) + DX(KX)=W*DH11+Z*DH12 + DY(KY)=W*DH21+Z*DH22 + KX=KX+INCX + KY=KY+INCY + 130 CONTINUE + 140 CONTINUE + RETURN + END + SUBROUTINE DROTMG (DD1,DD2,DX1,DY1,DPARAM) +C +C CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS +C THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* +C DY2)**T. +C WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. +C +C DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 +C +C (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) +C H=( ) ( ) ( ) ( ) +C (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). +C LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 +C RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE +C VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) +C +C THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE +C INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE +C OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. +C + DOUBLE PRECISION GAM,ONE,RGAMSQ,DD2,DH11,DH21,DPARAM,DP2, + 1 DQ2,DU,DY1,ZERO,GAMSQ,DD1,DFLAG,DH12,DH22,DP1,DQ1, + 2 DTEMP,DX1,TWO + DIMENSION DPARAM(5) +C + DATA ZERO,ONE,TWO /0.D0,1.D0,2.D0/ + DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/ + IF(.NOT. DD1 .LT. ZERO) GO TO 10 +C GO ZERO-H-D-AND-DX1.. + GO TO 60 + 10 CONTINUE +C CASE-DD1-NONNEGATIVE + DP2=DD2*DY1 + IF(.NOT. DP2 .EQ. ZERO) GO TO 20 + DFLAG=-TWO + GO TO 260 +C REGULAR-CASE.. + 20 CONTINUE + DP1=DD1*DX1 + DQ2=DP2*DY1 + DQ1=DP1*DX1 +C + IF(.NOT. DABS(DQ1) .GT. DABS(DQ2)) GO TO 40 + DH21=-DY1/DX1 + DH12=DP2/DP1 +C + DU=ONE-DH12*DH21 +C + IF(.NOT. DU .LE. ZERO) GO TO 30 +C GO ZERO-H-D-AND-DX1.. + GO TO 60 + 30 CONTINUE + DFLAG=ZERO + DD1=DD1/DU + DD2=DD2/DU + DX1=DX1*DU +C GO SCALE-CHECK.. + GO TO 100 + 40 CONTINUE + IF(.NOT. DQ2 .LT. ZERO) GO TO 50 +C GO ZERO-H-D-AND-DX1.. + GO TO 60 + 50 CONTINUE + DFLAG=ONE + DH11=DP1/DP2 + DH22=DX1/DY1 + DU=ONE+DH11*DH22 + DTEMP=DD2/DU + DD2=DD1/DU + DD1=DTEMP + DX1=DY1*DU +C GO SCALE-CHECK + GO TO 100 +C PROCEDURE..ZERO-H-D-AND-DX1.. + 60 CONTINUE + DFLAG=-ONE + DH11=ZERO + DH12=ZERO + DH21=ZERO + DH22=ZERO +C + DD1=ZERO + DD2=ZERO + DX1=ZERO +C RETURN.. + GO TO 220 +C PROCEDURE..FIX-H.. + 70 CONTINUE + IF(.NOT. DFLAG .GE. ZERO) GO TO 90 +C + IF(.NOT. DFLAG .EQ. ZERO) GO TO 80 + DH11=ONE + DH22=ONE + DFLAG=-ONE + GO TO 90 + 80 CONTINUE + DH21=-ONE + DH12=ONE + DFLAG=-ONE + 90 CONTINUE + GO TO IGO,(120,150,180,210) +C PROCEDURE..SCALE-CHECK + 100 CONTINUE + 110 CONTINUE + IF(.NOT. DD1 .LE. RGAMSQ) GO TO 130 + IF(DD1 .EQ. ZERO) GO TO 160 + ASSIGN 120 TO IGO +C FIX-H.. + GO TO 70 + 120 CONTINUE + DD1=DD1*GAM**2 + DX1=DX1/GAM + DH11=DH11/GAM + DH12=DH12/GAM + GO TO 110 + 130 CONTINUE + 140 CONTINUE + IF(.NOT. DD1 .GE. GAMSQ) GO TO 160 + ASSIGN 150 TO IGO +C FIX-H.. + GO TO 70 + 150 CONTINUE + DD1=DD1/GAM**2 + DX1=DX1*GAM + DH11=DH11*GAM + DH12=DH12*GAM + GO TO 140 + 160 CONTINUE + 170 CONTINUE + IF(.NOT. DABS(DD2) .LE. RGAMSQ) GO TO 190 + IF(DD2 .EQ. ZERO) GO TO 220 + ASSIGN 180 TO IGO +C FIX-H.. + GO TO 70 + 180 CONTINUE + DD2=DD2*GAM**2 + DH21=DH21/GAM + DH22=DH22/GAM + GO TO 170 + 190 CONTINUE + 200 CONTINUE + IF(.NOT. DABS(DD2) .GE. GAMSQ) GO TO 220 + ASSIGN 210 TO IGO +C FIX-H.. + GO TO 70 + 210 CONTINUE + DD2=DD2/GAM**2 + DH21=DH21*GAM + DH22=DH22*GAM + GO TO 200 + 220 CONTINUE + IF(DFLAG)250,230,240 + 230 CONTINUE + DPARAM(3)=DH21 + DPARAM(4)=DH12 + GO TO 260 + 240 CONTINUE + DPARAM(2)=DH11 + DPARAM(5)=DH22 + GO TO 260 + 250 CONTINUE + DPARAM(2)=DH11 + DPARAM(3)=DH21 + DPARAM(4)=DH12 + DPARAM(5)=DH22 + 260 CONTINUE + DPARAM(1)=DFLAG + RETURN + END + SUBROUTINE DSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( 1, J ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( 1, J ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSBMV . +* + END + subroutine dscal(n,da,dx,incx) +c +c scales a vector by a constant. +c uses unrolled loops for increment equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision da,dx(*) + integer i,incx,m,mp1,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + dx(i) = da*dx(i) + 10 continue + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dx(i) = da*dx(i) + 30 continue + if( n .lt. 5 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dx(i) = da*dx(i) + dx(i + 1) = da*dx(i + 1) + dx(i + 2) = da*dx(i + 2) + dx(i + 3) = da*dx(i + 3) + dx(i + 4) = da*dx(i + 4) + 50 continue + return + end +*DECK DSDOT + DOUBLE PRECISION FUNCTION DSDOT (N, SX, INCX, SY, INCY) +C***BEGIN PROLOGUE DSDOT +C***PURPOSE Compute the inner product of two vectors with extended +C precision accumulation and result. +C***LIBRARY SLATEC (BLAS) +C***CATEGORY D1A4 +C***TYPE DOUBLE PRECISION (DSDOT-D, DCDOT-C) +C***KEYWORDS BLAS, COMPLEX VECTORS, DOT PRODUCT, INNER PRODUCT, +C LINEAR ALGEBRA, VECTOR +C***AUTHOR Lawson, C. L., (JPL) +C Hanson, R. J., (SNLA) +C Kincaid, D. R., (U. of Texas) +C Krogh, F. T., (JPL) +C***DESCRIPTION +C +C B L A S Subprogram +C Description of Parameters +C +C --Input-- +C N number of elements in input vector(s) +C SX single precision vector with N elements +C INCX storage spacing between elements of SX +C SY single precision vector with N elements +C INCY storage spacing between elements of SY +C +C --Output-- +C DSDOT double precision dot product (zero if N.LE.0) +C +C Returns D.P. dot product accumulated in D.P., for S.P. SX and SY +C DSDOT = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), +C where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is +C defined in a similar way using INCY. +C +C***REFERENCES C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. +C Krogh, Basic linear algebra subprograms for Fortran +C usage, Algorithm No. 539, Transactions on Mathematical +C Software 5, 3 (September 1979), pp. 308-323. +C***ROUTINES CALLED (NONE) +C***REVISION HISTORY (YYMMDD) +C 791001 DATE WRITTEN +C 890831 Modified array declarations. (WRB) +C 890831 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 920310 Corrected definition of LX in DESCRIPTION. (WRB) +C 920501 Reformatted the REFERENCES section. (WRB) +C***END PROLOGUE DSDOT + REAL SX(*),SY(*) +C***FIRST EXECUTABLE STATEMENT DSDOT + DSDOT = 0.0D0 + IF (N .LE. 0) RETURN + IF (INCX.EQ.INCY .AND. INCX.GT.0) GO TO 20 +C +C Code for unequal or nonpositive increments. +C + KX = 1 + KY = 1 + IF (INCX .LT. 0) KX = 1+(1-N)*INCX + IF (INCY .LT. 0) KY = 1+(1-N)*INCY + DO 10 I = 1,N + DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) + KX = KX + INCX + KY = KY + INCY + 10 CONTINUE + RETURN +C +C Code for equal, positive, non-unit increments. +C + 20 NS = N*INCX + DO 30 I = 1,NS,INCX + DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) + 30 CONTINUE + RETURN + END + SUBROUTINE DSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPMV . +* + END + SUBROUTINE DSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSPR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPR2 . +* + END + SUBROUTINE DSPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DSPR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPR . +* + END + subroutine dswap (n,dx,incx,dy,incy) +c +c interchanges two vectors. +c uses unrolled loops for increments equal one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = dx(ix) + dx(ix) = dy(iy) + dy(iy) = dtemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,3) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + 30 continue + if( n .lt. 3 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,3 + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + dtemp = dx(i + 1) + dx(i + 1) = dy(i + 1) + dy(i + 1) = dtemp + dtemp = dx(i + 2) + dx(i + 2) = dy(i + 2) + dy(i + 2) = dtemp + 50 continue + return + end + SUBROUTINE DSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP1, TEMP2 +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of DSYMM . +* + END + SUBROUTINE DSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYMV . +* + END + SUBROUTINE DSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSYR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2 . +* + END + SUBROUTINE DSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION TEMP1, TEMP2 +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2K. +* + END + SUBROUTINE DSYR ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DSYR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR . +* + END + SUBROUTINE DSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYRK . +* + END + SUBROUTINE DTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 110, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 130, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTBMV . +* + END + SUBROUTINE DTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A')*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 110, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + DO 130, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTBSV . +* + END + SUBROUTINE DTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK =1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK - 1 + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + X( J ) = TEMP + KK = KK - J + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 110, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 120 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK + 1 + DO 130, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 130 CONTINUE + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTPMV . +* + END + SUBROUTINE DTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + K = KK + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( J ) = TEMP + KK = KK + J + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 120 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + K = KK + DO 130, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( JX ) = TEMP + JX = JX - INCX + KK = KK - (N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTPSV . +* + END + SUBROUTINE DTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + DOUBLE PRECISION ALPHA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF( UPPER )THEN + DO 110, J = 1, N + DO 100, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + B( I, J ) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 120, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 120 CONTINUE + B( I, J ) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 180, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 150 CONTINUE + DO 170, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 160, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 190, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 190 CONTINUE + DO 210, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 200, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF( UPPER )THEN + DO 260, K = 1, N + DO 240, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 230, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 250, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300, K = N, 1, -1 + DO 280, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 270, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMM . +* + END + SUBROUTINE DTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 110, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 130, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMV . +* + END + SUBROUTINE DTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + DOUBLE PRECISION ALPHA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B. +* + IF( UPPER )THEN + DO 130, J = 1, N + DO 120, I = 1, M + TEMP = ALPHA*B( I, J ) + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + DO 140, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 140 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 210, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 170, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 170 CONTINUE + END IF + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 180, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 200, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 220, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 220 CONTINUE + END IF + DO 240, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 230, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 250, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ). +* + IF( UPPER )THEN + DO 310, K = N, 1, -1 + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + DO 290, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 280, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 280 CONTINUE + END IF + 290 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 300, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360, K = 1, N + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 320, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 320 CONTINUE + END IF + DO 340, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 330, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 330 CONTINUE + END IF + 340 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 350, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSM . +* + END + SUBROUTINE DTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + DO 130, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSV . +* + END + double precision function dzasum(n,zx,incx) +c +c takes the sum of the absolute values. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision stemp,dcabs1 + integer i,incx,ix,n +c + dzasum = 0.0d0 + stemp = 0.0d0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + stemp = stemp + dcabs1(zx(ix)) + ix = ix + incx + 10 continue + dzasum = stemp + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + stemp = stemp + dcabs1(zx(i)) + 30 continue + dzasum = stemp + return + end + DOUBLE PRECISION FUNCTION DZNRM2( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* DZNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* DZNRM2 := sqrt( conjg( x' )*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to ZLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION NORM, SCALE, SSQ, TEMP +* .. Intrinsic Functions .. + INTRINSIC ABS, DIMAG, DBLE, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( DBLE( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( DBLE( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + IF( DIMAG( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( DIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + DZNRM2 = NORM + RETURN +* +* End of DZNRM2. +* + END + integer function icamax(n,cx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real smax + integer i,incx,ix,n + complex zdum + real cabs1 + cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum)) +c + icamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + icamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = cabs1(cx(1)) + ix = ix + incx + do 10 i = 2,n + if(cabs1(cx(ix)).le.smax) go to 5 + icamax = i + smax = cabs1(cx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = cabs1(cx(1)) + do 30 i = 2,n + if(cabs1(cx(i)).le.smax) go to 30 + icamax = i + smax = cabs1(cx(i)) + 30 continue + return + end + integer function idamax(n,dx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dmax + integer i,incx,ix,n +c + idamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + idamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + dmax = dabs(dx(1)) + ix = ix + incx + do 10 i = 2,n + if(dabs(dx(ix)).le.dmax) go to 5 + idamax = i + dmax = dabs(dx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 dmax = dabs(dx(1)) + do 30 i = 2,n + if(dabs(dx(i)).le.dmax) go to 30 + idamax = i + dmax = dabs(dx(i)) + 30 continue + return + end + integer function isamax(n,sx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),smax + integer i,incx,ix,n +c + isamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + isamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = abs(sx(1)) + ix = ix + incx + do 10 i = 2,n + if(abs(sx(ix)).le.smax) go to 5 + isamax = i + smax = abs(sx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = abs(sx(1)) + do 30 i = 2,n + if(abs(sx(i)).le.smax) go to 30 + isamax = i + smax = abs(sx(i)) + 30 continue + return + end + integer function izamax(n,zx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, 1/15/85. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision smax + integer i,incx,ix,n + double precision dcabs1 +c + izamax = 0 + if( n.lt.1 .or. incx.le.0 )return + izamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = dcabs1(zx(1)) + ix = ix + incx + do 10 i = 2,n + if(dcabs1(zx(ix)).le.smax) go to 5 + izamax = i + smax = dcabs1(zx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = dcabs1(zx(1)) + do 30 i = 2,n + if(dcabs1(zx(i)).le.smax) go to 30 + izamax = i + smax = dcabs1(zx(i)) + 30 continue + return + end + LOGICAL FUNCTION LSAME( CA, CB ) +* +* -- LAPACK auxiliary routine (version 2.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* January 31, 1994 +* +* .. Scalar Arguments .. + CHARACTER CA, CB +* .. +* +* Purpose +* ======= +* +* LSAME returns .TRUE. if CA is the same letter as CB regardless of +* case. +* +* Arguments +* ========= +* +* CA (input) CHARACTER*1 +* CB (input) CHARACTER*1 +* CA and CB specify the single characters to be compared. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Local Scalars .. + INTEGER INTA, INTB, ZCODE +* .. +* .. Executable Statements .. +* +* Test if the characters are equal +* + LSAME = CA.EQ.CB + IF( LSAME ) + $ RETURN +* +* Now test for equivalence if both characters are alphabetic. +* + ZCODE = ICHAR( 'Z' ) +* +* Use 'Z' rather than 'A' so that ASCII can be detected on Prime +* machines, on which ICHAR returns a value with bit 8 set. +* ICHAR('A') on Prime machines returns 193 which is the same as +* ICHAR('A') on an EBCDIC machine. +* + INTA = ICHAR( CA ) + INTB = ICHAR( CB ) +* + IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN +* +* ASCII is assumed - ZCODE is the ASCII code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 + IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 +* + ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN +* +* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.129 .AND. INTA.LE.137 .OR. + $ INTA.GE.145 .AND. INTA.LE.153 .OR. + $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 + IF( INTB.GE.129 .AND. INTB.LE.137 .OR. + $ INTB.GE.145 .AND. INTB.LE.153 .OR. + $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 +* + ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN +* +* ASCII is assumed, on Prime machines - ZCODE is the ASCII code +* plus 128 of either lower or upper case 'Z'. +* + IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 + IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 + END IF + LSAME = INTA.EQ.INTB +* +* RETURN +* +* End of LSAME +* + END + real function sasum(n,sx,incx) +c +c takes the sum of the absolute values. +c uses unrolled loops for increment equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),stemp + integer i,incx,m,mp1,n,nincx +c + sasum = 0.0e0 + stemp = 0.0e0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + stemp = stemp + abs(sx(i)) + 10 continue + sasum = stemp + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,6) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = stemp + abs(sx(i)) + 30 continue + if( n .lt. 6 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,6 + stemp = stemp + abs(sx(i)) + abs(sx(i + 1)) + abs(sx(i + 2)) + * + abs(sx(i + 3)) + abs(sx(i + 4)) + abs(sx(i + 5)) + 50 continue + 60 sasum = stemp + return + end + subroutine saxpy(n,sa,sx,incx,sy,incy) +c +c constant times a vector plus a vector. +c uses unrolled loop for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),sa + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if (sa .eq. 0.0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + sy(iy) = sy(iy) + sa*sx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,4) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sy(i) = sy(i) + sa*sx(i) + 30 continue + if( n .lt. 4 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,4 + sy(i) = sy(i) + sa*sx(i) + sy(i + 1) = sy(i + 1) + sa*sx(i + 1) + sy(i + 2) = sy(i + 2) + sa*sx(i + 2) + sy(i + 3) = sy(i + 3) + sa*sx(i + 3) + 50 continue + return + end + real function scasum(n,cx,incx) +c +c takes the sum of the absolute values of a complex vector and +c returns a single precision result. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real stemp + integer i,incx,n,nincx +c + scasum = 0.0e0 + stemp = 0.0e0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i))) + 10 continue + scasum = stemp + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i))) + 30 continue + scasum = stemp + return + end + REAL FUNCTION SCNRM2( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* SCNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SCNRM2 := sqrt( conjg( x' )*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to CLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + INTEGER IX + REAL NORM, SCALE, SSQ, TEMP +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( REAL( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( REAL( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + IF( AIMAG( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( AIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + SCNRM2 = NORM + RETURN +* +* End of SCNRM2. +* + END + subroutine scopy(n,sx,incx,sy,incy) +c +c copies a vector, x, to a vector, y. +c uses unrolled loops for increments equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*) + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + sy(iy) = sx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,7) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sy(i) = sx(i) + 30 continue + if( n .lt. 7 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,7 + sy(i) = sx(i) + sy(i + 1) = sx(i + 1) + sy(i + 2) = sx(i + 2) + sy(i + 3) = sx(i + 3) + sy(i + 4) = sx(i + 4) + sy(i + 5) = sx(i + 5) + sy(i + 6) = sx(i + 6) + 50 continue + return + end + real function sdot(n,sx,incx,sy,incy) +c +c forms the dot product of two vectors. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp + integer i,incx,incy,ix,iy,m,mp1,n +c + stemp = 0.0e0 + sdot = 0.0e0 + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = stemp + sx(ix)*sy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + sdot = stemp + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = stemp + sx(i)*sy(i) + 30 continue + if( n .lt. 5 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) + + * sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4) + 50 continue + 60 sdot = stemp + return + end +*DECK SDSDOT + REAL FUNCTION SDSDOT (N, SB, SX, INCX, SY, INCY) +C***BEGIN PROLOGUE SDSDOT +C***PURPOSE Compute the inner product of two vectors with extended +C precision accumulation. +C***LIBRARY SLATEC (BLAS) +C***CATEGORY D1A4 +C***TYPE SINGLE PRECISION (SDSDOT-S, CDCDOT-C) +C***KEYWORDS BLAS, DOT PRODUCT, INNER PRODUCT, LINEAR ALGEBRA, VECTOR +C***AUTHOR Lawson, C. L., (JPL) +C Hanson, R. J., (SNLA) +C Kincaid, D. R., (U. of Texas) +C Krogh, F. T., (JPL) +C***DESCRIPTION +C +C B L A S Subprogram +C Description of Parameters +C +C --Input-- +C N number of elements in input vector(s) +C SB single precision scalar to be added to inner product +C SX single precision vector with N elements +C INCX storage spacing between elements of SX +C SY single precision vector with N elements +C INCY storage spacing between elements of SY +C +C --Output-- +C SDSDOT single precision dot product (SB if N .LE. 0) +C +C Returns S.P. result with dot product accumulated in D.P. +C SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), +C where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is +C defined in a similar way using INCY. +C +C***REFERENCES C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. +C Krogh, Basic linear algebra subprograms for Fortran +C usage, Algorithm No. 539, Transactions on Mathematical +C Software 5, 3 (September 1979), pp. 308-323. +C***ROUTINES CALLED (NONE) +C***REVISION HISTORY (YYMMDD) +C 791001 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890831 Modified array declarations. (WRB) +C 890831 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 920310 Corrected definition of LX in DESCRIPTION. (WRB) +C 920501 Reformatted the REFERENCES section. (WRB) +C***END PROLOGUE SDSDOT + REAL SX(*), SY(*), SB + DOUBLE PRECISION DSDOT +C***FIRST EXECUTABLE STATEMENT SDSDOT + DSDOT = SB + IF (N .LE. 0) GO TO 30 + IF (INCX.EQ.INCY .AND. INCX.GT.0) GO TO 40 +C +C Code for unequal or nonpositive increments. +C + KX = 1 + KY = 1 + IF (INCX .LT. 0) KX = 1+(1-N)*INCX + IF (INCY .LT. 0) KY = 1+(1-N)*INCY + DO 10 I = 1,N + DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) + KX = KX + INCX + KY = KY + INCY + 10 CONTINUE + 30 SDSDOT = DSDOT + RETURN +C +C Code for equal and positive increments. +C + 40 NS = N*INCX + DO 50 I = 1,NS,INCX + DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) + 50 CONTINUE + SDSDOT = DSDOT + RETURN + END + SUBROUTINE SGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + K = KUP1 - J + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGBMV . +* + END + SUBROUTINE SGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X', +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = A'. +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = B'. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA, NCOLA and NROWB as the number of rows +* and columns of A and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.LSAME( TRANSB, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And if alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF( NOTA )THEN +* +* Form C := alpha*A*B' + beta*C +* + DO 170, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 130, I = 1, M + C( I, J ) = ZERO + 130 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 140, I = 1, M + C( I, J ) = BETA*C( I, J ) + 140 CONTINUE + END IF + DO 160, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 200, J = 1, N + DO 190, I = 1, M + TEMP = ZERO + DO 180, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 180 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEMM . +* + END + SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + DO 110, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEMV . +* + END + SUBROUTINE SGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of SGER . +* + END + REAL FUNCTION SNRM2 ( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + REAL X( * ) +* .. +* +* SNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SNRM2 := sqrt( x'*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to SLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + INTEGER IX + REAL ABSXI, NORM, SCALE, SSQ +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE IF( N.EQ.1 )THEN + NORM = ABS( X( 1 ) ) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( X( IX ).NE.ZERO )THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI )THEN + SSQ = ONE + SSQ*( SCALE/ABSXI )**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + ( ABSXI/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + SNRM2 = NORM + RETURN +* +* End of SNRM2. +* + END + subroutine srot (n,sx,incx,sy,incy,c,s) +c +c applies a plane rotation. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp,c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = c*sx(ix) + s*sy(iy) + sy(iy) = c*sy(iy) - s*sx(ix) + sx(ix) = stemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + stemp = c*sx(i) + s*sy(i) + sy(i) = c*sy(i) - s*sx(i) + sx(i) = stemp + 30 continue + return + end + subroutine srotg(sa,sb,c,s) +c +c construct givens plane rotation. +c jack dongarra, linpack, 3/11/78. +c + real sa,sb,c,s,roe,scale,r,z +c + roe = sb + if( abs(sa) .gt. abs(sb) ) roe = sa + scale = abs(sa) + abs(sb) + if( scale .ne. 0.0 ) go to 10 + c = 1.0 + s = 0.0 + r = 0.0 + z = 0.0 + go to 20 + 10 r = scale*sqrt((sa/scale)**2 + (sb/scale)**2) + r = sign(1.0,roe)*r + c = sa/r + s = sb/r + z = 1.0 + if( abs(sa) .gt. abs(sb) ) z = s + if( abs(sb) .ge. abs(sa) .and. c .ne. 0.0 ) z = 1.0/c + 20 sa = r + sb = z + return + end + SUBROUTINE SROTM (N,SX,INCX,SY,INCY,SPARAM) +C +C APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX +C +C (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN +C (DX**T) +C +C SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE +C LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. +C WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. +C +C SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 +C +C (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) +C H=( ) ( ) ( ) ( ) +C (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). +C SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. +C + DIMENSION SX(1),SY(1),SPARAM(5) + DATA ZERO,TWO/0.E0,2.E0/ +C + SFLAG=SPARAM(1) + IF(N .LE. 0 .OR.(SFLAG+TWO.EQ.ZERO)) GO TO 140 + IF(.NOT.(INCX.EQ.INCY.AND. INCX .GT.0)) GO TO 70 +C + NSTEPS=N*INCX + IF(SFLAG) 50,10,30 + 10 CONTINUE + SH12=SPARAM(4) + SH21=SPARAM(3) + DO 20 I=1,NSTEPS,INCX + W=SX(I) + Z=SY(I) + SX(I)=W+Z*SH12 + SY(I)=W*SH21+Z + 20 CONTINUE + GO TO 140 + 30 CONTINUE + SH11=SPARAM(2) + SH22=SPARAM(5) + DO 40 I=1,NSTEPS,INCX + W=SX(I) + Z=SY(I) + SX(I)=W*SH11+Z + SY(I)=-W+SH22*Z + 40 CONTINUE + GO TO 140 + 50 CONTINUE + SH11=SPARAM(2) + SH12=SPARAM(4) + SH21=SPARAM(3) + SH22=SPARAM(5) + DO 60 I=1,NSTEPS,INCX + W=SX(I) + Z=SY(I) + SX(I)=W*SH11+Z*SH12 + SY(I)=W*SH21+Z*SH22 + 60 CONTINUE + GO TO 140 + 70 CONTINUE + KX=1 + KY=1 + IF(INCX .LT. 0) KX=1+(1-N)*INCX + IF(INCY .LT. 0) KY=1+(1-N)*INCY +C + IF(SFLAG)120,80,100 + 80 CONTINUE + SH12=SPARAM(4) + SH21=SPARAM(3) + DO 90 I=1,N + W=SX(KX) + Z=SY(KY) + SX(KX)=W+Z*SH12 + SY(KY)=W*SH21+Z + KX=KX+INCX + KY=KY+INCY + 90 CONTINUE + GO TO 140 + 100 CONTINUE + SH11=SPARAM(2) + SH22=SPARAM(5) + DO 110 I=1,N + W=SX(KX) + Z=SY(KY) + SX(KX)=W*SH11+Z + SY(KY)=-W+SH22*Z + KX=KX+INCX + KY=KY+INCY + 110 CONTINUE + GO TO 140 + 120 CONTINUE + SH11=SPARAM(2) + SH12=SPARAM(4) + SH21=SPARAM(3) + SH22=SPARAM(5) + DO 130 I=1,N + W=SX(KX) + Z=SY(KY) + SX(KX)=W*SH11+Z*SH12 + SY(KY)=W*SH21+Z*SH22 + KX=KX+INCX + KY=KY+INCY + 130 CONTINUE + 140 CONTINUE + RETURN + END + SUBROUTINE SROTMG (SD1,SD2,SX1,SY1,SPARAM) +C +C CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS +C THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* +C SY2)**T. +C WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. +C +C SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 +C +C (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) +C H=( ) ( ) ( ) ( ) +C (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). +C LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 +C RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE +C VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) +C +C THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE +C INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE +C OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. +C + DIMENSION SPARAM(5) +C + DATA ZERO,ONE,TWO /0.E0,1.E0,2.E0/ + DATA GAM,GAMSQ,RGAMSQ/4096.E0,1.67772E7,5.96046E-8/ + IF(.NOT. SD1 .LT. ZERO) GO TO 10 +C GO ZERO-H-D-AND-SX1.. + GO TO 60 + 10 CONTINUE +C CASE-SD1-NONNEGATIVE + SP2=SD2*SY1 + IF(.NOT. SP2 .EQ. ZERO) GO TO 20 + SFLAG=-TWO + GO TO 260 +C REGULAR-CASE.. + 20 CONTINUE + SP1=SD1*SX1 + SQ2=SP2*SY1 + SQ1=SP1*SX1 +C + IF(.NOT. ABS(SQ1) .GT. ABS(SQ2)) GO TO 40 + SH21=-SY1/SX1 + SH12=SP2/SP1 +C + SU=ONE-SH12*SH21 +C + IF(.NOT. SU .LE. ZERO) GO TO 30 +C GO ZERO-H-D-AND-SX1.. + GO TO 60 + 30 CONTINUE + SFLAG=ZERO + SD1=SD1/SU + SD2=SD2/SU + SX1=SX1*SU +C GO SCALE-CHECK.. + GO TO 100 + 40 CONTINUE + IF(.NOT. SQ2 .LT. ZERO) GO TO 50 +C GO ZERO-H-D-AND-SX1.. + GO TO 60 + 50 CONTINUE + SFLAG=ONE + SH11=SP1/SP2 + SH22=SX1/SY1 + SU=ONE+SH11*SH22 + STEMP=SD2/SU + SD2=SD1/SU + SD1=STEMP + SX1=SY1*SU +C GO SCALE-CHECK + GO TO 100 +C PROCEDURE..ZERO-H-D-AND-SX1.. + 60 CONTINUE + SFLAG=-ONE + SH11=ZERO + SH12=ZERO + SH21=ZERO + SH22=ZERO +C + SD1=ZERO + SD2=ZERO + SX1=ZERO +C RETURN.. + GO TO 220 +C PROCEDURE..FIX-H.. + 70 CONTINUE + IF(.NOT. SFLAG .GE. ZERO) GO TO 90 +C + IF(.NOT. SFLAG .EQ. ZERO) GO TO 80 + SH11=ONE + SH22=ONE + SFLAG=-ONE + GO TO 90 + 80 CONTINUE + SH21=-ONE + SH12=ONE + SFLAG=-ONE + 90 CONTINUE + GO TO IGO,(120,150,180,210) +C PROCEDURE..SCALE-CHECK + 100 CONTINUE + 110 CONTINUE + IF(.NOT. SD1 .LE. RGAMSQ) GO TO 130 + IF(SD1 .EQ. ZERO) GO TO 160 + ASSIGN 120 TO IGO +C FIX-H.. + GO TO 70 + 120 CONTINUE + SD1=SD1*GAM**2 + SX1=SX1/GAM + SH11=SH11/GAM + SH12=SH12/GAM + GO TO 110 + 130 CONTINUE + 140 CONTINUE + IF(.NOT. SD1 .GE. GAMSQ) GO TO 160 + ASSIGN 150 TO IGO +C FIX-H.. + GO TO 70 + 150 CONTINUE + SD1=SD1/GAM**2 + SX1=SX1*GAM + SH11=SH11*GAM + SH12=SH12*GAM + GO TO 140 + 160 CONTINUE + 170 CONTINUE + IF(.NOT. ABS(SD2) .LE. RGAMSQ) GO TO 190 + IF(SD2 .EQ. ZERO) GO TO 220 + ASSIGN 180 TO IGO +C FIX-H.. + GO TO 70 + 180 CONTINUE + SD2=SD2*GAM**2 + SH21=SH21/GAM + SH22=SH22/GAM + GO TO 170 + 190 CONTINUE + 200 CONTINUE + IF(.NOT. ABS(SD2) .GE. GAMSQ) GO TO 220 + ASSIGN 210 TO IGO +C FIX-H.. + GO TO 70 + 210 CONTINUE + SD2=SD2/GAM**2 + SH21=SH21*GAM + SH22=SH22*GAM + GO TO 200 + 220 CONTINUE + IF(SFLAG)250,230,240 + 230 CONTINUE + SPARAM(3)=SH21 + SPARAM(4)=SH12 + GO TO 260 + 240 CONTINUE + SPARAM(2)=SH11 + SPARAM(5)=SH22 + GO TO 260 + 250 CONTINUE + SPARAM(2)=SH11 + SPARAM(3)=SH21 + SPARAM(4)=SH12 + SPARAM(5)=SH22 + 260 CONTINUE + SPARAM(1)=SFLAG + RETURN + END + SUBROUTINE SSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( 1, J ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( 1, J ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSBMV . +* + END + subroutine sscal(n,sa,sx,incx) +c +c scales a vector by a constant. +c uses unrolled loops for increment equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sa,sx(*) + integer i,incx,m,mp1,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + sx(i) = sa*sx(i) + 10 continue + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sx(i) = sa*sx(i) + 30 continue + if( n .lt. 5 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + sx(i) = sa*sx(i) + sx(i + 1) = sa*sx(i + 1) + sx(i + 2) = sa*sx(i + 2) + sx(i + 3) = sa*sx(i + 3) + sx(i + 4) = sa*sx(i + 4) + 50 continue + return + end + SUBROUTINE SSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPMV . +* + END + SUBROUTINE SSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSPR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPR2 . +* + END + SUBROUTINE SSPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SSPR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPR . +* + END + subroutine sswap (n,sx,incx,sy,incy) +c +c interchanges two vectors. +c uses unrolled loops for increments equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = sx(ix) + sx(ix) = sy(iy) + sy(iy) = stemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,3) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + 30 continue + if( n .lt. 3 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,3 + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + stemp = sx(i + 1) + sx(i + 1) = sy(i + 1) + sy(i + 1) = stemp + stemp = sx(i + 2) + sx(i + 2) = sy(i + 2) + sy(i + 2) = stemp + 50 continue + return + end + SUBROUTINE SSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP1, TEMP2 +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of SSYMM . +* + END + SUBROUTINE SSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYMV . +* + END + SUBROUTINE SSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSYR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2 . +* + END + SUBROUTINE SSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL TEMP1, TEMP2 +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2K. +* + END + SUBROUTINE SSYR ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SSYR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR . +* + END + SUBROUTINE SSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYRK . +* + END + SUBROUTINE STBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 110, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 130, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STBMV . +* + END + SUBROUTINE STBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A')*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 110, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + DO 130, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STBSV . +* + END + SUBROUTINE STPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK =1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK - 1 + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + X( J ) = TEMP + KK = KK - J + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 110, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 120 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK + 1 + DO 130, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 130 CONTINUE + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STPMV . +* + END + SUBROUTINE STPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + K = KK + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( J ) = TEMP + KK = KK + J + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 120 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + K = KK + DO 130, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( JX ) = TEMP + JX = JX - INCX + KK = KK - (N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STPSV . +* + END + SUBROUTINE STRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + REAL ALPHA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF( UPPER )THEN + DO 110, J = 1, N + DO 100, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + B( I, J ) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 120, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 120 CONTINUE + B( I, J ) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 180, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 150 CONTINUE + DO 170, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 160, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 190, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 190 CONTINUE + DO 210, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 200, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF( UPPER )THEN + DO 260, K = 1, N + DO 240, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 230, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 250, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300, K = N, 1, -1 + DO 280, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 270, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMM . +* + END + SUBROUTINE STRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 110, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 130, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMV . +* + END + SUBROUTINE STRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + REAL ALPHA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B. +* + IF( UPPER )THEN + DO 130, J = 1, N + DO 120, I = 1, M + TEMP = ALPHA*B( I, J ) + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + DO 140, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 140 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 210, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 170, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 170 CONTINUE + END IF + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 180, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 200, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 220, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 220 CONTINUE + END IF + DO 240, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 230, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 250, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ). +* + IF( UPPER )THEN + DO 310, K = N, 1, -1 + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + DO 290, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 280, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 280 CONTINUE + END IF + 290 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 300, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360, K = 1, N + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 320, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 320 CONTINUE + END IF + DO 340, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 330, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 330 CONTINUE + END IF + 340 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 350, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRSM . +* + END + SUBROUTINE STRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + DO 130, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRSV . +* + END + SUBROUTINE XERBLA( SRNAME, INFO ) +* +* -- LAPACK auxiliary routine (preliminary version) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER*6 SRNAME + INTEGER INFO +* .. +* +* Purpose +* ======= +* +* XERBLA is an error handler for the LAPACK routines. +* It is called by an LAPACK routine if an input parameter has an +* invalid value. A message is printed and execution stops. +* +* Installers may consider modifying the STOP statement in order to +* call system-specific exception-handling facilities. +* +* Arguments +* ========= +* +* SRNAME (input) CHARACTER*6 +* The name of the routine which called XERBLA. +* +* INFO (input) INTEGER +* The position of the invalid parameter in the parameter list +* of the calling routine. +* +* + WRITE( *, FMT = 9999 )SRNAME, INFO +* + STOP +* + 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ', + $ 'an illegal value' ) +* +* End of XERBLA +* + END + subroutine zaxpy(n,za,zx,incx,zy,incy) +c +c constant times a vector plus a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),za + integer i,incx,incy,ix,iy,n + double precision dcabs1 + if(n.le.0)return + if (dcabs1(za) .eq. 0.0d0) return + if (incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + zy(iy) = zy(iy) + za*zx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + zy(i) = zy(i) + za*zx(i) + 30 continue + return + end + subroutine zcopy(n,zx,incx,zy,incy) +c +c copies a vector, x, to a vector, y. +c jack dongarra, linpack, 4/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*) + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + zy(iy) = zx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + zy(i) = zx(i) + 30 continue + return + end + double complex function zdotc(n,zx,incx,zy,incy) +c +c forms the dot product of a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + zdotc = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + dconjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + zdotc = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + dconjg(zx(i))*zy(i) + 30 continue + zdotc = ztemp + return + end + double complex function zdotu(n,zx,incx,zy,incy) +c +c forms the dot product of two vectors. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + zdotu = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + zx(ix)*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + zdotu = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + zx(i)*zy(i) + 30 continue + zdotu = ztemp + return + end + subroutine zdrot (n,zx,incx,zy,incy,c,s) +c +c applies a plane rotation, where the cos and sin (c and s) are +c double precision and the vectors zx and zy are double complex. +c jack dongarra, linpack, 3/11/78. +c + double complex zx(1),zy(1),ztemp + double precision c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = c*zx(ix) + s*zy(iy) + zy(iy) = c*zy(iy) - s*zx(ix) + zx(ix) = ztemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = c*zx(i) + s*zy(i) + zy(i) = c*zy(i) - s*zx(i) + zx(i) = ztemp + 30 continue + return + end + subroutine zdscal(n,da,zx,incx) +c +c scales a vector by a constant. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision da + integer i,incx,ix,n +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + zx(ix) = dcmplx(da,0.0d0)*zx(ix) + ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + zx(i) = dcmplx(da,0.0d0)*zx(i) + 30 continue + return + end + SUBROUTINE ZGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + K = KUP1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + DCONJG( A( K + I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + DCONJG( A( K + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGBMV . +* + END + SUBROUTINE ZGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = conjg( B' ). +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL CONJA, CONJB, NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + CONJA = LSAME( TRANSA, 'C' ) + CONJB = LSAME( TRANSB, 'C' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.CONJA ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.CONJB ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF( CONJA )THEN +* +* Form C := alpha*conjg( A' )*B + beta*C. +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 150, J = 1, N + DO 140, I = 1, M + TEMP = ZERO + DO 130, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 130 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF( NOTA )THEN + IF( CONJB )THEN +* +* Form C := alpha*A*conjg( B' ) + beta*C. +* + DO 200, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 160, I = 1, M + C( I, J ) = ZERO + 160 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 170, I = 1, M + C( I, J ) = BETA*C( I, J ) + 170 CONTINUE + END IF + DO 190, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( B( J, L ) ) + DO 180, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B' + beta*C +* + DO 250, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 210, I = 1, M + C( I, J ) = ZERO + 210 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 220, I = 1, M + C( I, J ) = BETA*C( I, J ) + 220 CONTINUE + END IF + DO 240, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 230, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF( CONJA )THEN + IF( CONJB )THEN +* +* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. +* + DO 280, J = 1, N + DO 270, I = 1, M + TEMP = ZERO + DO 260, L = 1, K + TEMP = TEMP + + $ DCONJG( A( L, I ) )*DCONJG( B( J, L ) ) + 260 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*conjg( A' )*B' + beta*C +* + DO 310, J = 1, N + DO 300, I = 1, M + TEMP = ZERO + DO 290, L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*B( J, L ) + 290 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF( CONJB )THEN +* +* Form C := alpha*A'*conjg( B' ) + beta*C +* + DO 340, J = 1, N + DO 330, I = 1, M + TEMP = ZERO + DO 320, L = 1, K + TEMP = TEMP + A( L, I )*DCONJG( B( J, L ) ) + 320 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 370, J = 1, N + DO 360, I = 1, M + TEMP = ZERO + DO 350, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 350 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMM . +* + END + SUBROUTINE ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + IF( NOCONJ )THEN + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = 1, M + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + IF( NOCONJ )THEN + DO 120, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = 1, M + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMV . +* + END + SUBROUTINE ZGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGERC performs the rank 1 operation +* +* A := alpha*x*conjg( y' ) + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGERC ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( Y( JY ) ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( Y( JY ) ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of ZGERC . +* + END + SUBROUTINE ZGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGERU performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGERU ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of ZGERU . +* + END + SUBROUTINE ZHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( A( 1, J ) ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( 1, J ) ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHBMV . +* + END + SUBROUTINE ZHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHEMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is an hermitian matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the hermitian matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the hermitian matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* hermitian matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* hermitian matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*DCONJG( A( K, I ) ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*DCONJG( A( K, I ) ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*DBLE( A( J, J ) ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*DCONJG( A( J, K ) ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of ZHEMM . +* + END + SUBROUTINE ZHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHEMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHEMV . +* + END + SUBROUTINE ZHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHER2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHER2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2 . +* + END + SUBROUTINE ZHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA, + $ C, LDC ) +* .. Scalar Arguments .. + CHARACTER TRANS, UPLO + INTEGER K, LDA, LDB, LDC, N + DOUBLE PRECISION BETA + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHER2K performs one of the hermitian rank 2k operations +* +* C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, +* +* where alpha and beta are scalars with beta real, C is an n by n +* hermitian matrix and A and B are n by k matrices in the first case +* and k by n matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + +* conjg( alpha )*B*conjg( A' ) + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + +* conjg( alpha )*conjg( B' )*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) ) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN + INFO = 2 + ELSE IF( N.LT.0 ) THEN + INFO = 3 + ELSE IF( K.LT.0 ) THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) ) THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) ) THEN + INFO = 12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHER2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO ) THEN + IF( UPPER ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 60 J = 1, N + DO 50 I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 70 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + +* C. +* + IF( UPPER ) THEN + DO 130 J = 1, N + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 90 I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 100 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 120 L = 1, K + IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) ) + $ THEN + TEMP1 = ALPHA*DCONJG( B( J, L ) ) + TEMP2 = DCONJG( ALPHA*A( J, L ) ) + DO 110 I = 1, J - 1 + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1, N + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 140 I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 150 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 170 L = 1, K + IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) ) + $ THEN + TEMP1 = ALPHA*DCONJG( B( J, L ) ) + TEMP2 = DCONJG( ALPHA*A( J, L ) ) + DO 160 I = J + 1, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 ) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + +* C. +* + IF( UPPER ) THEN + DO 210 J = 1, N + DO 200 I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1, K + TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J ) + 190 CONTINUE + IF( I.EQ.J ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + ELSE + C( J, J ) = BETA*DBLE( C( J, J ) ) + + $ DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 + + $ DCONJG( ALPHA )*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1, N + DO 230 I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1, K + TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J ) + 220 CONTINUE + IF( I.EQ.J ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + ELSE + C( J, J ) = BETA*DBLE( C( J, J ) ) + + $ DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 + + $ DCONJG( ALPHA )*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2K. +* + END + SUBROUTINE ZHER ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZHER performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( J )*TEMP ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + IX = KX + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( JX )*TEMP ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( J ) ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( JX ) ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + A( I, J ) = A( I, J ) + X( IX )*TEMP + 70 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER . +* + END + SUBROUTINE ZHERK( UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER TRANS, UPLO + INTEGER K, LDA, LDC, N + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHERK performs one of the hermitian rank k operations +* +* C := alpha*A*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*A + beta*C, +* +* where alpha and beta are real scalars, C is an n by n hermitian +* matrix and A is an n by k matrix in the first case and a k by n +* matrix in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, MAX +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION RTEMP + COMPLEX*16 TEMP +* .. +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) ) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN + INFO = 2 + ELSE IF( N.LT.0 ) THEN + INFO = 3 + ELSE IF( K.LT.0 ) THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHERK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO ) THEN + IF( UPPER ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO ) THEN + DO 60 J = 1, N + DO 50 I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 70 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Form C := alpha*A*conjg( A' ) + beta*C. +* + IF( UPPER ) THEN + DO 130 J = 1, N + IF( BETA.EQ.ZERO ) THEN + DO 90 I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 100 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 120 L = 1, K + IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN + TEMP = ALPHA*DCONJG( A( J, L ) ) + DO 110 I = 1, J - 1 + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( TEMP*A( I, L ) ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1, N + IF( BETA.EQ.ZERO ) THEN + DO 140 I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 150 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 170 L = 1, K + IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN + TEMP = ALPHA*DCONJG( A( J, L ) ) + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( TEMP*A( J, L ) ) + DO 160 I = J + 1, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*A + beta*C. +* + IF( UPPER ) THEN + DO 220 J = 1, N + DO 200 I = 1, J - 1 + TEMP = ZERO + DO 190 L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + RTEMP = ZERO + DO 210 L = 1, K + RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J ) + 210 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) ) + END IF + 220 CONTINUE + ELSE + DO 260 J = 1, N + RTEMP = ZERO + DO 230 L = 1, K + RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J ) + 230 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) ) + END IF + DO 250 I = J + 1, N + TEMP = ZERO + DO 240 L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J ) + 240 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 250 CONTINUE + 260 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHERK . +* + END + SUBROUTINE ZHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK ) ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK ) ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPMV . +* + END + SUBROUTINE ZHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHPR2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + + $ DBLE( X( JX )*TEMP1 + + $ Y( JY )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + AP( KK ) = DBLE( AP( KK ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + AP( KK ) = DBLE( AP( KK ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPR2 . +* + END + SUBROUTINE ZHPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZHPR performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + $ + DBLE( X( J )*TEMP ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + IX = KX + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + $ + DBLE( X( JX )*TEMP ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( J ) ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( JX ) ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPR . +* + END + subroutine zrotg(ca,cb,c,s) + double complex ca,cb,s + double precision c + double precision norm,scale + double complex alpha + if (cdabs(ca) .ne. 0.0d0) go to 10 + c = 0.0d0 + s = (1.0d0,0.0d0) + ca = cb + go to 20 + 10 continue + scale = cdabs(ca) + cdabs(cb) + norm = scale*dsqrt((cdabs(ca/dcmplx(scale,0.0d0)))**2 + + * (cdabs(cb/dcmplx(scale,0.0d0)))**2) + alpha = ca /cdabs(ca) + c = cdabs(ca) / norm + s = alpha * dconjg(cb) / norm + ca = alpha * norm + 20 continue + return + end + subroutine zscal(n,za,zx,incx) +c +c scales a vector by a constant. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex za,zx(*) + integer i,incx,ix,n +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + zx(ix) = za*zx(ix) + ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + zx(i) = za*zx(i) + 30 continue + return + end + subroutine zswap (n,zx,incx,zy,incy) +c +c interchanges two vectors. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = zx(ix) + zx(ix) = zy(iy) + zy(iy) = ztemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 + 20 do 30 i = 1,n + ztemp = zx(i) + zx(i) = zy(i) + zy(i) = ztemp + 30 continue + return + end + SUBROUTINE ZSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of ZSYMM . +* + END + SUBROUTINE ZSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYR2K. +* + END + SUBROUTINE ZSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYRK . +* + END + SUBROUTINE ZTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( KPLUS1, J ) ) + DO 100, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 120, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( KPLUS1, J ) ) + DO 130, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( 1, J ) ) + DO 160, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 180, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( 1, J ) ) + DO 190, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTBMV . +* + END + SUBROUTINE ZTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A') )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 100, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( KPLUS1, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 130, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( KPLUS1, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 160, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( 1, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + IF( NOCONJ )THEN + DO 180, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 190, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( 1, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTBSV . +* + END + SUBROUTINE ZTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + K = KK - 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + DCONJG( AP( K ) )*X( I ) + K = K - 1 + 100 CONTINUE + END IF + X( J ) = TEMP + KK = KK - J + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 120, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 130, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + DCONJG( AP( K ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 140 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + K = KK + 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 160, I = J + 1, N + TEMP = TEMP + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 160 CONTINUE + END IF + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 180, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 190, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + DCONJG( AP( K ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTPMV . +* + END + SUBROUTINE ZTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - DCONJG( AP( K ) )*X( I ) + K = K + 1 + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK + J - 1 ) ) + END IF + X( J ) = TEMP + KK = KK + J + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 120, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 130, K = KK, KK + J - 2 + TEMP = TEMP - DCONJG( AP( K ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK + J - 1 ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 140 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( AP( K ) )*X( I ) + K = K - 1 + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK - N + J ) ) + END IF + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - DCONJG( AP( K ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK - N + J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTPSV . +* + END + SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX*16 ALPHA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ) +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. +* + IF( UPPER )THEN + DO 120, J = 1, N + DO 110, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( I, I ) ) + DO 100, K = 1, I - 1 + TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J ) + 100 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = 1, M + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 130, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 130 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( I, I ) ) + DO 140, K = I + 1, M + TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J ) + 140 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 200, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 170, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 170 CONTINUE + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 180, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 210, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 210 CONTINUE + DO 230, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 220, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). +* + IF( UPPER )THEN + DO 280, K = 1, N + DO 260, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 250, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*DCONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320, K = N, 1, -1 + DO 300, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 290, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*DCONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 310, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMM . +* + END + SUBROUTINE ZTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 120, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 130, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 160, I = J + 1, N + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 180, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 190, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMV . +* + END + SUBROUTINE ZTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX*16 ALPHA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B +* or B := alpha*inv( conjg( A' ) )*B. +* + IF( UPPER )THEN + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 120, K = 1, I - 1 + TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J ) + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 130 CONTINUE + 140 CONTINUE + ELSE + DO 180, J = 1, N + DO 170, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 150, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 160, K = I + 1, M + TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 170 CONTINUE + 180 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 230, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 190, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 190 CONTINUE + END IF + DO 210, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 200, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 220, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 220 CONTINUE + END IF + 230 CONTINUE + ELSE + DO 280, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 240, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 240 CONTINUE + END IF + DO 260, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 250, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 270, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 270 CONTINUE + END IF + 280 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ) +* or B := alpha*B*inv( conjg( A' ) ). +* + IF( UPPER )THEN + DO 330, K = N, 1, -1 + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/DCONJG( A( K, K ) ) + END IF + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + DO 310, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = DCONJG( A( J, K ) ) + END IF + DO 300, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 320, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 320 CONTINUE + END IF + 330 CONTINUE + ELSE + DO 380, K = 1, N + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/DCONJG( A( K, K ) ) + END IF + DO 340, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 340 CONTINUE + END IF + DO 360, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = DCONJG( A( J, K ) ) + END IF + DO 350, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 370, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 370 CONTINUE + END IF + 380 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRSM . +* + END + SUBROUTINE ZTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 120, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 130, I = 1, J - 1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 180, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 190, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRSV . +* + END diff --git a/src/byte_mpi.f b/src/byte_mpi.f new file mode 100644 index 0000000..5815646 --- /dev/null +++ b/src/byte_mpi.f @@ -0,0 +1,209 @@ + subroutine byte_sync_mpi(mpi_fh) + +#ifdef MPIIO + include 'mpif.h' + call MPI_file_sync(mpi_fh,ierr) +#endif + + return + end +C-------------------------------------------------------------------------- + subroutine byte_open_mpi(fname,mpi_fh) + + include 'SIZE' + include 'RESTART' + +#ifdef MPIIO + include 'mpif.h' + + character*132 fname + + if(nid.eq.pid0 .or. nid.eq.pid0r) then +c write(*,*) nid, 'call MPI_file_open',fname + call MPI_file_open(nekcomm_io,fname, + & MPI_MODE_RDWR+MPI_MODE_CREATE, + & MPI_INFO_NULL,mpi_fh,ierr) + if(ierr.ne.0) then + write(6,*) 'ABORT: Error in byte_open_mpi ', ierr + call exitt + endif + endif +#else + write(6,*) 'byte_open_mpi: No MPI-IO support!' + call exitt +#endif + return + end +C-------------------------------------------------------------------------- + subroutine byte_read_mpi(buf,icount,iorank,mpi_fh) + + include 'SIZE' + include 'RESTART' + +#ifdef MPIIO + include 'mpif.h' + + real*4 buf(1) ! buffer + + if(nid.eq.pid0 .or. nid.eq.pid0r) then + iout = 4*icount ! icount is in 4-byte words + if(iorank.ge.0 .and. nid.ne.iorank) iout = 0 +c write(*,*) 'byte_read_mpi', nid, iout/4 +#ifdef MPIIO_NOCOL + call MPI_file_read(mpi_fh,buf,iout,MPI_BYTE, + & MPI_STATUS_IGNORE,ierr) +#else + call MPI_file_read_all(mpi_fh,buf,iout,MPI_BYTE, + & MPI_STATUS_IGNORE,ierr) +#endif + if(ierr.ne.0) then + write(6,*) 'ABORT: Error in byte_read_mpi ', ierr + call exitt + endif + endif +#else + write(6,*) 'byte_read_mpi: No MPI-IO support!' + call exitt +#endif + + return + end +C-------------------------------------------------------------------------- + subroutine byte_write_mpi(buf,icount,iorank,mpi_fh) + + include 'SIZE' + include 'RESTART' + +#ifdef MPIIO + include 'mpif.h' + + real*4 buf(1) ! buffer + + if(nid.eq.pid0 .or. nid.eq.pid0r) then + iout = 4*icount ! icount is in 4-byte words + if(iorank.ge.0 .and. nid.ne.iorank) iout = 0 +c write(*,*) 'byte_write', nid, iout/4 +#ifdef MPIIO_NOCOL + call MPI_file_write(mpi_fh,buf,iout,MPI_BYTE, + & MPI_STATUS_IGNORE,ierr) +#else + call MPI_file_write_all(mpi_fh,buf,iout,MPI_BYTE, + & MPI_STATUS_IGNORE,ierr) +#endif + if(ierr.ne.0) then + write(6,*) 'ABORT: Error in byte_write_mpi ', ierr + call exitt + endif + endif +#else + write(6,*) 'byte_write_mpi: No MPI-IO support!' + call exitt +#endif + + return + end +C-------------------------------------------------------------------------- + subroutine byte_close_mpi(mpi_fh) + + include 'SIZE' + include 'RESTART' + +#ifdef MPIIO + include 'mpif.h' + if(nid.eq.pid0 .or. nid.eq.pid0r) then + call MPI_file_close(mpi_fh,ierr) + endif +#else + if(nid.eq.0) write(6,*) 'byte_close_mpi: No MPI-IO support!' + call exitt +#endif + + return + end +C-------------------------------------------------------------------------- + subroutine byte_set_view(ioff_in,mpi_fh) + + include 'SIZE' + include 'RESTART' + +#ifdef MPIIO + include 'mpif.h' + integer*8 ioff_in + + if(nid.eq.pid0 .or. nid.eq.pid0r) then + if(ioff_in.lt.0) then + write(6,*) 'byte_set_view: offset<0!' + call exitt + endif +c write(*,*) 'dataoffset', nid, ioff_in + call MPI_file_set_view(mpi_fh,ioff_in,MPI_BYTE,MPI_BYTE, + & 'native',MPI_INFO_NULL,ierr) + if(ierr.ne.0) then + write(6,*) 'ABORT: Error in byte_set_view ', ierr + call exitt + endif + endif +#endif + + return + end +C-------------------------------------------------------------------------- + subroutine nek_comm_io(nn) + + include 'SIZE' + include 'RESTART' + include 'PARALLEL' + +#ifdef MPIIO + include 'mpif.h' + common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal + common /scrns/ irank_io(0:lp-1) + +#ifdef MPIIO_NOCOL + if(nid.eq.0) then + j = 0 + if(nid.eq.pid0 .or. nid.eq.pid0r) then + irank_io(j) = nid + j = j + 1 + endif + do ir = 1,np-1 + call csend(ir,idum,4,ir,0) ! handshake + call crecv(ir,ibuf,4) + if(ibuf.gt.0) then + irank_io(j) = ibuf + j = j + 1 + endif + enddo + else + mtype = nid + ibuf = -1 + if(nid.eq.pid0) then + ibuf = nid + endif + call crecv(mtype,idum,4) ! hand-shake + call csend(mtype,ibuf,4,0,0) ! u4 :=: u8 + endif + + call bcast(irank_io,isize*nn) + +c write(6,*) 'nid', nid, (irank_io(i),i=0,nn-1) + + call mpi_comm_group (nekcomm,nekgroup,ierr) + if(ierr.gt.0) call exitt + call mpi_group_incl (nekgroup,nn,irank_io,nekgroup_io,ierr) + if(ierr.gt.0) call exitt + call mpi_comm_create(nekcomm,nekgroup_io,nekcomm_io,ierr) + if(ierr.gt.0) call exitt + call mpi_group_free (nekgroup_io,ierr) + if(ierr.gt.0) call exitt + call mpi_group_free (nekgroup,ierr) + if(ierr.gt.0) call exitt +#else + nekcomm_io = nekcomm + return +#endif + +#endif + + return + end diff --git a/src/cg.f b/src/cg.f new file mode 100644 index 0000000..b4214dd --- /dev/null +++ b/src/cg.f @@ -0,0 +1,335 @@ +#ifdef TIMERS +#define NBTIMER(a) a = dnekclock() +#define STIMER(a) a = dnekclock_sync() +#define ACCUMTIMER(b,a) b = b + (dnekclock()- a ) +#else +#define NBTIMER(a) +#define STIMER(a) +#define ACCUMTIMER(a,b) +#endif + + +c----------------------------------------------------------------------- + subroutine cg(x,f,g,c,r,w,p,z,n,niter,flop_cg) + +#if defined(XSMM_DISPATCH) + USE :: LIBXSMM +#endif + + include 'SIZE' + include 'TIMER' + + +c Solve Ax=f where A is SPD and is invoked by ax() +c +c Output: x - vector of length n +c +c Input: f - vector of length n +c Input: g - geometric factors for SEM operator +c Input: c - inverse of the counting matrix +c +c Work arrays: r,w,p,z - vectors of length n +c +c User-provided ax(w,z,n) returns w := Az, +c +c User-provided solveM(z,r,n) ) returns z := M^-1 r, +c + parameter (lt=lx1*ly1*lz1*lelt) +c real ur(lt),us(lt),ut(lt) + +c parameter (lxyz=lx1*ly1*lz1) +c real ur(lxyz),us(lxyz),ut(lxyz),wk(lxyz) + + real x(n),f(n),r(n),w(n),p(n),z(n),g(1),c(n) + real rnorminit, fbeta, fpap, falpha, frnorm + + integer*8 flop_cg + integer thread, numth, find, lind, fel, lel + integer omp_get_thread_num, omp_get_num_threads + integer fiter, tmt + + pap = 0.0 + +c set machine tolerances + one = 1. + eps = 1.e-20 + if (one+eps .eq. one) eps = 1.e-14 + if (one+eps .eq. one) eps = 1.e-7 + + rtz1=1.0 + miter = niter + +c$OMP PARALLEL DEFAULT(shared) PRIVATE(thread,numth,find,lind,iter, +c$OMP& fel,lel,rtz2,beta,alpha,alphm,rlim2,rtr0,tmt,ttemp1) + + thread = 0 + numth = 1 +#ifdef _OPENMP + thread = omp_get_thread_num() + numth = omp_get_num_threads() +#endif + tmt = thread + 1 + + if (numth < nelt) then + fel = (thread*nelt)/numth + 1 + lel = ((thread+1)*nelt)/numth + else + if (thread < nelt) then + fel = thread + 1 + lel = fel + else + fel = nelt+1 + lel = nelt + end if + end if + + find = (fel-1) *(nx1*ny1*nz1)+1 + lind = lel * (nx1*ny1*nz1) + + NBTIMER(ttemp1) + call rzeroi(x,n,find,lind) + ACCUMTIMER(trzero(tmt), ttemp1) + + NBTIMER(ttemp1) + call copyi(r,f,n,find,lind) + ACCUMTIMER(tcopy(tmt), ttemp1) + + if (thread == 0) call mask (r) ! Zero out Dirichlet conditions + + gopi(tmt)=1 + NBTIMER(ttemp1) + call glsc3i(rnorminit,r,c,r,n,find,lind) + ACCUMTIMER(tglsc3a(tmt), ttemp1) + + + do iter=1,miter +#ifdef LOG + if ((nid.eq.0) .and. (thread.eq.0)) write(*,*) "iter = ", iter +#endif + NBTIMER(ttemp1) + call solveMi(z,r,n,find,lind) ! preconditioner here + ACCUMTIMER(tsolvem(tmt), ttemp1) + + rtz2=rtz1 ! OPS + gopi(tmt)=2 + NBTIMER(ttemp1) + call glsc3i(rtz1,r,c,z,n,find,lind) + ACCUMTIMER(tglsc3b(tmt), ttemp1) + + beta = rtz1/rtz2 + if (iter.eq.1) beta=0.0 + + NBTIMER(ttemp1) + call add2s1i(p,z,beta,n,find,lind) ! 2n + ACCUMTIMER(tadd2s1(tmt), ttemp1) + + call axi(w,p,g,n,fel,lel,find,lind) ! flopa + + gopi(tmt)=3 + NBTIMER(ttemp1) + call glsc3i(pap, w,c,p,n,find,lind) ! 3n + ACCUMTIMER(tglsc3c(tmt), ttemp1) + + alpha=rtz1/pap + alphm=-alpha + + NBTIMER(ttemp1) + call add2s2i(x,p,alpha,n,find,lind) ! 2n + ACCUMTIMER(tadd2s2b(tmt), ttemp1) + + NBTIMER(ttemp1) + call add2s2i(r,w,alphm,n,find,lind) ! 2n + ACCUMTIMER(tadd2s2c(tmt), ttemp1) + + gopi(tmt)=4 + NBTIMER(ttemp1) + call glsc3i(rtr, r,c,r,n,find,lind) ! 3n + ACCUMTIMER(tglsc3d(tmt), ttemp1) + + if (iter.eq.1) rlim2 = rtr*eps**2 + if (iter.eq.1) rtr0 = rtr + rnorm = sqrt(rtr) + + enddo + + if (thread == 0) then + fiter = iter + fbeta = beta + falpha= alpha + fpap = pap + frnorm = rnorm + end if + +c$OMP END PARALLEL + + 6 format('cg:',i4,1p4e12.4) + + if (nid.eq.0) then + write(6,6) 0,sqrt(rnorminit) + write(6,6) fiter,frnorm,falpha,fbeta,fpap + end if + + flop_cg = flop_cg + (fiter-1)*15_8*n + 3_8*n + + return + end +c----------------------------------------------------------------------- + subroutine solveM(z,r,n) + real z(n),r(n) + + call copy(z,r,n) + + return + end +c----------------------------------------------------------------------- + subroutine axi(w,u,gxyz,n,fel,lel,find,lind) ! Matrix-vector product: w=A*u + + include 'SIZE' + include 'TOTAL' + include 'TIMER' + + parameter (lxyz=lx1*ly1*lz1) + real w(nx1*ny1*nz1,nelt),u(nx1*ny1*nz1,nelt) + real gxyz(2*ldim,nx1*ny1*nz1,nelt) + parameter (lt=lx1*ly1*lz1*lelt) + + integer fel, lel, find, lind + integer e,thread, tmt, omp_get_thread_num + + thread = 0 +#ifdef _OPENMP + thread = omp_get_thread_num() +#endif + tmt = thread + 1 + + do e= fel, lel + call ax_e( w(1,e),u(1,e),gxyz(1,1,e)) + enddo + + NBTIMER(ttemp2) + call gs_op(gsh,w,1,1,0) ! Gather-scatter operation ! w = QQ w + ACCUMTIMER(tgsop(tmt),ttemp2) + ! L + NBTIMER(ttemp2) + call add2s2i(w,u,.1,n,find,lind) + ACCUMTIMER(tadd2s2a(tmt),ttemp2) + + if (find == 1) then + call mask(w) ! Zero out Dirichlet conditions + nxyz=nx1*ny1*nz1 + flop_a = flop_a + (19_8*nxyz+12_8*nx1*nxyz)*nelt + end if + + return + end +c------------------------------------------------------------------------- + subroutine ax1(w,u,n) + include 'SIZE' + real w(n),u(n) + real h2i + + h2i = (n+1)*(n+1) + do i = 2,n-1 + w(i)=h2i*(2*u(i)-u(i-1)-u(i+1)) + enddo + w(1) = h2i*(2*u(1)-u(2 )) + w(n) = h2i*(2*u(n)-u(n-1)) + + return + end +c------------------------------------------------------------------------- + subroutine ax_e(w,u,g) ! Local matrix-vector product + + include 'SIZE' + include 'TOTAL' + include 'TIMER' + + parameter (lxyz=lx1*ly1*lz1) + real w(lxyz),u(lxyz),g(2*ldim,lxyz) + real ur(nx1*ny1*nz1),us(nx1*ny1*nz1),ut(nx1*ny1*nz1) + integer thread, tmt, omp_get_thread_num + + thread = 0 +#ifdef _OPENMP + thread = omp_get_thread_num() +#endif + tmt = thread + 1 + + nxyz = nx1*ny1*nz1 + n = nx1-1 + + NBTIMER(ttemp3) + call local_grad3(ur,us,ut,u,n,dxm1,dxtm1) + ACCUMTIMER(tlocalgrad3(tmt),ttemp3) + + NBTIMER(ttemp3) + do i=1,nxyz + wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) + ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) + wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) + ur(i) = wr + us(i) = ws + ut(i) = wt + enddo + ACCUMTIMER(twrwswt(tmt),ttemp3) + + NBTIMER(ttemp3) + call local_grad3_t(w,ur,us,ut,n,dxm1,dxtm1) + ACCUMTIMER(tlocalgrad3t(tmt),ttemp3) + + return + end +c------------------------------------------------------------------------- + subroutine local_grad3(ur,us,ut,u,n,D,Dt) +c Output: ur,us,ut Input:u,n,D,Dt + real ur(0:n,0:n,0:n),us(0:n,0:n,0:n),ut(0:n,0:n,0:n) + real u (0:n,0:n,0:n) + real D (0:n,0:n),Dt(0:n,0:n) + integer e + + m1 = n+1 + m2 = m1*m1 + + call mxm(D ,m1,u,m1,ur,m2) + do k=0,n + call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) + enddo + call mxm(u,m2,Dt,m1,ut,m1) + + return + end +c----------------------------------------------------------------------- + subroutine local_grad3_t(u,ur,us,ut,N,D,Dt) +c Output: ur,us,ut Input:u,N,D,Dt + real u (0:N,0:N,0:N) + real ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) + real D (0:N,0:N),Dt(0:N,0:N) + real w (0:N,0:N,0:N) + integer e + + m1 = N+1 + m2 = m1*m1 + m3 = m1*m1*m1 + + call mxm(Dt,m1,ur,m1,u,m2) + + do k=0,N + call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) + enddo + call add2(u,w,m3) + + call mxm(ut,m2,D ,m1,w,m1) + call add2(u,w,m3) + + return + end +c----------------------------------------------------------------------- + subroutine mask(w) ! Zero out Dirichlet conditions + include 'SIZE' + real w(1) + + if (nid.eq.0) w(1) = 0. ! suitable for solvability + + return + end +c----------------------------------------------------------------------- diff --git a/src/comm_mpi.f b/src/comm_mpi.f new file mode 100644 index 0000000..26f1bc0 --- /dev/null +++ b/src/comm_mpi.f @@ -0,0 +1,1212 @@ +c----------------------------------------------------------------------- + subroutine iniproc(intracomm) + include 'SIZE' + include 'PARALLEL' + include 'mpif.h' + + common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal + + logical flag + integer provided + + call mpi_initialized(mpi_is_initialized, ierr) ! Initialize MPI + if ( mpi_is_initialized .eq. 0 ) then +#ifdef MPITHREADS + call mpi_init_thread (MPI_THREAD_MULTIPLE,provided,ierr) +#else + call mpi_init (ierr) +#endif + endif + + ! create communicator + call init_nek_comm(intracomm) + np = np_ + nid = nid_ + + if(nid.eq.0) call printHeader + + ! check upper tag size limit + call mpi_attr_get(MPI_COMM_WORLD,MPI_TAG_UB,nval,flag,ierr) + if (nval.lt.(10000+max(lp,lelg))) then + if(nid.eq.0) write(6,*) 'WARNING: MPI_TAG_UB too small!', nval +c call exitt + endif + + IF (NP.GT.LP) THEN + WRITE(6,*) + $ 'ERROR: Code compiled for a max of',LP,' processors.' + WRITE(6,*) + $ 'Recompile with LP =',NP,' or run with fewer processors.' + WRITE(6,*) + $ 'Aborting in routine INIPROC.' + call exitt + endif + + ! set word size for REAL + wdsize=4 + eps=1.0e-12 + oneeps = 1.0+eps + if (oneeps.ne.1.0) then + wdsize=8 + else + if(nid.eq.0) + & write(6,*) 'ABORT: single precision mode not supported!' + call exitt + endif + nekreal = mpi_real + if (wdsize.eq.8) nekreal = mpi_double_precision + + ifdblas = .false. + if (wdsize.eq.8) ifdblas = .true. + + ! set word size for INTEGER + ! HARDCODED since there is no secure way to detect an int overflow + isize = 4 + + ! set word size for LOGICAL + lsize = 4 + + ! set word size for CHARACTER + csize = 1 +c + PID = 0 + NULLPID=0 + NODE0=0 + NODE= NID+1 + + if (nid.eq.0) then + write(6,*) 'Number of processors:',np + WRITE(6,*) 'REAL wdsize :',WDSIZE + WRITE(6,*) 'INTEGER wdsize :',ISIZE + endif + + call crystal_setup(cr_h,nekcomm,np) ! set cr handle to new instance + + return + end +c----------------------------------------------------------------------- + subroutine init_nek_comm(intracomm) + include 'mpif.h' + common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal +C + call create_comm(intracomm) ! set up nekton specific communicator +c + nid_ = mynode() + np_ = numnodes() +c + return + end +c----------------------------------------------------------------------- + subroutine gop( x, w, op, n) +c +c Global vector commutative operation +c + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal +c + real x(n), w(n) + character*3 op +c + if (op.eq.'+ ') then + call mpi_allreduce (x,w,n,nekreal,mpi_sum ,nekcomm,ierr) + elseif (op.EQ.'M ') then + call mpi_allreduce (x,w,n,nekreal,mpi_max ,nekcomm,ierr) + elseif (op.EQ.'m ') then + call mpi_allreduce (x,w,n,nekreal,mpi_min ,nekcomm,ierr) + elseif (op.EQ.'* ') then + call mpi_allreduce (x,w,n,nekreal,mpi_prod,nekcomm,ierr) + else + write(6,*) nid,' OP ',op,' not supported. ABORT in GOP.' + call exitt + endif + + call copy(x,w,n) + + return + end +c----------------------------------------------------------------------- + subroutine igop( x, w, op, n) +c +c Global vector commutative operation +c + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + + integer x(n), w(n) + character*3 op + + if (op.eq.'+ ') then + call mpi_allreduce (x,w,n,mpi_integer,mpi_sum ,nekcomm,ierr) + elseif (op.EQ.'M ') then + call mpi_allreduce (x,w,n,mpi_integer,mpi_max ,nekcomm,ierr) + elseif (op.EQ.'m ') then + call mpi_allreduce (x,w,n,mpi_integer,mpi_min ,nekcomm,ierr) + elseif (op.EQ.'* ') then + call mpi_allreduce (x,w,n,mpi_integer,mpi_prod,nekcomm,ierr) + else + write(6,*) nid,' OP ',op,' not supported. ABORT in igop.' + call exitt + endif + + call icopy(x,w,n) + + return + end +c----------------------------------------------------------------------- + subroutine i8gop( x, w, op, n) +c +c Global vector commutative operation +c + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + + integer*8 x(n), w(n) + character*3 op + + if (op.eq.'+ ') then + call mpi_allreduce (x,w,n,mpi_integer8,mpi_sum ,nekcomm,ierr) + elseif (op.EQ.'M ') then + call mpi_allreduce (x,w,n,mpi_integer8,mpi_max ,nekcomm,ierr) + elseif (op.EQ.'m ') then + call mpi_allreduce (x,w,n,mpi_integer8,mpi_min ,nekcomm,ierr) + elseif (op.EQ.'* ') then + call mpi_allreduce (x,w,n,mpi_integer8,mpi_prod,nekcomm,ierr) + else + write(6,*) nid,' OP ',op,' not supported. ABORT in igop.' + call exitt + endif + + call i8copy(x,w,n) + + return + end +c----------------------------------------------------------------------- + subroutine csend(mtype,buf,len,jnid,jpid) + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + real*4 buf(1) + + call mpi_send (buf,len,mpi_byte,jnid,mtype,nekcomm,ierr) + + return + end +c----------------------------------------------------------------------- + subroutine crecv(mtype,buf,lenm) + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + integer status(mpi_status_size) +C + real*4 buf(1) + len = lenm + jnid = mpi_any_source + + call mpi_recv (buf,len,mpi_byte + $ ,jnid,mtype,nekcomm,status,ierr) +c + if (len.gt.lenm) then + write(6,*) nid,'long message in mpi_crecv:',len,lenm + call exitt + endif +c + return + end +c----------------------------------------------------------------------- + subroutine crecv3(mtype,buf,len,lenm) + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + integer status(mpi_status_size) +C + real*4 buf(1) + len = lenm + jnid = mpi_any_source + + call mpi_recv (buf,len,mpi_byte + $ ,jnid,mtype,nekcomm,status,ierr) + call mpi_get_count (status,mpi_byte,len,ierr) +c + if (len.gt.lenm) then + write(6,*) nid,'long message in mpi_crecv:',len,lenm + call exitt + endif +c + return + end +c----------------------------------------------------------------------- + integer function numnodes() + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + + call mpi_comm_size (nekcomm, numnodes , ierr) + + return + end +c----------------------------------------------------------------------- + integer function mynode() + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + integer myid + + call mpi_comm_rank (nekcomm, myid, ierr) + mynode = myid + + return + end +c----------------------------------------------------------------------- + real*8 function dnekclock() + implicit none + +#if defined (MPITIMER) + include 'mpif.h' + dnekclock=mpi_wtime() +#elif defined (BGQTIMER) + double precision readtimebase_double + external readtimebase_double + dnekclock = 0.625D-9*ReadTimeBase_Double() +#elif defined (CGTTIMER) + double precision fclock_gettime + external fclock_gettime + dnekclock = fclock_gettime() +#else + integer*8 countval, countrate, countmax + double precision countd + call system_clock(countval, countrate, countmax) + countd = countval + dnekclock = countd/countrate +#endif + + return + end +c----------------------------------------------------------------------- + real*8 function dnekclock_sync() + real*8 dnekclock + external dnekclock +c + call nekgsync() + dnekclock_sync=dnekclock() +c + return + end +c----------------------------------------------------------------------- + subroutine lbcast(ifif) +C +C Broadcast logical variable to all processors. +C + include 'SIZE' + include 'PARALLEL' + include 'mpif.h' + + logical ifif + + if (np.eq.1) return + + item=0 + if (ifif) item=1 + call bcast(item,isize) + ifif=.false. + if (item.eq.1) ifif=.true. + + return + end +c----------------------------------------------------------------------- + subroutine bcast(buf,len) + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + real*4 buf(1) + + call mpi_bcast (buf,len,mpi_byte,0,nekcomm,ierr) + + return + end +c----------------------------------------------------------------------- + subroutine create_comm(intracomm) + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + +c call mpi_comm_group (mpi_comm_world,itmp,ierr) +c call mpi_comm_create (mpi_comm_world,itmp,icomm,ierr) +c call mpi_group_free (itmp,ierr) + + call mpi_comm_dup(intracomm,nekcomm,ierr) + +c write(6,*) 'nekcomm:',nekcomm + + return + end +c----------------------------------------------------------------------- + function isend(msgtag,x,len,jnid,jpid) +c +c Note: len in bytes +c + integer x(1) +C + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal +C + call mpi_isend (x,len,mpi_byte,jnid,msgtag + $ ,nekcomm,imsg,ierr) + isend = imsg +c write(6,*) nid,' isend:',imsg,msgtag,len,jnid,(x(k),k=1,len/4) +c + return + end +c----------------------------------------------------------------------- + function irecv(msgtag,x,len) +c +c Note: len in bytes +c + integer x(1) +C + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal +C + call mpi_irecv (x,len,mpi_byte,mpi_any_source,msgtag + $ ,nekcomm,imsg,ierr) + irecv = imsg +c write(6,*) nid,' irecv:',imsg,msgtag,len +c +c + return + end +c----------------------------------------------------------------------- + subroutine msgwait(imsg) +c + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + integer status(mpi_status_size) +c +c write(6,*) nid,' msgwait:',imsg +c + call mpi_wait (imsg,status,ierr) +c + return + end +c----------------------------------------------------------------------- + subroutine nekgsync() + + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + + call mpi_barrier(nekcomm,ierr) + + return + end +c----------------------------------------------------------------------- + subroutine exittr(stringi,rdata,idata) + character*1 stringi(132) + character*1 stringo(132) + character*25 s25 + include 'SIZE' + include 'TOTAL' + + call blank(stringo,132) + call chcopy(stringo,stringi,132) + len = indx1(stringo,'$',1) + write(s25,25) rdata,idata + 25 format(1x,1p1e14.6,i10) + call chcopy(stringo(len),s25,25) + + if (nid.eq.0) write(6,1) (stringo(k),k=1,len+24) + 1 format('EXIT: ',132a1) + + call exitt + + return + end +c----------------------------------------------------------------------- + subroutine exitti(stringi,idata) + character*1 stringi(132) + character*1 stringo(132) + character*11 s11 + include 'SIZE' + include 'TOTAL' + + call blank(stringo,132) + call chcopy(stringo,stringi,132) + len = indx1(stringo,'$',1) + write(s11,11) idata + 11 format(1x,i10) + call chcopy(stringo(len),s11,11) + + if (nid.eq.0) write(6,1) (stringo(k),k=1,len+10) + 1 format('EXIT: ',132a1) + + call exitt + + return + end +c----------------------------------------------------------------------- + subroutine err_chk(ierr,string) + character*1 string(132) + character*1 ostring(132) + character*10 s10 + include 'SIZE' + include 'TOTAL' + + ierr = iglsum(ierr,1) + if(ierr.eq.0) return + + len = indx1(string,'$',1) + call blank(ostring,132) + write(s10,11) ierr + 11 format(1x,' ierr=',i3) + + call chcopy(ostring,string,len-1) + call chcopy(ostring(len),s10,10) + + if (nid.eq.0) write(6,1) (ostring(k),k=1,len+10) + 1 format('ERROR: ',132a1) + + call exitt + + return + end +c +c----------------------------------------------------------------------- + subroutine exitt0 + include 'SIZE' + include 'TOTAL' + include 'mpif.h' + +#ifdef BGQ +#define M_EXIT(X) exit_((X)) +#else +#define M_EXIT(X) exit((X)) +#endif + + real*4 papi_mflops + integer*8 papi_flops + + if(nid.eq.0) write(6,*) 'Exitting....' + +c call print_stack() + + call mpi_finalize (ierr) + call M_EXIT(0) + + return + end +c----------------------------------------------------------------------- + subroutine exitt + include 'SIZE' + include 'TOTAL' + include 'mpif.h' + + real*4 papi_mflops + integer*8 papi_flops +c + call nekgsync() + +#ifdef PAPI + call nek_flops(papi_flops,papi_mflops) +#endif + + tstop = dnekclock() + ttotal = tstop-etimes + nxyz = nx1*ny1*nz1 + + if (nid.eq.0) then + dtmp1 = 0 + dtmp2 = 0 + dtmp3 = 0 + if(istep.gt.0) then + dgp = nvtot + dgp = max(dgp,1.) + dtmp1 = np*ttime/(dgp*max(istep,1)) + dtmp2 = ttime/max(istep,1) + dtmp3 = 1.*papi_flops/1e6 + endif + write(6,*) ' ' + write(6,'(A)') 'call exitt: dying ...' + write(6,*) ' ' +c call print_stack() + write(6,*) ' ' + write(6,'(4(A,1p1e13.5,A,/))') + & 'total elapsed time : ',ttotal, ' sec' + & ,'total solver time incl. I/O : ',ttime , ' sec' + & ,'time/timestep : ',dtmp2 , ' sec' + & ,'CPU seconds/timestep/gridpt : ',dtmp1 , ' sec' +#ifdef PAPI + write(6,'(2(A,1g13.5,/))') + & 'Gflops : ',dtmp3/1000. + & ,'Gflops/s : ',papi_mflops/1000. +#endif + endif + + nz1 = 1/(nx1-ny1) + + call mpi_finalize (ierr) + call exit(0) + + return + end +c----------------------------------------------------------------------- + subroutine printHeader + + + return + end +c----------------------------------------------------------------------- + function igl_running_sum(in) +c + include 'mpif.h' + common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal + integer status(mpi_status_size) + integer x,w,r + + x = in ! running sum + w = in ! working buff + r = 0 ! recv buff + + call mpi_scan(x,r,1,mpi_integer,mpi_sum,nekcomm,ierr) + igl_running_sum = r + + return + end +c----------------------------------------------------------------------- + subroutine platform_timer(ivb) ! mxm, ping-pong, and all_reduce timer + + include 'SIZE' + include 'TOTAL' + + + call mxm_test_all(nid,ivb) ! measure mxm times +c call exitti('done mxm_test_all$',ivb) + + call comm_test(ivb) ! measure message-passing and all-reduce times + + return + end +c----------------------------------------------------------------------- + subroutine comm_test(ivb) ! measure message-passing and all-reduce times + ! ivb = 0 --> minimal verbosity + ! ivb = 1 --> fully verbose + ! ivb = 2 --> smaller sample set(shorter) + + include 'SIZE' + include 'PARALLEL' + + call gop_test(ivb) ! added, Jan. 8, 2008 + + log_np=log2(np) + np2 = 2**log_np + if (np2.eq.np) call gp2_test(ivb) ! added, Jan. 8, 2008 + + io = 6 + n512 = min(512,np-1) + + do nodec=1,n512 + nodeb=nodec + if (nodec.gt.256.and.np.gt.512) + $ nodeb = 256+(np-256)*(nodec-256)/(n512-256) - 1 + nodeb = min(nodeb,np-1) + + call pingpong(alphas,betas,0,nodeb,.0005,io,ivb) + if (nid.eq.0) write(6,1) nodeb,np,alphas,betas + 1 format(2i10,1p2e15.7,' alpha beta') + enddo + + return + end +c----------------------------------------------------------------------- + subroutine pingpong(alphas,betas,nodea,nodeb,dt,io,ivb) + + include 'SIZE' + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + + parameter (lt=lx1*ly1*lz1*lelt) + parameter (mwd1 = 3*lt,mwd2=100 000, mwd = max(mwd1,mwd2)) + common /scrns/ x(mwd),y(mwd) + + include 'mpif.h' + integer status(mpi_status_size) + + character*10 fname + + if (nid.eq.nodea) then + write(fname,3) np,nodeb + 3 format('t',i4.4,'.',i4.4) + if (io.ne.6) open (unit=io,file=fname) + endif + + call nekgsync + call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s + + nwds = 0 + if (nid.eq.nodea.and.ivb.gt.0) write(io,*) + + betas = 0 ! Reported inverse bandwidth + count = 0 + + do itest = 1,500 + + nloop = msg_vol/(nwds+2) + nloop = min(nloop,1000) + nloop = max(nloop,1) + + len = 8*nwds + + call ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x,y,x,y) + + if (nid.eq.nodea) then + tmsg = (t1-t0)/(2*nloop) ! 2*nloop--> Double Buffer + tmsg = tmsg / 2. ! one-way cost = 1/2 round-trip + tpwd = tmsg ! time-per-word + if (nwds.gt.0) tpwd = tmsg/nwds + if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd + 1 format(3i6,i12,1p2e16.8,' pg') + + if (nwds.eq.1) then + alphas = tmsg + elseif (nwds.gt.10000) then ! "average" beta + betas = (betas*count + tpwd)/(count+1) + count = count + 1 + endif + endif + + if (ivb.eq.2) then + nwds = (nwds+1)*1.25 + else + nwds = (nwds+1)*1.016 + endif + if (nwds.gt.mwd) then +c if (nwds.gt.1024) then + if (nid.eq.nodea.and.io.ne.6) close(unit=io) + call nekgsync + return + endif + + enddo + + if (nid.eq.nodea.and.io.ne.6) close(unit=io) + call nekgsync + + return + end +c----------------------------------------------------------------------- + subroutine pingpongo(alphas,betas,nodea,nodeb,dt,io,ivb) + + include 'SIZE' + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + + parameter (lt=lx1*ly1*lz1*lelt) + parameter (mwd1 = 3*lt,mwd2=100 000, mwd = max(mwd1,mwd2)) + common /scrns/ x(mwd),y(mwd) + + include 'mpif.h' + integer status(mpi_status_size) + + character*10 fname + + if (nid.eq.nodea) then + write(fname,3) np,nodeb + 3 format('t',i4.4,'.',i4.4) + if (io.ne.6) open (unit=io,file=fname) + endif + + call nekgsync + call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s + + nwds = 0 + if (nid.eq.nodea.and.ivb.gt.0) write(io,*) + + betas = 0 ! Reported inverse bandwidth + count = 0 + + do itest = 1,500 + call nekgsync + nloop = msg_vol/(nwds+2) + nloop = min(nloop,1000) + nloop = max(nloop,1) + + len = 8*nwds + jnid = mpi_any_source + + if (nid.eq.nodea) then + + msg = irecv(itest,y,1) + call csend(itest,x,1,nodeb,0) ! Initiate send, to synch. + call msgwait(msg) + + t0 = mpi_wtime () + do i=1,nloop + call mpi_irecv(y,len,mpi_byte,mpi_any_source,i + $ ,nekcomm,msg,ierr) + call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) + call mpi_wait (msg,status,ierr) + enddo + t1 = mpi_wtime () + tmsg = (t1-t0)/nloop + tmsg = tmsg / 2. ! Round-trip message time = twice one-way + tpwd = tmsg + if (nwds.gt.0) tpwd = tmsg/nwds + if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd + 1 format(3i6,i12,1p2e16.8,' pg') + + if (nwds.eq.1) then + alphas = tmsg + elseif (nwds.gt.10000) then + betas = (betas*count + tpwd)/(count+1) + count = count + 1 + endif + + elseif (nid.eq.nodeb) then + + call crecv(itest,y,1) ! Initiate send, to synch. + call csend(itest,x,1,nodea,0) + + t0 = dnekclock() + do i=1,nloop + call mpi_recv (y,len,mpi_byte + $ ,jnid,i,nekcomm,status,ierr) + call mpi_send (x,len,mpi_byte,nodea,i,nekcomm,ierr) + enddo + t1 = dnekclock() + tmsg = (t1-t0)/nloop + + endif + + nwds = (nwds+1)*1.016 + if (nwds.gt.mwd) then + if (nid.eq.nodea.and.io.ne.6) close(unit=io) + call nekgsync + return + endif + + enddo + + if (nid.eq.nodea.and.io.ne.6) close(unit=io) + call nekgsync + + return + end +c----------------------------------------------------------------------- + subroutine get_msg_vol(msg_vol,dt,nodea,nodeb) + include 'SIZE' + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + parameter (lt=lx1*ly1*lz1*lelt) + common /scrns/ x(3*lt),y(3*lt) +! +! Est. msg vol for dt s +! + msg_vol = 1000 + + nwds = min(1000,lt) + nloop = 50 + + tmsg = 0. + call gop(tmsg,t1,'+ ',1) + + len = 8*nwds + if (nid.eq.nodea) then + + msg = irecv(1,y,1) + call csend(1,x,1,nodeb,0) ! Initiate send, to synch. + call msgwait(msg) + + t0 = dnekclock() + do i=1,nloop + msg = irecv(i,y,len) + call csend(i,x,len,nodeb,0) + call msgwait(msg) + enddo + t1 = dnekclock() + tmsg = (t1-t0)/nloop + tpwd = tmsg/nwds + + elseif (nid.eq.nodeb) then + + call crecv(1,y,1) ! Initiate send, to synch. + call csend(1,x,1,nodea,0) + + t0 = dnekclock() + do i=1,nloop + call crecv(i,y,len) + call csend(i,x,len,nodea,0) + enddo + t1 = dnekclock() + tmsg = (t1-t0)/nloop + tmsg = 0. + + endif + + call gop(tmsg,t1,'+ ',1) + msg_vol = nwds*(dt/tmsg) +c if (nid.eq.nodea) write(6,*) nid,msg_vol,nwds,dt,tmsg,' msgvol' + + return + end +c----------------------------------------------------------------------- + subroutine gop_test(ivb) + include 'SIZE' + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + include 'mpif.h' + integer status(mpi_status_size) + + parameter (lt=lx1*ly1*lz1*lelt) + parameter (mwd = 3*lt) + common /scrns/ x(mwd),y(mwd) + common /scruz/ times(2,500) + common/scrcg/nwd(500) + + + nwds = 1 + mtest = 0 + do itest = 1,500 + nwds = (nwds+1)*1.016 + if (nwds.gt.mwd) goto 100 + mtest = mtest+1 + nwd(mtest) = nwds + enddo + 100 continue + + nwds = 1 + do itest = mtest,1,-1 + + tiny = 1.e-27 + call cfill(x,tiny,mwd) + nwds = nwd(itest) + call nekgsync + + + t0 = mpi_wtime () + call gop(x,y,'+ ',nwds) + call gop(x,y,'+ ',nwds) + call gop(x,y,'+ ',nwds) + call gop(x,y,'+ ',nwds) + call gop(x,y,'+ ',nwds) + call gop(x,y,'+ ',nwds) + t1 = mpi_wtime () + + tmsg = (t1-t0)/6 ! six calls + tpwd = tmsg + if (nwds.gt.0) tpwd = tmsg/nwds + times(1,itest) = tmsg + times(2,itest) = tpwd + + enddo + 101 continue + + + if (nid.eq.0) then + nwds = 1 + do itest=1,500 + if (ivb.gt.0.or.itest.eq.1) + $ write(6,1) np,nwds,(times(k,itest),k=1,2) + 1 format(i9,i12,1p2e16.8,' gop') + nwds = (nwds+1)*1.016 + if (nwds.gt.mwd) goto 102 + enddo + 102 continue + endif + + return + end +c----------------------------------------------------------------------- + subroutine gp2_test(ivb) + + include 'SIZE' + include 'mpif.h' + + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + integer status(mpi_status_size) + + parameter (lt=lx1*ly1*lz1*lelt) + parameter (mwd = 3*lt) + common /scrns/ x(mwd),y(mwd) + common /scruz/ times(2,500) + + call rzero(x,mwd) + + nwds = 1 + do itest = 1,500 + call gp2(x,y,'+ ',1,nid,np) + + t0 = mpi_wtime () + call gp2(x,y,'+ ',nwds,nid,np) + call gp2(x,y,'+ ',nwds,nid,np) + call gp2(x,y,'+ ',nwds,nid,np) + call gp2(x,y,'+ ',nwds,nid,np) + t1 = mpi_wtime () + + tmsg = (t1-t0)/4 ! four calls + tpwd = tmsg + if (nwds.gt.0) tpwd = tmsg/nwds + times(1,itest) = tmsg + times(2,itest) = tpwd + + nwds = (nwds+1)*1.016 + if (nwds.gt.mwd) goto 101 + enddo + 101 continue + + + if (nid.eq.0) then + nwds = 1 + do itest=1,500 + if (ivb.gt.0.or.itest.eq.1) + $ write(6,1) np,nwds,(times(k,itest),k=1,2) + 1 format(i9,i12,1p2e16.8,' gp2') + nwds = (nwds+1)*1.016 + if (nwds.gt.mwd) goto 102 + enddo + 102 continue + endif + + return + end +c----------------------------------------------------------------------- + integer function xor(m,n) +c +c If NOT running on a parallel processor, it is sufficient to +c have this routine return a value of XOR=1. +c +c Pick one of the following: +c +c UNIX 4.2, f77: + XOR = OR(M,N)-AND(M,N) +c +c Intel FTN286: +c XOR = M.NEQV.N +c +c Ryan-McFarland Fortran +C XOR = IEOR(M,N) +c +c XOR = 0 +c IF(M.EQ.1 .OR. N.EQ.1) XOR=1 +c IF(M.EQ.0 .AND. N.EQ.0) XOR=0 +c IF(M.EQ.1 .AND. N.EQ.1) XOR=0 +c IF(M.GT.1 .OR.N.GT.1 .OR.M.LT.0.OR.N.LT.0) THEN +c PRINT*,'ERROR IN XOR' +c STOP +c ENDIF +C + return + end +c----------------------------------------------------------------------- + subroutine gp2( x, w, op, n, nid, np) +c +c Global vector commutative operation using spanning tree. +c +c Std. fan-in/fan-out + + real x(n), w(n) + character*3 op + + integer bit, bytes, cnt, diff, spsize, i, + * parent, troot, xor, root, lnp, log2 + logical ifgot + + integer type + save type + data type /998/ + + type = type+100 + if (type.gt.9992) type=type-998 + typer = type-1 + bytes = 8*n + + root = 0 + troot = max0((nid/np)*np, root) + diff = xor(nid,troot) + nullpid = 0 + +c Accumulate contributions from children, if any + level2=1 + 5 continue + level=level2 + level2=level+level + if (mod(nid,level2).ne.0) goto 20 + call crecv(type,w,bytes) + if (op.eq.'+ ') then + do i=1,n + x(i) = x(i) + w(i) + enddo + elseif (op.eq.'* ') then + do i=1,n + x(i) = x(i) * w(i) + enddo + elseif (op.eq.'M ') then + do i=1,n + x(i) = max(x(i),w(i)) + enddo + elseif (op.eq.'m ') then + do i=1,n + x(i) = min(x(i),w(i)) + enddo + endif + if (level2.lt.np) goto 5 + +c Pass result back to parent + 20 parent = nid-level + if (nid .ne. 0) call csend(type,x,bytes,parent,nullpid) + +c Await final answer from node 0 via log_2 fan out + level=np/2 + ifgot=.false. + if (nid.eq.root) ifgot=.true. + + lnp = log2(np) + do i=1,lnp + if (ifgot) then + jnid=nid+level + call csend(typer,x,bytes,jnid,nullpid) + elseif (mod(nid,level).eq.0) then + call crecv(typer,x,bytes) + ifgot=.true. + endif + level=level/2 + enddo + + return + end +c----------------------------------------------------------------------- + subroutine ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y) + + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + + real x(1),y(1) + + include 'mpif.h' + integer status(mpi_status_size) + + i=0 + if (nid.eq.nodea) then + call nekgsync + call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b + call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a +c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a + call msgwait(msg) ! 1b + + t0 = mpi_wtime () + do i=1,nloop + call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b + call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a +c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a + call mpi_wait (msg,status,ierr) ! 2b + enddo + t1 = mpi_wtime () + + elseif (nid.eq.nodeb) then + + call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a + call nekgsync + call mpi_wait (msg,status,ierr) ! 1a + + j=i + do i=1,nloop + call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a +c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b + call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b + call mpi_wait (msg,status,ierr) ! 2a + j=i + enddo +c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb + call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb + + else + call nekgsync + endif + + return + end +c----------------------------------------------------------------------- + subroutine ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y) + + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + + real x(1),y(1) + + include 'mpif.h' + integer status(mpi_status_size) + + i=0 + if (nid.eq.nodea) then + call nekgsync + call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b + call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a + call msgwait(msg) ! 1b + + t0 = mpi_wtime () + do i=1,nloop + call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a + call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b + call mpi_wait (msg,status,ierr) ! 2b + enddo + t1 = mpi_wtime () + + elseif (nid.eq.nodeb) then + + call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a + call nekgsync + call mpi_wait (msg,status,ierr) ! 1a + + j=i + do i=1,nloop + call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b + call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a + call mpi_wait (msg,status,ierr) ! 2a + j=i + enddo + call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb + + else + call nekgsync + endif + + return + end +c----------------------------------------------------------------------- + subroutine ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x1,y1,x2,y2) +c Double Buffer : does 2*nloop timings + + common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal + + real x1(1),y1(1),x2(1),y2(1) + + include 'mpif.h' + integer status(mpi_status_size) + + itag=1 + if (nid.eq.nodea) then + call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr) ! 1b + call nekgsync + + + t0 = mpi_wtime () + do i=1,nloop + call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 1a + call mpi_irecv(y2,len,mpi_byte,nodeb,itag,nekcomm,msg2,ierr)! 2b + call mpi_wait (msg1,status,ierr) ! 1b + call mpi_send (x2,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 2a + call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr)! 3b + call mpi_wait (msg2,status,ierr) ! 2b + enddo + t1 = mpi_wtime () + call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! nb + call mpi_wait (msg1,status,ierr) ! nb + + elseif (nid.eq.nodeb) then + + call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr) ! nb + call nekgsync + + + do i=1,nloop + call mpi_wait (msg1,status,ierr) ! 1a + call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 1b + call mpi_irecv(y2,len,mpi_byte,nodea,itag,nekcomm,msg2,ierr)! 2a + call mpi_wait (msg2,status,ierr) ! 2a + call mpi_send (x2,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 2b + call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr)! 3a + enddo + call mpi_wait (msg1,status,ierr) ! 2a + call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! nb + + else + call nekgsync + endif + + return + end + + diff --git a/src/driver.f b/src/driver.f new file mode 100644 index 0000000..f45aa50 --- /dev/null +++ b/src/driver.f @@ -0,0 +1,660 @@ +c----------------------------------------------------------------------- + program nekbone + + include 'SIZE' + include 'TOTAL' + include 'mpif.h' + + parameter (lxyz = lx1*ly1*lz1) + parameter (lt=lxyz*lelt) + + real ah(lx1*lx1),bh(lx1),ch(lx1*lx1),dh(lx1*lx1) + $ ,zpts(2*lx1),wght(2*lx1) + + real x(lt),f(lt),r(lt),w(lt),p(lt),z(lt),c(lt) + real g(6,lt) + real mfloplist(1024), avmflop + real tstart, tstop + integer icount + + logical ifbrick + integer iel0,ielN,ielD ! element range per proc. + integer nx0,nxN,nxD ! poly. order range + integer npx,npy,npz ! poly. order range + integer mx,my,mz ! poly. order range + integer numthreads, omp_get_max_threads + + call iniproc(mpi_comm_world) ! has nekmpi common block + tstart = dnekclock() + call read_param(ifbrick,iel0,ielN,ielD,nx0,nxN,nxD, + + npx,npy,npz,mx,my,mz) + + numthreads = 1 +#ifdef _OPENMP + numthreads= omp_get_max_threads() +#endif + + if (nid.eq.0) then + write(*,*) "Max number of threads: ", numthreads + end if + +c GET PLATFORM CHARACTERISTICS +c iverbose = 1 +c call platform_timer(iverbose) ! iverbose=0 or 1 + + icount = 0 + +#ifndef NITER +#define NITER 100 +#endif + niter = NITER + + if (nid.eq.0) then + write(*,*) "Number of iterations: ", niter + end if + +#ifdef LOG +#define WLOG(X) if (nid .eq. 0) write(*,*) X +#else +#define WLOG(X) +#endif + +c SET UP and RUN NEKBONE + do nx1=nx0,nxN,nxD + WLOG("calling init_dim") + call init_dim + do nelt=iel0,ielN,ielD + WLOG("calling init_mesh") + call init_mesh(ifbrick,npx,npy,npz,mx,my,mz) + WLOG("calling prox_setupds") + call proxy_setupds (gsh) ! Has nekmpi common block + WLOG("calling set_multiplicity") + call set_multiplicity (c) ! Inverse of counting matrix + + WLOG("calling proxy_setup") + call proxy_setup(ah,bh,ch,dh,zpts,wght,g) + + n = nx1*ny1*nz1*nelt + + WLOG("calling set_f") + call set_f(f,c,n) + WLOG("calling cg") + call cg(x,f,g,c,r,w,p,z,n,niter,flop_cg) + + WLOG("calling nekgsync") + call nekgsync() + + WLOG("calling set_timer_flop_count") + call set_timer_flop_cnt(0) + WLOG("calling cg") + call cg(x,f,g,c,r,w,p,z,n,niter,flop_cg) + WLOG("calling set_timer_flop_count") + call set_timer_flop_cnt(1) + + WLOG("calling gs_free") + call gs_free(gsh) + + icount = icount + 1 + mfloplist(icount)= mflops*np + enddo + enddo + + avmflop = 0.0 + do i = 1, icount + avmflop = avmflop + mfloplist(i) + end do + + if (icount .ne. 0) then + avmflop = avmflop/icount + end if + + if (nid .eq. 0) then + write(6,1) avmflop + end if + 1 format('Av MFlops = ', 1pe12.4) + +c TEST BANDWIDTH BISECTION CAPACITY +c call xfer(np,cr_h) + + call nekgsync() + tstop = dnekclock() + if (nid .eq.0) write(*,*) "Total run time = ", tstop-tstart + + call exitt0 + + end +c-------------------------------------------------------------- + subroutine set_f(f,c,n) + real f(n),c(n) + integer i + integer, allocatable :: seed(:) + + call RANDOM_SEED(SIZE=i) + allocate(seed(i)) + seed = 5 + call RANDOM_SEED(PUT=seed(1:i)) + + do i=1,n + call RANDOM_NUMBER(f(i)) + enddo + + call dssum(f) + call col2 (f,c,n) + + deallocate(seed) + + return + end +c----------------------------------------------------------------------- + subroutine init_dim + +C Transfer array dimensions to common + + include 'SIZE' + include 'INPUT' + + ny1=nx1 + nz1=nx1 + + ndim=ldim + + return + end +c----------------------------------------------------------------------- + subroutine init_mesh(ifbrick,npx,npy,npz,mx,my,mz) + include 'SIZE' + include 'TOTAL' + logical ifbrick + integer e,eg,offs + + + if(.not.ifbrick) then ! A 1-D array of elements of length P*lelt + 10 continue + nelx = nelt*np + nely = 1 + nelz = 1 + + do e=1,nelt + eg = e + nid*nelt + lglel(e) = eg + enddo + else ! A 3-D block of elements + if (npx*npy*npz .ne. np) then + call cubic(npx,npy,npz,np) !xyz distribution of total proc + end if + if (mx*my*mz .ne. nelt) then + call cubic(mx,my,mz,nelt) !xyz distribution of elements per proc + end if + +c if(mx.eq.nelt) goto 10 + + nelx = mx*npx + nely = my*npy + nelz = mz*npz + + e = 1 + offs = (mod(nid,npx)*mx) + npx*(my*mx)*(mod(nid/npx,npy)) + $ + (npx*npy)*(mx*my*mz)*(nid/(npx*npy)) + do k = 0,mz-1 + do j = 0,my-1 + do i = 0,mx-1 + eg = offs+i+(j*nelx)+(k*nelx*nely)+1 + lglel(e) = eg + e = e+1 + enddo + enddo + enddo + endif + + if (nid.eq.0) then + write(6,*) "Processes: npx= ", npx, " npy= ", npy, " npz= ", npz + write(6,*) "Local Elements: mx= ", mx, " my= ", my, " mz= ", mz + write(6,*) "Elements: nelx= ", nelx, " nely= ", nely, + & " nelz= ", nelz + end if + + return + end +c----------------------------------------------------------------------- + subroutine cubic(mx,my,mz,np) + + mx = np + my = 1 + mz = 1 + ratio = np + + iroot3 = np**(1./3.) + 0.000001 + do i= iroot3,1,-1 + iz = i + myx = np/iz + nrem = np-myx*iz + + if (nrem.eq.0) then + iroot2 = myx**(1./2.) + 0.000001 + do j=iroot2,1,-1 + iy = j + ix = myx/iy + nrem = myx-ix*iy + if (nrem.eq.0) goto 20 + enddo + 20 continue + + if (ix < iy) then + it = ix + ix = iy + iy = it + end if + + if (ix < iz) then + it = ix + ix = iz + iz = it + end if + + if (iy < iz) then + it = iy + iy = iz + iz = it + end if + + if ( REAL(ix)/iz < ratio) then + ratio = REAL(ix)/iz + mx = ix + my = iy + mz = iz + end if + + end if + enddo + + return + end + +c----------------------------------------------------------------------- + subroutine set_multiplicity (c) ! Inverse of counting matrix + include 'SIZE' + include 'TOTAL' + + real c(1) + + n = nx1*ny1*nz1*nelt + + call rone(c,n) + call gs_op(gsh,c,1,1,0) ! Gather-scatter operation ! w = QQ w + + do i=1,n + c(i) = 1./c(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine set_timer_flop_cnt(iset) + include 'SIZE' + include 'TOTAL' + include 'TIMER' + + integer i, numThrd, totThd + integer omp_get_max_threads + real tmp1(20), tmp2(20), tmp3(20), tmp4(20) + + real time0,time1 + save time0,time1 + + if (iset.eq.0) then + flop_a = 0 + flop_cg = 0 + + do i = 1, tmax + trzero(i) = 0 + tcopy(i) = 0 + tsolvem(i) = 0 + tglsc3a(i) = 0 + tglsc3b(i) = 0 + tglsc3c(i) = 0 + tglsc3d(i) = 0 + tadd2s1(i) = 0 + tadd2s2a(i) = 0 + tadd2s2b(i) = 0 + tadd2s2c(i) = 0 + tlocalgrad3(i) = 0 + twrwswt(i) = 0 + tlocalgrad3t(i) = 0 + tgsop(i) = 0 + tgop(1,i) = 0 + tgop(2,i) = 0 + tgop(3,i) = 0 + tgop(4,i) = 0 + end do + + time0 = dnekclock() + else + time1 = dnekclock()-time0 + if (time1.gt.0) mflops = (1.0*flop_a+1.0*flop_cg)/(1.e6*time1) + + if (nid.eq.0) then + write(6,1) nelt,np,nx1, nelt*np + write(6,2) mflops*np, mflops + write(6,3) REAL(flop_a),REAL(flop_cg),time1 + end if + + 1 format('nelt = ', i7, ', np = ', i9, ', nx1 = ', i7, + & ', elements =', i10 ) + 2 format('Tot MFlops = ', 1pe12.5, ', MFlops = ', e12.5) + 3 format('Ax FOp = ', 1pe12.5, ', CG FOp = ', e12.5, + & ', Solve Time = ', e12.5) + +#ifdef TIMERS + numThrd = 1 +#ifdef _OPENMP + numThrd = omp_get_max_threads() +#endif + totThd = numThrd*np + + do i = 1, numThrd + tglsc3a(i) = tglsc3a(i) - tgop(1,i) + tglsc3b(i) = tglsc3b(i) - tgop(2,i) + tglsc3c(i) = tglsc3c(i) - tgop(3,i) + tglsc3d(i) = tglsc3d(i) - tgop(4,i) + end do + + do i = 1,20 + tmp1(i) = 0.0 + end do + + tmp1(1)= time1 + do i = 1, numThrd + tmp1(2)= tmp1(2) + trzero(i) + tmp1(3)= tmp1(3) + tcopy(i) + tmp1(4)= tmp1(4) + tsolvem(i) + tmp1(5)= tmp1(5) + tglsc3a(i) + tmp1(6)= tmp1(6) + tglsc3b(i) + tmp1(7)= tmp1(7) + tglsc3c(i) + tmp1(8)= tmp1(8) + tglsc3d(i) + tmp1(9)= tmp1(9) + tadd2s1(i) + tmp1(10)= tmp1(10) + tadd2s2a(i) + tmp1(11)= tmp1(11) + tadd2s2b(i) + tmp1(12)= tmp1(12) + tadd2s2c(i) + tmp1(13)= tmp1(13) + tlocalgrad3(i) + tmp1(14)= tmp1(14) + twrwswt(i) + tmp1(15)= tmp1(15) + tlocalgrad3t(i) + tmp1(16)= tmp1(16) + tgsop(i) + tmp1(17)= tmp1(17) + tgop(1,i) + tmp1(18)= tmp1(18) + tgop(2,i) + tmp1(19)= tmp1(19) + tgop(3,i) + tmp1(20)= tmp1(20) + tgop(4,i) + end do + + call gop(tmp1, tmp4, '+ ', 20) + + tmp2(1)= time1 + tmp2(2)= trzero(1) + tmp2(3)= tcopy(1) + tmp2(4)= tsolvem(1) + tmp2(5)= tglsc3a(1) + tmp2(6)= tglsc3b(1) + tmp2(7)= tglsc3c(1) + tmp2(8)= tglsc3d(1) + tmp2(9)= tadd2s1(1) + tmp2(10)= tadd2s2a(1) + tmp2(11)= tadd2s2b(1) + tmp2(12)= tadd2s2c(1) + tmp2(13)= tlocalgrad3(1) + tmp2(14)= twrwswt(1) + tmp2(15)= tlocalgrad3t(1) + tmp2(16)= tgsop(1) + tmp2(17)= tgop(1,1) + tmp2(18)= tgop(2,1) + tmp2(19)= tgop(3,1) + tmp2(20)= tgop(4,1) + + do i = 2, numThrd + if (trzero(i) < tmp2(2)) tmp2(2)= trzero(i) + if (tcopy(i) < tmp2(3)) tmp2(3)= tcopy(i) + if (tsolvem(i) < tmp2(4)) tmp2(4)= tsolvem(i) + if (tglsc3a(i) < tmp2(5)) tmp2(5)= tglsc3a(i) + if (tglsc3b(i) < tmp2(6)) tmp2(6)= tglsc3b(i) + if (tglsc3c(i) < tmp2(7)) tmp2(7)= tglsc3c(i) + if (tglsc3d(i) < tmp2(8)) tmp2(8)= tglsc3d(i) + if (tadd2s1(i) < tmp2(9)) tmp2(9)= tadd2s1(i) + if (tadd2s2a(i) < tmp2(10)) tmp2(10)= tadd2s2a(i) + if (tadd2s2b(i) < tmp2(11)) tmp2(11)= tadd2s2b(i) + if (tadd2s2c(i) < tmp2(12)) tmp2(12)= tadd2s2c(i) + if (tlocalgrad3(i) < tmp2(13)) tmp2(13)= tlocalgrad3(i) + if (twrwswt(i) < tmp2(14)) tmp2(14)= twrwswt(i) + if (tlocalgrad3t(i) < tmp2(15)) tmp2(15)= tlocalgrad3t(i) + if (tgsop(i) < tmp2(16)) tmp2(16)= tgsop(i) + if (tgop(1,i) < tmp2(17)) tmp2(17)= tgop(1,i) + if (tgop(2,i) < tmp2(18)) tmp2(18)= tgop(2,i) + if (tgop(3,i) < tmp2(19)) tmp2(19)= tgop(3,i) + if (tgop(4,i) < tmp2(20)) tmp2(20)= tgop(4,i) + end do + + call gop(tmp2, tmp4, 'm ', 20) + + tmp3(1)= time1 + tmp3(2)= trzero(1) + tmp3(3)= tcopy(1) + tmp3(4)= tsolvem(1) + tmp3(5)= tglsc3a(1) + tmp3(6)= tglsc3b(1) + tmp3(7)= tglsc3c(1) + tmp3(8)= tglsc3d(1) + tmp3(9)= tadd2s1(1) + tmp3(10)= tadd2s2a(1) + tmp3(11)= tadd2s2b(1) + tmp3(12)= tadd2s2c(1) + tmp3(13)= tlocalgrad3(1) + tmp3(14)= twrwswt(1) + tmp3(15)= tlocalgrad3t(1) + tmp3(16)= tgsop(1) + tmp3(17)= tgop(1,1) + tmp3(18)= tgop(2,1) + tmp3(19)= tgop(3,1) + tmp3(20)= tgop(4,1) + + do i = 2, numThrd + if (trzero(i) > tmp3(2)) tmp3(2)= trzero(i) + if (tcopy(i) > tmp3(3)) tmp3(3)= tcopy(i) + if (tsolvem(i) > tmp3(4)) tmp3(4)= tsolvem(i) + if (tglsc3a(i) > tmp3(5)) tmp3(5)= tglsc3a(i) + if (tglsc3b(i) > tmp3(6)) tmp3(6)= tglsc3b(i) + if (tglsc3c(i) > tmp3(7)) tmp3(7)= tglsc3c(i) + if (tglsc3d(i) > tmp3(8)) tmp3(8)= tglsc3d(i) + if (tadd2s1(i) > tmp3(9)) tmp3(9)= tadd2s1(i) + if (tadd2s2a(i) > tmp3(10)) tmp3(10)= tadd2s2a(i) + if (tadd2s2b(i) > tmp3(11)) tmp3(11)= tadd2s2b(i) + if (tadd2s2c(i) > tmp3(12)) tmp3(12)= tadd2s2c(i) + if (tlocalgrad3(i) > tmp3(13)) tmp3(13)= tlocalgrad3(i) + if (twrwswt(i) > tmp3(14)) tmp3(14)= twrwswt(i) + if (tlocalgrad3t(i) > tmp3(15)) tmp3(15)= tlocalgrad3t(i) + if (tgsop(i) > tmp3(16)) tmp3(16)= tgsop(i) + if (tgop(1,i) > tmp3(17)) tmp3(17)= tgop(1,i) + if (tgop(2,i) > tmp3(18)) tmp3(18)= tgop(2,i) + if (tgop(3,i) > tmp3(19)) tmp3(19)= tgop(3,i) + if (tgop(4,i) > tmp3(20)) tmp3(20)= tgop(4,i) + end do + + call gop(tmp3, tmp4, 'M ', 20) + + if (nid.eq.0) then + write(6,4) "time = ",tmp1(1)/np, tmp2(1), tmp3(1) + write(6,4) "rzero = ",tmp1(2)/totThd, tmp2(2), tmp3(2) + write(6,4) "copy = ",tmp1(3)/totThd, tmp2(3), tmp3(3) + write(6,4) "glsc3a = ",tmp1(5)/totThd, tmp2(5), tmp3(5) + write(6,4) "gopa = ",tmp1(17)/totThd, tmp2(17), tmp3(17) + write(6,4) "solveM = ",tmp1(4)/totThd, tmp2(4), tmp3(4) + write(6,4) "glsc3b = ",tmp1(6)/totThd, tmp2(6), tmp3(6) + write(6,4) "gopb = ",tmp1(18)/totThd, tmp2(18), tmp3(18) + write(6,4) "add2s1 = ",tmp1(9)/totThd, tmp2(9), tmp3(9) + write(6,4) "localgrad3 = ",tmp1(13)/totThd, tmp2(13), tmp3(13) + write(6,4) "wrwswt = ",tmp1(14)/totThd, tmp2(14), tmp3(14) + write(6,4) "localgradt = ",tmp1(15)/totThd, tmp2(15), tmp3(15) + write(6,4) "gsop = ",tmp1(16)/totThd, tmp2(16), tmp3(16) + write(6,4) "add2s2a = ",tmp1(10)/totThd, tmp2(10), tmp3(10) + write(6,4) "glsc3c = ",tmp1(7)/totThd, tmp2(7), tmp3(7) + write(6,4) "gopc = ",tmp1(19)/totThd, tmp2(19), tmp3(19) + write(6,4) "add2s2b = ",tmp1(11)/totThd, tmp2(11), tmp3(11) + write(6,4) "add2s2c = ",tmp1(12)/totThd, tmp2(12), tmp3(12) + write(6,4) "glsc3d = ",tmp1(8)/totThd, tmp2(8), tmp3(8) + write(6,4) "gopd = ",tmp1(20)/totThd, tmp2(20), tmp3(20) + endif + + 4 format(A, 1pe12.4, e12.4, e12.4) + +c if (nid.eq.0) then +c write(6,4) "av time: ", tmp2(1)/np, tmp2(2)/totThd, +c & tmp2(3)/totThd, tmp2(4)/totThd, tmp2(5)/totThd +c write(6,5) "av time: ", tmp2(5)/totThd, tmp2(6)/totThd, +c & tmp2(7)/totThd, tmp2(8)/totThd +c endif + +c if (nid.eq.0) then +c write(6,4) "min time: ", tmp2(1), tmp2(2), tmp2(3), +c & tmp2(4), tmp2(5) +c write(6,5) "min time: ", tmp2(5), tmp2(6), tmp2(7), +c & tmp2(8) +c endif + +c if (nid.eq.0) then +c write(6,4) "max time: ", tmp2(1), tmp2(2), tmp2(3), +c & tmp2(4), tmp2(5) +c write(6,5) "max time: ", tmp2(5), tmp2(6), tmp2(7), +c & tmp2(8) +c endif + +c 4 format(A, ' cg= ', 1pe12.4, ', zcm= ', e12.4, +c & ', glsc3= ', e12.4, ', add2sx= ', e12.4, +c & ', ax= ', e12.4) +c 5 format(A, ' ax= ', 1pe12.4, ', add2s2= ', e12.4, +c & ', gsop= ', e12.4, ', axe= ', e12.4) +#endif + endif + + return + end +c----------------------------------------------------------------------- + subroutine xfer(np,gsh) + include 'SIZE' + parameter(npts_max = lx1*ly1*lz1*lelt) + + real buffer(2,npts_max) + integer ikey(npts_max) + + + nbuf = 800 + npts = 1 + do itest=1,200 + npoints = npts*np + + call load_points(buffer,nppp,npoints,npts,nbuf) + iend = mod1(npoints,nbuf) + istart = 1 + if(nid.ne.0)istart = iend+(nid-1)*nbuf+1 + do i = 1,nppp + icount=istart+(i-1) + ikey(i)=mod(icount,np) + enddo + + call nekgsync + time0 = dnekclock() + do loop=1,50 + call crystal_tuple_transfer(gsh,nppp,npts_max, + $ ikey,1,ifake,0,buffer,2,1) + enddo + time1 = dnekclock() + etime = (time1-time0)/50 + + if (nid.eq.0) write(6,1) np,npts,npoints,etime + 1 format(2i7,i10,1p1e12.4,' bandwidth' ) + npts = 1.02*(npts+1) + if (npts.gt.npts_max) goto 100 + enddo + 100 continue + + return + end +c----------------------------------------------------------------------- + subroutine load_points(buffer,nppp,npoints,npts,nbuf) + include 'SIZE' + include 'PARALLEL' + + real buffer(2,nbuf) + + nppp=0 + if(nbuf.gt.npts) then + npass = 1+npoints/nbuf + + do ipass = 1,npass + if(nid.eq.ipass.and.ipass.ne.npass) then + do i = 1,nbuf + buffer(1,i)=i + buffer(2,i)=nid + enddo + nppp=nbuf + elseif (npass.eq.ipass.and.nid.eq.0) then + mbuf=mod1(npoints,nbuf) + do i=1,mbuf + buffer(1,i)=i + buffer(2,i)=nid + enddo + nppp=mbuf + endif + enddo + else + do i = 1,npts + buffer(1,i)=i + buffer(2,i)=nid + enddo + nppp=npts + endif + + return + end +c---------------------------------------------------------------------- + subroutine read_param(ifbrick,iel0,ielN,ielD,nx0,nxN,nxD, + + npx,npy,npz,mx,my,mz) + include 'SIZE' + logical ifbrick + integer iel0,ielN,ielD,nx0,nxN,nxD,npx,npy,npz,mx,my,mz + + !open .rea + if(nid.eq.0) then + open(unit=9,file='data.rea',status='old') + read(9,*,err=100) ifbrick + read(9,*,err=100) iel0,ielN,ielD + read(9,*,err=100) nx0,nxN,nxD + read(9,*,err=100) npx,npy,npz + read(9,*,err=100) mx,my,mz + close(9) + endif + call bcast(ifbrick,4) + call bcast(iel0,4) + call bcast(ielN,4) + call bcast(ielD,4) +c nx0=lx1 +c nxN=lx1 + call bcast(nx0,4) + call bcast(nxN,4) + call bcast(nxD,4) + call bcast(npx,4) + call bcast(npy,4) + call bcast(npz,4) + call bcast(mx,4) + call bcast(my,4) + call bcast(mz,4) + if(iel0.gt.ielN.or.nx0.gt.nxN) goto 200 + + return + + 100 continue + write(6,*) "ERROR READING data.rea....ABORT" + call exitt0 + + 200 continue + write(6,*) "ERROR data.rea :: iel0 > ielN or nx0 > nxN :: ABORT" + call exitt0 + + return + end +c----------------------------------------------------------------------- diff --git a/src/driver_comm.f b/src/driver_comm.f new file mode 100644 index 0000000..d66e436 --- /dev/null +++ b/src/driver_comm.f @@ -0,0 +1,21 @@ +c----------------------------------------------------------------------- + program nekbone + + include 'SIZE' + include 'TOTAL' + include 'mpif.h' + + parameter (lxyz = lx1*ly1*lz1) + parameter (lt=lxyz*lelt) + + + call iniproc(mpi_comm_world) ! has nekmpi common block + +c GET PLATFORM CHARACTERISTICS + iverbose = 1 + call platform_timer(iverbose) ! iverbose=0 or 1 + + call exitt0 + + end +c-------------------------------------------------------------- diff --git a/src/jl/Makefile b/src/jl/Makefile new file mode 100644 index 0000000..96c2f75 --- /dev/null +++ b/src/jl/Makefile @@ -0,0 +1,91 @@ +CC=mpicc -std=c99 --pedantic +CFLAGS+=-DMPI +CFLAGS+=-DPREFIX=jl_ +CFLAGS+=-DNO_NEK_EXITT +CFLAGS+=-DGLOBAL_LONG +LDFLAGS+=-lm + +#CFLAGS+=-DPRINT_MALLOCS=1 + +CFLAGS+=-DUSE_NAIVE_BLAS +#CFLAGS+=-DUSE_CBLAS +#LDFLAGS+=-lcblas + +#CFLAGS+=-DAMG_DUMP +CFLAGS+=-DGS_TIMING -DGS_BARRIER + +#CFLAGS+=-O0 -g +CFLAGS+=-O3 -march=native + +CFLAGS+=-W -Wall -Wno-unused-function -Wno-unused-parameter +#CFLAGS+=-Minform=warn + +CCCMD=$(CC) $(G) $(CFLAGS) +LINKCMD=$(CC) $(G) $(LDFLAGS) +#RLINKCMD = $(LD) -r +.PHONY: cmds deps tests clean objects odepinfo + +TESTS=sort_test sort_test2 sarray_sort_test spchol_test \ + comm_test crystal_test sarray_transfer_test \ + gs_test gs_test_old gs_unique_test \ + xxt_test xxt_test2 crs_test \ + poly_test poly_test2 lob_bnd_test obbox_test \ + findpts_el_2_test findpts_el_2_test2 \ + findpts_el_3_test findpts_el_3_test2 \ + findpts_local_test findpts_test + +CRS=$(AMG) + +tests: $(TESTS) ; +clean: ; @$(RM) $(TESTS) *.o *.s + +cmds: ; @echo CC = $(CCCMD); echo LINK = $(LINKCMD); + +deps: ; ./cdep.py *.c > makefile.cdep; + +odepinfo: deps objects; @./odep_info.py *.o + +-include makefile.cdep + +%.o: %.c ; @echo CC $<; $(CCCMD) -c $< -o $@ +%.s: %.c ; @echo CC -S $<; $(CCCMD) -S $< -o $@ +objects: $(OBJECTS) ; + +poly_imp.h: gen_poly_imp.c + $(RM) poly_imp.h; + $(CC) -lgmp -lm gen_poly_imp.c -o gen_poly_imp; + ./gen_poly_imp > poly_imp.h; + $(RM) gen_poly_imp + +GS_OBJECTS=gs.o sort.o sarray_transfer.o sarray_sort.o \ + gs_local.o fail.o crystal.o comm.o tensor.o + +XXT=sparse_cholesky.o xxt.o +AMG=amg.o + +sort_test: sort.o fail.o comm.o tensor.o gs_local.o sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sort_test2: sort.o fail.o comm.o tensor.o gs_local.o sort_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sarray_sort_test: sort.o fail.o comm.o tensor.o gs_local.o sarray_sort.o sarray_sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +spchol_test: sparse_cholesky.o sort.o fail.o comm.o tensor.o gs_local.o spchol_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +comm_test: fail.o comm.o tensor.o gs_local.o comm_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +crystal_test: fail.o crystal.o comm.o tensor.o gs_local.o crystal_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +sarray_transfer_test: sarray_transfer.o sarray_sort.o sort.o fail.o crystal.o comm.o tensor.o gs_local.o sarray_transfer_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ + +gs_test: gs_test.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +gs_test_old: gs_test_old.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +gs_unique_test: gs_unique_test.o $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +xxt_test: xxt_test.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +xxt_test2: xxt_test2.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ +crs_test: crs_test.o $(CRS) $(GS_OBJECTS); @echo LINK $@; $(LINKCMD) $^ -o $@ + +poly_test2: poly.o fail.o comm.o tensor.o gs_local.o poly_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +poly_test: poly.o fail.o comm.o tensor.o gs_local.o poly_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +lob_bnd_test: tensor.o poly.o lob_bnd.o fail.o comm.o gs_local.o lob_bnd_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +obbox_test: rand_elt_test.o poly.o obbox.o tensor.o lob_bnd.o fail.o comm.o gs_local.o obbox_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_2_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_2.o findpts_el_2_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_2_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_2.o findpts_el_2_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_3_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_3.o findpts_el_3_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_el_3_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_3.o findpts_el_3_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_local_test: rand_elt_test.o lob_bnd.o fail.o comm.o tensor.o gs_local.o poly.o findpts_local.o sort.o sarray_sort.o obbox.o findpts_el_3.o findpts_el_2.o findpts_local_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ +findpts_test: sarray_transfer.o sort.o rand_elt_test.o lob_bnd.o poly.o findpts.o sarray_sort.o findpts_local.o obbox.o tensor.o findpts_el_3.o findpts_el_2.o fail.o crystal.o comm.o gs_local.o findpts_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@ + diff --git a/src/jl/README b/src/jl/README new file mode 100644 index 0000000..36c66ab --- /dev/null +++ b/src/jl/README @@ -0,0 +1,69 @@ + +A high-level view of the code in this directory is as follows. See each header +file listed for more documentation. + +The following headers are fundamental to most of the code. + + name.h: a given prefix is added to all external symbols; + determines how FORTRAN routines are named + types.h: defines the integer types used everywhere (e.g., for array indices) + mem.h: memory-management wrappers; + "array" type (generic dynamically sized array); + "buffer" type ( = char array ) + comm.h: wrappers for MPI calls (with alternative single proc versions) + +The Gather/Scatter library top-level interface is defined in "gs.h". +The file "gs_defs.h" defines the datatypes and operations that it supports. + +There are two coarse solvers (XXT and AMG), which are not currently very well +documented. The interface is given in "crs.h". + +"findpts" is documented in "findpts.c". The idea is that during a run of an +SEM code, we have a geometry map + (processor, element, r, s, t) -> (x, y, z) +that defines our mesh. Within each element, the xyz coordinate is a +polynomial function of the parametric r,s,t coordinates. +"findpts" takes a distributed list of "(x,y,z)" points and computes the inverse +of the above map. +"findpts_eval" takes a list of "(proc,el,r,s,t)" coords, e.g., as returned by + "findpts", and interpolates a given field at each point. + + +The "workhorses" of the implementations of much of the above are the +"sarray_sort" and "sarray_transfer" routines, documented in the respective +headers. The "array" type, defined in "mem.h", can be used to keep track of a +dynamically sized array of (arbitrary) structs. + + sarray_sort.h: + sort an array of structs (locally/sequentially) by one or two of its fields + sarray_transfer.h: + transfer each struct in array to the processor specified by a given field + +These in turn, are implemented using the lower-level routines of +"sort.h", and "crystal_router.h". + + +The "findpts" algorithm makes use of a number of lower-level routines +possibly useful on their own. + + poly.h: computation of quadrature nodes; fast polynomial interpolation + lob_bnd.h: (relatively) fast yet robust bounds for polynomials on [-1,1]^d + obbox.h: oriented as well as axis-aligned bounding boxes for spectral els + tensor.h: some tensor-product applications, + with BLAS ops delegated to Nek, cblas, or a naive imp + +All of the preprocessor macros that affect compilation are: + name.h: PREFIX="..." prefix added to all C external symbols + FPREFIX="..." prefix added to all FORTRAN routines + UPCASE, UNDERSCORE determines FORTRAN naming convention + types.h: USE_LONG, USE_LONG_LONG, GLOBAL_LONG, GLOBAL_LONG_LONG + determine the integer types used by all code + mem.h: PRINT_MALLOCS=1 (print all mem mngmt to stdout) + comm.h: MPI (use MPI when defined; + otherwise, use a dummy single-proc implementation) + tensor.h: USE_CBLAS, USE_NAIVE_BLAS + (select BLAS implementation; default is Nek's mxm) + fail.c: NO_NEK_EXITT when defined, don't call Nek's exitt routine + amg.c: AMG_BLOCK_ROWS number of rows to read at a time (default=1200) + GS_TIMING record timings for the matrix multiplies + GS_BARRIER use a barrier to improve the quality of the timings diff --git a/src/jl/c99.h b/src/jl/c99.h new file mode 100644 index 0000000..a5a44e3 --- /dev/null +++ b/src/jl/c99.h @@ -0,0 +1,16 @@ +#ifndef C99_H +#define C99_H + +#ifndef __STDC_VERSION__ +# define NO_C99 +#elif __STDC_VERSION__ < 199901L +# define NO_C99 +#endif + +#ifdef NO_C99 +# define restrict +# define inline +# undef NO_C99 +#endif + +#endif diff --git a/src/jl/cdep.py b/src/jl/cdep.py new file mode 100755 index 0000000..a0dd87a --- /dev/null +++ b/src/jl/cdep.py @@ -0,0 +1,33 @@ +#!/usr/bin/python + +import sys, os, re + +#mergestr = lambda x: reduce((lambda a,b: a+" "+b),x,"") + +pathjoin = lambda a,b: os.path.normpath(os.path.join(a,b)) +include_re = re.compile("\s*#\s*include\s*\"([^\"]*)\"") +incmatch = lambda x: ( include_re.match(line) for line in open(x) ) +incline = lambda x,m: pathjoin(os.path.split(x)[0],m.group(1)) +incl = lambda x: [ incline(x,m) for m in incmatch(x) if m!=None ] +includes = {} +def get_include(x): + if not includes.has_key(x): includes[x] = incl(x) + return includes[x] + +def closure(seq,f): + v = [], [x for x in seq], set(x for x in seq) + while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in + f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]] + return v[0] + +src_files = sys.argv[1:] +files = closure(src_files, get_include) +deps = dict((x,closure(includes[x],lambda y: includes[y])) for x in src_files) + +obj = lambda x: os.path.splitext(x)[0]+".o" + +for x in src_files: + print obj(x)+": "+x+reduce((lambda a,b: a+" "+b),deps[x],"") + +print +print "OBJECTS="+reduce((lambda a,b: a+" "+obj(b)),src_files,"") diff --git a/src/jl/comm.c b/src/jl/comm.c new file mode 100644 index 0000000..8e5c9a3 --- /dev/null +++ b/src/jl/comm.c @@ -0,0 +1,175 @@ +#include /* for size_t */ +#include /* for exit */ +#include /* memcpy */ +#include /* for gs identities */ +#include /* for gs identities */ +#include "name.h" +#include "fail.h" +#include "types.h" +#include "tensor.h" +#include "gs_defs.h" +#include "gs_local.h" +#include "comm.h" + +uint comm_gbl_id=0, comm_gbl_np=1; + +GS_DEFINE_IDENTITIES() +GS_DEFINE_DOM_SIZES() + +static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer) +{ + comm_req req[2]; + size_t vsize = vn*gs_dom_size[dom]; + const uint id=com->id, np=com->np; + uint n = np, c=1, odd=0, base=0; + void *buf[2]; + void *red = (char*)scan+vsize; + buf[0]=buffer,buf[1]=(char*)buffer+vsize; + while(n>1) { + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } + gs_init_array(scan,vn,dom,op); + memcpy(red,v,vsize); + while(n>=1, n<<=1, n+=(odd&1); + odd>>=1; + if(base==id) { + comm_irecv(&req[0],com, buf[0],vsize, id+n/2,id+n/2); + comm_isend(&req[1],com, red ,vsize, id+n/2,id); + comm_wait(req,2); + gs_gather_array(red,buf[0],vn,dom,op); + } else { + comm_irecv(&req[0],com, scan,vsize, base,base); + comm_isend(&req[1],com, red ,vsize, base,id); + comm_wait(req,2); + break; + } + } + while(n>1) { + if(base==id) { + comm_send(com, scan ,2*vsize, id+n/2,id); + } else { + comm_recv(com, buffer,2*vsize, base,base); + gs_gather_array(scan,buf[0],vn,dom,op); + memcpy(red,buf[1],vsize); + } + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } +} + + +static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf) +{ + size_t total_size = vn*gs_dom_size[dom]; + const uint id=com->id, np=com->np; + uint n = np, c=1, odd=0, base=0; + while(n>1) { + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } + while(n>=1, n<<=1, n+=(odd&1); + odd>>=1; + if(base==id) { + comm_recv(com, buf,total_size, id+n/2,id+n/2); + gs_gather_array(v,buf,vn, dom,op); + } else { + comm_send(com, v,total_size, base,id); + break; + } + } + while(n>1) { + if(base==id) + comm_send(com, v,total_size, id+n/2,id); + else + comm_recv(com, v,total_size, base,base); + odd=(odd<<1)|(n&1); + c<<=1, n>>=1; + if(id>=base+n) c|=1, base+=n, n+=(odd&1); + } +} + +void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer) +{ + scan_imp(scan, com,dom,op, v,vn, buffer); +} + +void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf) +{ + if(vn==0) return; +#ifdef MPI + { + MPI_Datatype mpitype; + MPI_Op mpiop; + #define DOMAIN_SWITCH() do { \ + switch(dom) { case gs_double: mpitype=MPI_DOUBLE; break; \ + case gs_float: mpitype=MPI_FLOAT; break; \ + case gs_int: mpitype=MPI_INT; break; \ + case gs_long: mpitype=MPI_LONG; break; \ + WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;) \ + default: goto comm_allreduce_byhand; \ + } \ + } while(0) + DOMAIN_SWITCH(); + #undef DOMAIN_SWITCH + switch(op) { case gs_add: mpiop=MPI_SUM; break; + case gs_mul: mpiop=MPI_PROD; break; + case gs_min: mpiop=MPI_MIN; break; + case gs_max: mpiop=MPI_MAX; break; + default: goto comm_allreduce_byhand; + } + MPI_Allreduce(v,buf,vn,mpitype,mpiop,com->c); + memcpy(v,buf,vn*gs_dom_size[dom]); + return; + } +#endif +#ifdef MPI +comm_allreduce_byhand: + allreduce_imp(com,dom,op, v,vn, buf); +#endif +} + +double comm_dot(const struct comm *comm, double *v, double *w, uint n) +{ + double s=tensor_dot(v,w,n),b; + comm_allreduce(comm,gs_double,gs_add, &s,1, &b); + return s; +} + +/* T comm_reduce__T(const struct comm *comm, gs_op op, const T *in, uint n) */ + +#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break; +#define SWITCH_OP(T,op) do switch(op) { \ + GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0) + +#define WITH_OP(T,OP) \ + do { T v = *in++; GS_DO_##OP(accum,v); } while(--n) + +#define DEFINE_REDUCE(T) \ +T PREFIXED_NAME(comm_reduce__##T)( \ + const struct comm *comm, gs_op op, const T *in, uint n) \ +{ \ + T accum = gs_identity_##T[op], buf; \ + if(n!=0) SWITCH_OP(T,op); \ + comm_allreduce(comm,gs_##T,op, &accum,1, &buf); \ + return accum; \ +} + +GS_FOR_EACH_DOMAIN(DEFINE_REDUCE) + +#undef DEFINE_REDUCE +#undef WITH_OP +#undef SWITCH_OP +#undef SWITCH_OP_CASE + diff --git a/src/jl/comm.h b/src/jl/comm.h new file mode 100644 index 0000000..4d0ed3e --- /dev/null +++ b/src/jl/comm.h @@ -0,0 +1,255 @@ +#ifndef COMM_H +#define COMM_H + +/* requires: + for size_t + for exit + "fail.h", "types.h" + "gs_defs.h" for comm_allreduce, comm_scan, comm_reduce_T +*/ + +#if !defined(FAIL_H) || !defined(TYPES_H) +#warning "comm.h" requires "fail.h" and "types.h" +#endif + +/* + When the preprocessor macro MPI is defined, defines (very) thin wrappers + for the handful of used MPI routines. Alternatively, when MPI is not defined, + these wrappers become dummy routines suitable for a single process run. + No code outside of "comm.h" and "comm.c" makes use of MPI at all. + + Basic usage: + + struct comm c; + + comm_init(&c, MPI_COMM_WORLD); // initializes c using MPI_Comm_dup + + comm_free(&c); + + Very thin MPI wrappers: (see below for implementation) + + comm_send,_recv,_isend,_irecv,_time,_barrier + + Additionally, some reduction and scan routines are provided making use + of the definitions in "gs_defs.h" (provided this has been included first). + + Example comm_allreduce usage: + + double v[5], buf[5]; + comm_allreduce(&c, gs_double,gs_add, v,5,buf); + // Computes the vector sum of v across all procs, using + // buf as a scratch area. Delegates to MPI_Allreduce if possible. + + Example comm_scan usage: + + long in[5], out[2][5], buf[2][5]; + comm_scan(out, &c,gs_long,gs_add, in,5,buf); + // out[0] will be the vector sum of "in" across procs with ids + *strictly* less than this one (exclusive behavior), + and out[1] will be the vector sum across all procs, as would + be computed with comm_allreduce. + Note: differs from MPI_Scan which has inclusive behavior + + Example comm_reduce_double, etc. usage: + + T out, in[10]; + out = comm_reduce_T(&c, gs_max, in, 10); + // out will equal the largest element of "in", + across all processors + // T can be "double", "float", "int", "long", "slong", "sint", etc. + as defined in "gs_defs.h" + +*/ + +#ifdef MPI +#include +typedef MPI_Comm comm_ext; +typedef MPI_Request comm_req; +#else +typedef int comm_ext; +typedef int comm_req; +typedef int MPI_Fint; +#endif + +#define comm_allreduce PREFIXED_NAME(comm_allreduce) +#define comm_scan PREFIXED_NAME(comm_scan ) +#define comm_dot PREFIXED_NAME(comm_dot ) + +/* global id, np vars strictly for diagnostic messages (fail.c) */ +#ifndef comm_gbl_id +#define comm_gbl_id PREFIXED_NAME(comm_gbl_id) +#define comm_gbl_np PREFIXED_NAME(comm_gbl_np) +extern uint comm_gbl_id, comm_gbl_np; +#endif + +struct comm { + uint id, np; + comm_ext c; +}; + +static void comm_init(struct comm *c, comm_ext ce); +/* (macro) static void comm_init_check(struct comm *c, MPI_Fint ce, uint np); */ +/* (macro) static void comm_dup(struct comm *d, const struct comm *s); */ +static void comm_free(struct comm *c); +static double comm_time(void); +static void comm_barrier(const struct comm *c); +static void comm_recv(const struct comm *c, void *p, size_t n, + uint src, int tag); +static void comm_send(const struct comm *c, void *p, size_t n, + uint dst, int tag); +static void comm_irecv(comm_req *req, const struct comm *c, + void *p, size_t n, uint src, int tag); +static void comm_isend(comm_req *req, const struct comm *c, + void *p, size_t n, uint dst, int tag); +static void comm_wait(comm_req *req, int n); + +double comm_dot(const struct comm *comm, double *v, double *w, uint n); + +#ifdef GS_DEFS_H +void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, + void *v, uint vn, void *buf); +void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op, + const void *v, uint vn, void *buffer); + +#define DEFINE_REDUCE(T) \ +T PREFIXED_NAME(comm_reduce__##T)( \ + const struct comm *comm, gs_op op, const T *in, uint n); \ +static T comm_reduce_##T(const struct comm *c, gs_op op, const T *v, uint vn) \ +{ return PREFIXED_NAME(comm_reduce__##T)(c,op,v,vn); } +GS_FOR_EACH_DOMAIN(DEFINE_REDUCE) +#undef DEFINE_REDUCE + +#define comm_reduce_sint \ + TYPE_LOCAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long) +#define comm_reduce_slong \ + TYPE_GLOBAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long) + +#endif + +/*---------------------------------------------------------------------------- + Code for static (inline) functions + ----------------------------------------------------------------------------*/ + +static void comm_init(struct comm *c, comm_ext ce) +{ +#ifdef MPI + int i; + MPI_Comm_dup(ce, &c->c); + MPI_Comm_rank(c->c,&i), comm_gbl_id=c->id=i; + MPI_Comm_size(c->c,&i), comm_gbl_np=c->np=i; +#else + c->id = 0, c->np = 1; +#endif +} + +static void comm_init_check_(struct comm *c, MPI_Fint ce, uint np, + const char *file, unsigned line) +{ +#ifdef MPI + comm_init(c,MPI_Comm_f2c(ce)); + if(c->np != np) + fail(1,file,line,"comm_init_check: passed P=%u, " + "but MPI_Comm_size gives P=%u",np,c->np); +#else + comm_init(c,0); + if(np != 1) + fail(1,file,line,"comm_init_check: passed P=%u, " + "but not compiled with -DMPI",np); +#endif +} +#define comm_init_check(c,ce,np) comm_init_check_(c,ce,np,__FILE__,__LINE__) + + +static void comm_dup_(struct comm *d, const struct comm *s, + const char *file, unsigned line) +{ + d->id = s->id, d->np = s->np; +#ifdef MPI + MPI_Comm_dup(s->c,&d->c); +#else + if(s->np!=1) fail(1,file,line,"%s not compiled with -DMPI\n",file); +#endif +} +#define comm_dup(d,s) comm_dup_(d,s,__FILE__,__LINE__) + +static void comm_free(struct comm *c) +{ +#ifdef MPI + MPI_Comm_free(&c->c); +#endif +} + +static double comm_time(void) +{ +#ifdef MPI + return MPI_Wtime(); +#else + return 0; +#endif +} + +static void comm_barrier(const struct comm *c) +{ +#ifdef MPI + MPI_Barrier(c->c); +#endif +} + +static void comm_recv(const struct comm *c, void *p, size_t n, + uint src, int tag) +{ +#ifdef MPI +# ifndef MPI_STATUS_IGNORE + MPI_Status stat; + MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,&stat); +# else + MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,MPI_STATUS_IGNORE); +# endif +#endif +} + +static void comm_send(const struct comm *c, void *p, size_t n, + uint dst, int tag) +{ +#ifdef MPI + MPI_Send(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c); +#endif +} + +static void comm_irecv(comm_req *req, const struct comm *c, + void *p, size_t n, uint src, int tag) +{ +#ifdef MPI + MPI_Irecv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,req); +#endif +} + +static void comm_isend(comm_req *req, const struct comm *c, + void *p, size_t n, uint dst, int tag) +{ +#ifdef MPI + MPI_Isend(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c,req); +#endif +} + +static void comm_wait(comm_req *req, int n) +{ +#ifdef MPI +# ifndef MPI_STATUSES_IGNORE + MPI_Status status[8]; + while(n>=8) MPI_Waitall(8,req,status), req+=8, n-=8; + if(n>0) MPI_Waitall(n,req,status); +# else + MPI_Waitall(n,req,MPI_STATUSES_IGNORE); +# endif +#endif +} + +static void comm_bcast(const struct comm *c, void *p, size_t n, uint root) +{ +#ifdef MPI + MPI_Bcast(p,n,MPI_UNSIGNED_CHAR,root,c->c); +#endif +} + +#endif diff --git a/src/jl/crs.h b/src/jl/crs.h new file mode 100644 index 0000000..e2d0d36 --- /dev/null +++ b/src/jl/crs.h @@ -0,0 +1,24 @@ +#ifndef CRS_H +#define CRS_H + +#if !defined(COMM_H) +#warning "crs.h" requires "comm.h" +#endif + +#define crs_setup PREFIXED_NAME(crs_setup) +#define crs_solve PREFIXED_NAME(crs_solve) +#define crs_stats PREFIXED_NAME(crs_stats) +#define crs_free PREFIXED_NAME(crs_free ) + +struct crs_data; + +struct crs_data *crs_setup( + uint n, const ulong *id, + uint nz, const uint *Ai, const uint *Aj, const double *A, + uint null_space, const struct comm *comm); +void crs_solve(double *x, struct crs_data *data, double *b); +void crs_stats(struct crs_data *data); +void crs_free(struct crs_data *data); + +#endif + diff --git a/src/jl/crs_test.c b/src/jl/crs_test.c new file mode 100644 index 0000000..e5367d2 --- /dev/null +++ b/src/jl/crs_test.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "gs_defs.h" +#include "comm.h" +#include "gs.h" +#include "crs.h" + +void test(const struct comm *const comm) +{ + const double A[16] = { 2, -1, -1, 0, + -1, 2, 0, -1, + -1, 0, 2, -1, + 0, -1, -1, 2 }; + const uint Ai[16] = { 0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3 }, + Aj[16] = { 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3 }; + ulong xid[4]; slong uid[4]; + double x[4]={1,1,1,1}, b[4], bmean; + uint i, w, gn, px, py; + + slong *xgid=0; double *xg=0; struct gs_data *gsh; + + struct crs_data *crs; + + w = ceil(sqrt(comm->np)); gn = (w+1)*(w+1); + + if(comm->id==0) printf("arranging procs in a %u x %u square\n", w, w); + + px = comm->id%w, py = comm->id/w; + b[0] = xid[0] = (w+1)*py +px+1; + b[1] = xid[1] = (w+1)*py +px+2; + b[2] = xid[2] = (w+1)*(py+1)+px+1; + b[3] = xid[3] = (w+1)*(py+1)+px+2; + + gn = comm_reduce_slong(comm, gs_max, (const slong*)&xid[3],1); + bmean = comm_reduce_double(comm, gs_add, b,4)/gn; + + gsh = gs_setup((const slong*)xid,4, comm,0,gs_crystal_router,0); + gs(x,gs_double,gs_add,0,gsh,0); + gs(b,gs_double,gs_add,0,gsh,0); + for(i=0;i<4;++i) b[i]=xid[i]-bmean/x[i]; + gs(b,gs_double,gs_add,0,gsh,0); + gs_free(gsh); + + gsh = gs_setup((const slong*)xid,4, comm,1,gs_crystal_router,0); + for(i=0;i<4;++i) uid[i]=comm->id; + gs(uid,gs_slong,gs_min,0,gsh,0); + gs_free(gsh); + for(i=0;i<4;++i) uid[i] = (uid[i]==comm->id?(slong)xid[i]:-(slong)xid[i]); + + if(comm->id==0) { + xgid = tmalloc(slong, gn); + xg = tmalloc(double,gn); + for(i=0;iid?uid:xgid,comm->id?4:gn, comm,0,gs_crystal_router,0); + + + if(comm->id==0) for(i=0;i<4;++i) xg[xid[i]-1]=b[i]; + gs(comm->id?b:xg,gs_double,gs_add, 0, gsh, 0); + if(comm->id==0) for(i=0;iid==0) for(i=0;i<4;++i) xg[xid[i]-1]=x[i]; + gs(comm->id?x:xg,gs_double,gs_add, 0, gsh, 0); + if(comm->id==0) for(i=0;iid==0) free(xg), free(xgid); +} + +int main(int narg, char* arg[]) +{ + comm_ext world; int np; + struct comm comm; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + test(&comm); + comm_free(&comm); + +#ifdef MPI + MPI_Finalize(); +#endif + + return 0; +} + diff --git a/src/jl/crystal.c b/src/jl/crystal.c new file mode 100644 index 0000000..a0e8135 --- /dev/null +++ b/src/jl/crystal.c @@ -0,0 +1,141 @@ +/*------------------------------------------------------------------------------ + + Crystal Router + + Accomplishes all-to-all communication in log P msgs per proc + The routine is low-level; the format of the input/output is an + array of integers, consisting of a sequence of messages with format: + + target proc + source proc + m + integer + integer + ... + integer (m integers in total) + + Before crystal_router is called, the source of each message should be + set to this proc id; upon return from crystal_router, the target of each + message will be this proc id. + + Example Usage: + + struct crystal cr; + + crystal_init(&cr, &comm); // makes an internal copy of comm + + crystal.data.n = ... ; // total number of integers (not bytes!) + buffer_reserve(&cr.data, crystal.n * sizeof(uint)); + ... // fill cr.data.ptr with messages + crystal_router(&cr); + + crystal_free(&cr); + + ----------------------------------------------------------------------------*/ + +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" + +#define crystal_init PREFIXED_NAME(crystal_init ) +#define crystal_free PREFIXED_NAME(crystal_free ) +#define crystal_router PREFIXED_NAME(crystal_router) + +struct crystal { + struct comm comm; + buffer data, work; +}; + +void crystal_init(struct crystal *p, const struct comm *comm) +{ + comm_dup(&p->comm, comm); + buffer_init(&p->data,1000); + buffer_init(&p->work,1000); +} + +void crystal_free(struct crystal *p) +{ + comm_free(&p->comm); + buffer_free(&p->data); + buffer_free(&p->work); +} + +static void uintcpy(uint *dst, const uint *src, uint n) +{ + if(dst+n<=src) memcpy (dst,src,n*sizeof(uint)); + else if(dst!=src) memmove(dst,src,n*sizeof(uint)); +} + +static uint crystal_move(struct crystal *p, uint cutoff, int send_hi) +{ + uint len, *src, *end; + uint *keep = p->data.ptr, *send; + uint n = p->data.n; + send = buffer_reserve(&p->work,n*sizeof(uint)); + if(send_hi) { /* send hi, keep lo */ + for(src=keep,end=keep+n; src=cutoff) memcpy (send,src,len*sizeof(uint)), send+=len; + else uintcpy(keep,src,len), keep+=len; + } + } else { /* send lo, keep hi */ + for(src=keep,end=keep+n; srcdata.n = keep - (uint*)p->data.ptr; + return send - (uint*)p->work.ptr; +} + +static void crystal_exchange(struct crystal *p, uint send_n, uint targ, + int recvn, int tag) +{ + comm_req req[3]; + uint count[2] = {0,0}, sum, *recv[2]; + + if(recvn) + comm_irecv(&req[1],&p->comm, &count[0],sizeof(uint), targ ,tag); + if(recvn==2) + comm_irecv(&req[2],&p->comm, &count[1],sizeof(uint), p->comm.id-1,tag); + comm_isend(&req[0],&p->comm, &send_n,sizeof(uint), targ,tag); + comm_wait(req,recvn+1); + + sum = p->data.n + count[0] + count[1]; + buffer_reserve(&p->data,sum*sizeof(uint)); + recv[0] = (uint*)p->data.ptr + p->data.n, recv[1] = recv[0] + count[0]; + p->data.n = sum; + + if(recvn) comm_irecv(&req[1],&p->comm, + recv[0],count[0]*sizeof(uint), targ ,tag+1); + if(recvn==2) comm_irecv(&req[2],&p->comm, + recv[1],count[1]*sizeof(uint), p->comm.id-1,tag+1); + comm_isend(&req[0],&p->comm, p->work.ptr,send_n*sizeof(uint), targ,tag+1); + comm_wait(req,recvn+1); +} + +void crystal_router(struct crystal *p) +{ + uint bl=0, bh, nl; + uint id = p->comm.id, n=p->comm.np; + uint send_n, targ, tag = 0; + int send_hi, recvn; + while(n>1) { + nl = (n+1)/2, bh = bl+nl; + send_hi = id +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "crystal.h" + +int main(int narg, char *arg[]) +{ + comm_ext world; int np; + struct comm comm; + struct crystal cr; + uint i,sum, *data, *end; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + + crystal_init(&cr,&comm); + + cr.data.n = (4+(comm.id&1))*comm.np; + buffer_reserve(&cr.data,cr.data.n*sizeof(uint)); + data = cr.data.ptr; + for(i=0;i %u:",data[1],data[0]); + for(i=0;i /* sprintf, vfprintf, stdout */ +#include /* va_list, va_start, ... */ +#include /* exit */ +#include /* memcpy, and str* functions in comm_fail */ +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" + +#define nek_exitt FORTRAN_UNPREFIXED(exitt,EXITT) +void die(int status) +{ +#ifdef NO_NEK_EXITT + if(comm_gbl_id==0) exit(status); else for(;;) ; +#else + exit(status); +#endif +} + +void vdiagnostic(const char *prefix, const char *file, unsigned line, + const char *fmt, va_list ap) +{ + static char buf[2048]; int n,na,i=0; + sprintf(buf,"%s(proc %04d, %s:%d): ",prefix,(int)comm_gbl_id,file,line); + vsprintf(buf+strlen(buf),fmt,ap); + strcat(buf,"\n"); + n=strlen(buf); + while(n && (na=fwrite(buf+i,1,n,stdout))) n-=na, i+=na; + fflush(stdout); +} + +void diagnostic(const char *prefix, const char *file, unsigned line, + const char *fmt, ...) +{ + va_list ap; va_start(ap,fmt); + vdiagnostic(prefix,file,line,fmt,ap); + va_end(ap); +} + +void vfail(int status, const char *file, unsigned line, + const char *fmt, va_list ap) +{ + vdiagnostic("ERROR ",file,line,fmt,ap); + die(status); +} + +void fail(int status, const char *file, unsigned line, + const char *fmt, ...) +{ + va_list ap; va_start(ap,fmt); + vfail(status,file,line,fmt,ap); + va_end(ap); +} diff --git a/src/jl/fail.h b/src/jl/fail.h new file mode 100644 index 0000000..0185110 --- /dev/null +++ b/src/jl/fail.h @@ -0,0 +1,52 @@ +#ifndef FAIL_H +#define FAIL_H + +#if !defined(NAME_H) +#warning "fail.h" requires "name.h" +#endif + +#define die PREFIXED_NAME( die ) +#define vdiagnostic PREFIXED_NAME(vdiagnostic) +#define diagnostic PREFIXED_NAME( diagnostic) +#define vfail PREFIXED_NAME(vfail ) +#define fail PREFIXED_NAME( fail ) + +#ifdef __GNUC__ +# define ATTRBD __attribute__ ((noreturn)) +# define ATTRB4V __attribute__ ((format(printf,4,0))) +# define ATTRB4 __attribute__ ((format(printf,4,5))) +# define ATTRB4DV __attribute__ ((noreturn,format(printf,4,0))) +# define ATTRB4D __attribute__ ((noreturn,format(printf,4,5))) +#else +# define ATTRBD +# define ATTRB4V +# define ATTRB4 +# define ATTRB4DV +# define ATTRB4D +#endif + +#define DEF_FUNS() \ + void die(int status) ATTRBD; \ + void diagnostic(const char *prefix, const char *file, unsigned line, \ + const char *fmt, ...) ATTRB4 ; \ + void fail (int status, const char *file, unsigned line, \ + const char *fmt, ...) ATTRB4D ; +#define VDEF_FUNS() \ + void vdiagnostic(const char *prefix, const char *file, unsigned line, \ + const char *fmt, va_list ap) ATTRB4V ; \ + void vfail (int status, const char *file, unsigned line, \ + const char *fmt, va_list ap) ATTRB4DV ; +DEF_FUNS() +#ifdef va_arg +VDEF_FUNS() +#endif + +#undef VDEF_FUNS +#undef DEF_FUNS +#undef ATTRB4D +#undef ATTRB4DV +#undef ATTRB4 +#undef ATTRB4V +#undef ATTRBD + +#endif diff --git a/src/jl/fcrystal.c b/src/jl/fcrystal.c new file mode 100644 index 0000000..3fe4c9a --- /dev/null +++ b/src/jl/fcrystal.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "comm.h" +#include "crystal.h" +#include "sort.h" +#include "sarray_sort.h" +#include "sarray_transfer.h" + +/*-------------------------------------------------------------------------- + + FORTRAN Interface to crystal router + + integer h, np + MPI_Comm comm + call crystal_setup(h,comm,np) ! set h to handle to new instance + ! it is a runtime error if MPI_Comm_size gives a value different than np + call crystal_free(h) ! release instance + + integer*? ituple(m,max) ! integer type matching sint from "types.h" + call crystal_ituple_transfer(h, ituple,m,n,max, kp) + - moves each column ituple(:,i), 1 <= i <= n, + to proc ituple(kp,i) + - sets n to the number of columns received, + which may be larger than max (indicating loss of n-max columns) + - also sets ituple(kp,i) to the source proc of column ituple(:,i) + + call crystal_ituple_sort(h, ituple,m,n, key,nkey) + - locally sorts columns ituple(:,1...n) in ascending order, + ranked by ituple(key(1),i), + then ituple(key(2),i), + ... + then ituple(key(nkey),i) + - no communication; h used for scratch area + - linear time + - assumes nonnegative integers + + integer*? vi(mi,max) ! integer type matching sint from "types.h" + integer*? vl(ml,max) ! integer type matching slong from "types.h" + real vr(mr,max) + call crystal_tuple_transfer(h,n,max, vi,mi,vl,ml,vr,mr, kp) + - moves each column vi(:,i),vl(:,i),vr(:,i) 1 <= i <= n, + to proc vi(kp,i) + - sets n to the number of columns received, + which may be larger than max (indicating loss of n-max columns) + - also sets vi(kp,i) to the source proc of columns vi(:,i),vl(:,i),vr(:,i) + + call crystal_tuple_sort(h,n, vi,mi,vl,ml,vr,mr, key,nkey) + - locally sorts columns vi/vl/vr (:,1...n) in ascending order, + ranked by vi(key(1),i) [ or vl(key(1)-mi,i) if key(1)>mi ], + then vi(key(2),i) [ or vl(key(2)-mi,i) if key(2)>mi ], + ... + then vi(key(nkey),i) or vl(key(nkey)-mi,i) + - no communication; h used for scratch area + - linear time + - assumes nonnegative integers + - sorting on reals not yet implemented + + --------------------------------------------------------------------------*/ + +#undef crystal_free +#define ccrystal_free PREFIXED_NAME(crystal_free) + +#define fcrystal_setup \ + FORTRAN_NAME(crystal_setup ,CRYSTAL_SETUP ) +#define fcrystal_ituple_sort \ + FORTRAN_NAME(crystal_ituple_sort ,CRYSTAL_ITUPLE_SORT ) +#define fcrystal_tuple_sort \ + FORTRAN_NAME(crystal_tuple_sort ,CRYSTAL_TUPLE_SORT ) +#define fcrystal_ituple_transfer \ + FORTRAN_NAME(crystal_ituple_transfer,CRYSTAL_ITUPLE_TRANSFER) +#define fcrystal_tuple_transfer \ + FORTRAN_NAME(crystal_tuple_transfer ,CRYSTAL_TUPLE_TRANSFER ) +#define fcrystal_free \ + FORTRAN_NAME(crystal_free ,CRYSTAL_FREE ) + +static struct crystal **handle_array = 0; +static int handle_max = 0; +static int handle_n = 0; + +void fcrystal_setup(sint *handle, const MPI_Fint *comm, const sint *np) +{ + struct crystal *p; + if(handle_n==handle_max) + handle_max+=handle_max/2+1, + handle_array=trealloc(struct crystal*,handle_array,handle_max); + handle_array[handle_n]=p=tmalloc(struct crystal,1); + comm_init_check(&p->comm, *comm, *np); + buffer_init(&p->data,1000); + buffer_init(&p->work,1000); + *handle = handle_n++; +} + +#define CHECK_HANDLE(func) do \ + if(*handle<0 || *handle>=handle_n || !handle_array[*handle]) \ + fail(1,__FILE__,__LINE__,func ": invalid handle"); \ +while(0) + +void fcrystal_ituple_sort(const sint *handle, + sint A[], const sint *m, const sint *n, + const sint keys[], const sint *nkey) +{ + const size_t size = (*m)*sizeof(sint); + sint nk = *nkey; + buffer *buf; + CHECK_HANDLE("crystal_ituple_sort"); + buf = &handle_array[*handle]->data; + if(--nk>=0) { + sortp(buf,0, (uint*)&A[keys[nk]-1],*n,size); + while(--nk>=0) + sortp(buf,1, (uint*)&A[keys[nk]-1],*n,size); + sarray_permute_buf_(ALIGNOF(sint),size,A,*n, buf); + } +} + +void fcrystal_tuple_sort(const sint *const handle, const sint *const n, + sint Ai[], const sint *const mi, + slong Al[], const sint *const ml, + double Ad[], const sint *const md, + const sint keys[], const sint *const nkey) +{ + const size_t size_i = (*mi)*sizeof(sint), + size_l = (*ml)*sizeof(slong), + size_d = (*md)*sizeof(double); + int init=0; + sint nk = *nkey; + buffer *buf; + CHECK_HANDLE("crystal_tuple_sort"); + buf = &handle_array[*handle]->data; + if(nk<=0) return; + while(--nk>=0) { + sint k = keys[nk]-1; + if(k<0 || k>=*mi+*ml) + fail(1,__FILE__,__LINE__,"crystal_tuple_sort: invalid key"); + else if(k<*mi) sortp (buf,init, (uint *)&Ai[k], *n,size_i); + else sortp_long(buf,init, (ulong*)&Al[k-*mi],*n,size_l); + init=1; + } + if(*mi) sarray_permute_buf_(ALIGNOF(sint ),size_i,Ai,*n, buf); + if(*ml) sarray_permute_buf_(ALIGNOF(slong ),size_l,Al,*n, buf); + if(*md) sarray_permute_buf_(ALIGNOF(double),size_d,Ad,*n, buf); +} + +void fcrystal_ituple_transfer(const sint *handle, + sint A[], const sint *m, sint *n, + const sint *nmax, const sint *proc_key) +{ + struct array ar, *const ar_ptr = &ar; + const unsigned size=(*m)*sizeof(sint); + CHECK_HANDLE("crystal_ituple_transfer"); + ar.ptr=A, ar.n=*n, ar.max=*nmax; + *n = sarray_transfer_many(&ar_ptr,&size,1, 1,0,1,(*proc_key-1)*sizeof(sint), + (uint*)&A[*proc_key-1],size, handle_array[*handle]); +} + +void fcrystal_tuple_transfer( + const sint *const handle, sint *const n, const sint *const max, + sint Ai[], const sint *const mi, + slong Al[], const sint *const ml, + double Ad[], const sint *const md, + const sint *const proc_key) +{ + struct array ar_i, ar_l, ar_d, *ar[3]; + unsigned size[3]; + CHECK_HANDLE("crystal_tuple_transfer"); + size[0]=*mi*sizeof(sint); + size[1]=*ml*sizeof(slong); + size[2]=*md*sizeof(double); + ar[0]=&ar_i, ar[1]=&ar_l, ar[2]=&ar_d; + ar_i.ptr=Ai,ar_l.ptr=Al,ar_d.ptr=Ad; + ar_i.n=ar_l.n=ar_d.n = *n; + ar_i.max=ar_l.max=ar_d.max=*max; + *n = sarray_transfer_many(ar,size,3, 1,0,1,(*proc_key-1)*sizeof(sint), + (uint*)&Ai[*proc_key-1],size[0], handle_array[*handle]); +} + +void fcrystal_free(sint *handle) +{ + CHECK_HANDLE("crystal_free"); + ccrystal_free(handle_array[*handle]); + free(handle_array[*handle]); + handle_array[*handle] = 0; +} + + diff --git a/src/jl/gen_poly_imp.c b/src/jl/gen_poly_imp.c new file mode 100644 index 0000000..21a7410 --- /dev/null +++ b/src/jl/gen_poly_imp.c @@ -0,0 +1,227 @@ +#include +#include +#include + +#define PREC_BITS 256 +#define DIGITS 50 + +#define GLL_LAG_FIX_MAX 16 + +#if 1 +# define STATIC "static " +#else +# define STATIC "" +#endif + + +#define PI 3.1415926535897932384626433832795028841971693993751058209749445923 + +#define DECLARE_1VAR(a) static int init=0; static mpf_t a; \ + if(!init) init=1, mpf_init(a) +#define DECLARE_2VARS(a,b) static int init=0; static mpf_t a,b; \ + if(!init) init=1, mpf_init(a), mpf_init(b) +#define DECLARE_3VARS(a,b,c) static int init=0; static mpf_t a,b,c; \ + if(!init) init=1, mpf_init(a), mpf_init(b), \ + mpf_init(c) +#define DECLARE_4VARS(a,b,c,d) static int init=0; static mpf_t a,b,c,d; \ + if(!init) init=1, mpf_init(a), mpf_init(b), \ + mpf_init(c), mpf_init(d) + +static int is_small(const mpf_t x, const mpf_t y) { + DECLARE_2VARS(xa,ya); + mpf_abs(xa,x); + mpf_abs(ya,y); + mpf_div_2exp(ya,ya,PREC_BITS-mp_bits_per_limb); + return mpf_cmp(xa,ya) < 0; +} + +typedef void fun_3term(mpf_t Pn, int n, const mpf_t x); + +#define DECLARE_THREE_TERM(name, i0_init, init_Ps, a_ip1,a_i,a_im1) \ +static void name(mpf_t Pn, int n, const mpf_t x) \ +{ \ + int i, i0_init; \ + DECLARE_4VARS(a,b,P_im1,P_i); \ + init_Ps; \ + for(i=i0+1; ii0?P_i:P_im1); \ +} + +DECLARE_THREE_TERM(legendre, i0=0,(mpf_set_ui(P_im1,1),mpf_set (P_i,x)), + i+1, 2*i+1, i ) +DECLARE_THREE_TERM(legendre_d1, i0=0,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,1)), + i , 2*i+1, i+1) +DECLARE_THREE_TERM(legendre_d2, i0=1,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,3)), + i-1, 2*i+1, i+2) + +static void newton(mpf_t x, double seed, + fun_3term *fun, fun_3term *der, int n) +{ + DECLARE_3VARS(ox,f,df); + mpf_set_d(x, seed); + do { + mpf_set(ox, x); + fun(f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f); + } while(!is_small(f,x)); + fun( f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f); +} + +static void gauss_node(mpf_t z, int n, int i) { + if( (n&1) && i==n/2 ) mpf_set_ui(z,0); + else newton(z, cos( (2*n-2*i-1)*(PI/2)/n ), legendre,legendre_d1,n); +} + +static void lobatto_node(mpf_t z, int n, int i) { + if( (n&1) && i==n/2 ) mpf_set_ui(z,0); + else if(i==0) mpf_set_d(z,-(double)1); + else if(i==n-1) mpf_set_ui(z,1); + else newton(z, cos( (n-1-i)*PI/(n-1) ), legendre_d1,legendre_d2,n-1); +} + +#define PRINT_LIST(i, i0,nline,n, printi,sep,sepline) \ + do { \ + int i; \ + for(i=i0;i3) { + printf("static const double gllz_%02d[%2d] = {\n ",n,n/2-1); + for(i=1;i<=n/2-1;++i) { + lobatto_node(z, n,n-1-i); + if(i!=1) printf(",\n "); + gmp_printf("%.*Fg",DIGITS,z); + } + puts("\n};\n"); + } + printf(STATIC "void gll_lag_%02d(double *restrict p, double *restrict data,\n" + " unsigned n, int d, double xh)\n{\n",n); + printf(" const double *restrict w = data;\n"); + printf(" const double x = xh*2;\n"); + #define PRINT_D(i) do { \ + printf("d%02d=x",i); \ + if(2*i+1==n) printf(" "); \ + else if(i==0) printf("+2 "); \ + else if(i==n-1) printf("-2 "); \ + else if(i=n-2?printf(" %d",n-1-(i)):printf("v1_%02d",i)) + #define PRINT_U2(i) (i<=1 ?printf(" 0"): \ + (i==2 ?printf(" 2"):printf("u2_%02d",i))) + #define PRINT_V2(i) (i>=n-2?printf(" 0"): \ + (i==n-3?printf(" 2"):printf("v2_%02d",i))) + printf("%s",";\n const double "); + PRINT_LIST(i, 1,3,n, + (PRINT_U0(i),putchar('='),PRINT_U0(i-1),printf("*d%02d",i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 1,3,n, + (PRINT_V0(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V0(n-i)), + ",",",\n "); + printf("%s",";\n "); + PRINT_LIST(i, 0,3,n, + (printf("p[%2d]=w[%2d]*",i,i),PRINT_U0(i),putchar('*'), + PRINT_V0(i)),"; ",";\n "); + puts(";\n if(d>0) {"); + if(n>2) { + printf("%s"," const double "); + PRINT_LIST(i, 2,2,n, + (PRINT_U1(i),putchar('='),PRINT_U1(i-1),printf("*d%02d",i-1), + putchar('+'),PRINT_U0(i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 2,2,n, + (PRINT_V1(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V1(n-i), + putchar('+'),PRINT_V0(n-i)), + ",",",\n "); + puts(";"); + } + for(i=0;i1) {"); + if(n>3) { + printf("%s"," const double "); + PRINT_LIST(i, 3,2,n, + (PRINT_U2(i),putchar('='),PRINT_U2(i-1),printf("*d%02d",i-1), + printf("+2*"),PRINT_U1(i-1)), + ",",",\n "); + printf("%s",";\n const double "); + PRINT_LIST(i, 3,2,n, + (PRINT_V2(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V2(n-i), + printf("+2*"),PRINT_V1(n-i)), + ",",",\n "); + puts(";"); + } + if(n<3) for(i=0;i1) + PRINT_U2(i),putchar('*'),PRINT_V0(i); + else printf(" "); + if(i>0 && i + +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" + +#ifdef _OPENMP +#include "omp.h" +#endif + +#define gs_op gs_op_t /* fix conflict with fortran */ + +#include "gs_defs.h" +#include "gs_local.h" +#include "comm.h" +#include "mem.h" +#include "sort.h" +#include "crystal.h" +#include "sarray_sort.h" +#include "sarray_transfer.h" + +#define gs PREFIXED_NAME(gs ) +#define gs_vec PREFIXED_NAME(gs_vec ) +#define gs_many PREFIXED_NAME(gs_many ) +#define gs_setup PREFIXED_NAME(gs_setup ) +#define gs_free PREFIXED_NAME(gs_free ) +#define gs_unique PREFIXED_NAME(gs_unique) + +GS_DEFINE_DOM_SIZES() + +typedef enum { mode_plain, mode_vec, mode_many, + mode_dry_run } gs_mode; + +static buffer static_buffer = null_buffer; + +static void gather_noop( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{} + +static void scatter_noop( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{} + +static void init_noop( + void *out, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{} + +/*------------------------------------------------------------------------------ + Topology Discovery +------------------------------------------------------------------------------*/ + +struct gs_topology { + ulong total_shared; /* number of globally unique shared ids */ + struct array nz; /* array of nonzero_id's, grouped by id, + sorted by primary index, then flag, then index */ + struct array sh; /* array of shared_id's, arbitrary ordering */ + struct array pr; /* array of primary_shared_id's */ +}; + +static void gs_topology_free(struct gs_topology *top) +{ + array_free(&top->pr); + array_free(&top->sh); + array_free(&top->nz); +} + +/************** Local topology **************/ + +/* nonzero_ids (local part) + + Creates an array of s_nonzeros, one per nonzero in user id array. The + output array is grouped by id. Within each group, non-flagged entries come + first; otherwise the entries within the group are sorted by the index into + the user id array. The first index in each group is the primary index, and + is stored along with each entry. The groups themselves are ordered in + increasing order of the primary index associated with the group (as opposed + to the user id). */ + +struct nonzero_id { + ulong id; uint i, flag, primary; +}; + +static void nonzero_ids(struct array *nz, + const slong *id, const uint n, buffer *buf) +{ + ulong last_id = -(ulong)1; + uint i, primary = -(uint)1; + struct nonzero_id *row, *end; + array_init(struct nonzero_id,nz,n), end=row=nz->ptr; + for(i=0;ii = i; + end->id = abs_id; + end->flag = id_i!=abs_id; + ++end; + } + nz->n = end-row; + array_resize(struct nonzero_id,nz,nz->n); + sarray_sort_2(struct nonzero_id,nz->ptr,nz->n, id,1, flag,0, buf); + for(row=nz->ptr,end=row+nz->n;row!=end;++row) { + ulong this_id = row->id; + if(this_id!=last_id) primary = row->i; + row->primary = primary; + last_id = this_id; + } + sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf); +} + +/************** Global topology **************/ + +/* construct list of all unique id's on this proc */ +struct unique_id { ulong id; uint work_proc, src_if; }; +static void unique_ids(struct array *un, const struct array *nz, const uint np) +{ + struct unique_id *un_row; + const struct nonzero_id *nz_row, *nz_end; + array_init(struct unique_id,un,nz->n), un_row=un->ptr; + for(nz_row=nz->ptr,nz_end=nz_row+nz->n;nz_row!=nz_end;++nz_row) { + if(nz_row->i != nz_row->primary) continue; + un_row->id = nz_row->id; + un_row->work_proc = nz_row->id%np; + un_row->src_if = nz_row->flag ? ~nz_row->i : nz_row->i; + ++un_row; + } + un->n = un_row - (struct unique_id*)un->ptr; +} + +/* shared_ids (global part) + + Creates an array of shared_id's from an array of nonzero_id's. Each entry + in the output identifies one id shared with one other processor p. + Note: two procs share an id only when at least one of them has it unflagged. + The primary index is i locally and ri remotely. Bit 1 of flags indicates + the local flag, bit 2 indicates the remote flag. The output has no + particular ordering. + + Also creates an array of primary_shared_id's, one for each shared id. + This struct includes ord, a global rank of the id (arbitrary, but unique). */ + +#define FLAGS_LOCAL 1 +#define FLAGS_REMOTE 2 + +/* i : local primary index + p : remote proc + ri : remote primary index + bi : buffer index (set and used during pw setup) */ +struct shared_id { + ulong id; uint i, p, ri, bi; unsigned flags; +}; + +struct primary_shared_id { + ulong id, ord; uint i; unsigned flag; +}; + + + +struct shared_id_work { ulong id,ord; uint p1, p2, i1f, i2f; }; +static void shared_ids_aux(struct array *sh, struct array *pr, uint pr_n, + struct array *wa, buffer *buf) +{ + const struct shared_id_work *w, *we; + struct shared_id *s; + struct primary_shared_id *p; + ulong last_id = -(ulong)1; + /* translate work array to output arrays */ + sarray_sort(struct shared_id_work,wa->ptr,wa->n, id,1, buf); + array_init(struct shared_id,sh,wa->n), sh->n=wa->n, s=sh->ptr; + array_init(struct primary_shared_id,pr,pr_n), p=pr->ptr; + for(w=wa->ptr,we=w+wa->n;w!=we;++w) { + uint i1f = w->i1f, i2f = w->i2f; + uint i1 = ~i1fid=w->id, s->i=i1, s->p=w->p2, s->ri=i2; + s->flags = ((i2f^i2)&FLAGS_REMOTE) | ((i1f^i1)&FLAGS_LOCAL); + ++s; + if(w->id!=last_id) { + last_id=w->id; + p->id=last_id, p->ord=w->ord, p->i=i1, p->flag=(i1f^i1)&FLAGS_LOCAL; + ++p; + } + } + pr->n = p-(struct primary_shared_id*)pr->ptr; + sarray_sort(struct primary_shared_id,pr->ptr,pr->n, i,0, buf); +} + +static ulong shared_ids(struct array *sh, struct array *pr, + const struct array *nz, struct crystal *cr) +{ + struct array un; struct unique_id *un_row, *un_end, *other; + ulong last_id = -(ulong)1; + ulong ordinal[2], n_shared=0, scan_buf[2]; + struct array wa; struct shared_id_work *w; + uint n_unique; + /* construct list of all unique id's on this proc */ + unique_ids(&un,nz,cr->comm.np); + n_unique = un.n; + /* transfer list to work procs */ + sarray_transfer(struct unique_id,&un, work_proc,1, cr); + /* group by id, put flagged entries after unflagged (within each group) */ + sarray_sort_2(struct unique_id,un.ptr,un.n, id,1, src_if,0, &cr->data); + /* count shared id's */ + for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) { + ulong id = un_row->id; + if(~un_row->src_ifsrc_if) continue; + if(id==last_id) continue; + other=un_row+1; + if(other!=un_end&&other->id==id) last_id=id, ++n_shared; + } + comm_scan(ordinal, &cr->comm,gs_slong,gs_add, &n_shared,1, scan_buf); + /* there are ordinal[1] globally shared unique ids; + and ordinal[0] of those are seen by work procs of lower rank; + i.e., this work processor sees the range ordinal[0] + (0,n_shared-1) */ + /* construct list of shared ids */ + last_id = -(ulong)1; + array_init(struct shared_id_work,&wa,un.n), wa.n=0, w=wa.ptr; + for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) { + ulong id = un_row->id; + uint p1 = un_row->work_proc, i1f = un_row->src_if; + if(~i1fid==id;++other) { + uint p2 = other->work_proc, i2f = other->src_if; + ulong ord; + if(id!=last_id) last_id=id, ++ordinal[0]; + ord=ordinal[0]-1; + if(wa.n+2>wa.max) + array_reserve(struct shared_id_work,&wa,wa.n+2), + w=(struct shared_id_work*)wa.ptr+wa.n; + w->id=id, w->ord=ord, w->p1=p1, w->p2=p2, w->i1f=i1f, w->i2f=i2f, ++w; + w->id=id, w->ord=ord, w->p1=p2, w->p2=p1, w->i1f=i2f, w->i2f=i1f, ++w; + wa.n+=2; + } + } + /* transfer shared list to source procs */ + sarray_transfer(struct shared_id_work,&wa, p1,0, cr); + /* fill output arrays from work array */ + shared_ids_aux(sh,pr,n_unique,&wa,&cr->data); + array_free(&un); + array_free(&wa); + return ordinal[1]; +} + +static void get_topology(struct gs_topology *top, + const slong *id, uint n, struct crystal *cr) +{ + nonzero_ids(&top->nz,id,n,&cr->data); + top->total_shared = shared_ids(&top->sh,&top->pr, &top->nz,cr); +} + +static void make_topology_unique(struct gs_topology *top, slong *id, + uint pid, buffer *buf) +{ + struct array *const nz=&top->nz, *const sh=&top->sh, *const pr=&top->pr; + struct nonzero_id *pnz; + struct shared_id *pb, *pe, *e, *out; + struct primary_shared_id *q; + + /* flag local non-primaries */ + sarray_sort(struct nonzero_id,nz->ptr,nz->n, i,0, buf); + if(id) { + struct nonzero_id *p,*e; + for(p=nz->ptr,e=p+nz->n;p!=e;++p) + if(p->i != p->primary) id[p->i]=-(slong)p->id,p->flag=1; + } else { + struct nonzero_id *p,*e; + for(p=nz->ptr,e=p+nz->n;p!=e;++p) + if(p->i != p->primary) p->flag=1; + } + sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf); + + /* assign owner among shared primaries */ + + /* create sentinel with i = -1 */ + array_reserve(struct shared_id,sh,sh->n+1); + ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1; + /* in the sorted list of procs sharing a given id, + the owner is chosen to be the j^th unflagged proc, + where j = id mod (length of list) */ + sarray_sort_2(struct shared_id,sh->ptr,sh->n, i,0, p,0, buf); + out=sh->ptr; pnz=top->nz.ptr; + for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) { + uint i = pb->i, lt=0,gt=0, owner; struct shared_id *p; + while(pnz->i!=i) ++pnz; + /* note: current proc not in list */ + for(pe=pb; pe->i==i && pe->pflags&FLAGS_REMOTE)) ++lt; + for( ; pe->i==i ; ++pe) if(!(pe->flags&FLAGS_REMOTE)) ++gt; + if(!(pb->flags&FLAGS_LOCAL)) { + owner = pb->id%(lt+gt+1); + if(owner==lt) goto make_sh_unique_mine; + if(owner>lt) --owner; + } else + owner = pb->id%(lt+gt); + /* we don't own pb->id */ + if(id) id[i] = -(slong)pb->id; + pnz->flag=1; + /* we only share this id with the owner now; remove the other entries */ + for(p=pb; p!=pe; ++p) if(!(p->flags&FLAGS_REMOTE) && !(owner--)) break; + if(p!=pe) *out=*p, out->flags=FLAGS_LOCAL, ++out; + continue; + make_sh_unique_mine: + /* we own pb->id */ + if(out==pb) { out=pe; for(p=pb; p!=pe; ++p) p->flags=FLAGS_REMOTE; } + else for(p=pb; p!=pe; ++p) *out=*p,out->flags=FLAGS_REMOTE,++out; + } + sh->n = out - ((struct shared_id*)sh->ptr); + + /* set primary_shared_id flags to match */ + ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1; + sarray_sort(struct shared_id,sh->ptr,sh->n, id,1, buf); + sarray_sort(struct primary_shared_id,pr->ptr,pr->n, id,1, buf); + q=pr->ptr; + for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) { + uint i=pb->i; + pe=pb; while(pe->i==i) ++pe; + if(q->id!=pb->id) printf("FAIL!!!\n"); + q->flag=pb->flags&FLAGS_LOCAL; + ++q; + } +} + + +/*------------------------------------------------------------------------------ + Divide lists for parallel execution +------------------------------------------------------------------------------*/ + +void sublist(const uint *map, uint ***slPtr) { + + // Iterate over array and count items and lists + + uint i,j; + int itemCount = 0, listCount = 0; + const uint *lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + listCount++; + + j=*lmap++; + do { + itemCount++; + } while ((j=*lmap++)!=-(unsigned int)1); + } + + // Determine number of threads and lists + + int maxThreads = 1; +#ifdef _OPENMP + maxThreads = omp_get_max_threads(); +#endif + int max = (maxThreads <= listCount) ? maxThreads : listCount; + if (max == 0) max = 1; + + // Setup sublists + + uint *subListData = tmalloc(uint, max+itemCount+2*listCount); + *slPtr = tmalloc(uint*, maxThreads); + uint **subListPtr = *slPtr; + + subListData[0] = -(unsigned int)1; + subListPtr[0] = subListData; + int nextSubList = 1; + + // Populate sublists + + int copyItemCount = 0; + lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + *subListData++ = i; + + j=*lmap++; + do { + *subListData++ = j; + copyItemCount++; + } while ((j=*lmap++)!=-(unsigned int)1); + *subListData++ = -(unsigned int) 1; + + if ( copyItemCount*max >= itemCount*nextSubList ) { + *subListData= -(unsigned int)1; + + if (copyItemCount != itemCount) { + subListData++; + subListPtr[nextSubList] = subListData; + nextSubList++; + } + } + } + + // Terminate unused sublists + + for (; nextSubList < maxThreads; nextSubList++) { + subListPtr[nextSubList] = subListData; + } + + return; +} + +void subflagged(const uint *map, uint ***slPtr) { + + // Iterate over map and count items + + int count = 0; + const uint *lmap = map; + while(*lmap++ !=-(unsigned int)1) count++; + + // Determine number of threads and sublists + + int maxThreads = 1; +#ifdef _OPENMP + maxThreads = omp_get_max_threads(); +#endif + int maxLists = (maxThreads <= count) ? maxThreads : count; + if (maxLists == 0) maxLists = 1; + + // Setup empty sublists + + uint *subFlaggedData = tmalloc(uint, maxLists+count); + *slPtr = tmalloc(uint*, maxThreads); + + subFlaggedData[0] = -(unsigned int)1; + (*slPtr)[0] = subFlaggedData; + int nextList = 1; + + // Populate sublists + + int copyCount=0; + uint i; + lmap = map; + while((i=*lmap++)!=-(unsigned int)1) { + *subFlaggedData++ = i; + copyCount++; + + if (copyCount*maxLists >= count*nextList) { + *subFlaggedData = -(unsigned int)1; + + if (copyCount != count) { + subFlaggedData++; + (*slPtr)[nextList] = subFlaggedData; + nextList++; + } + } + } + + // Terminate unused sublists + + for (; nextList < maxThreads; nextList++) { + (*slPtr)[nextList] = subFlaggedData; + } + + return; +} + +/*------------------------------------------------------------------------------ + Local setup +------------------------------------------------------------------------------*/ + +/* assumes nz is sorted by primary, then flag, then index */ +static const uint *local_map(const struct array *nz, const int ignore_flagged) +{ + uint *map, *p, count = 1; + const struct nonzero_id *row, *other, *end; +#define DO_COUNT(cond) do \ + for(row=nz->ptr,end=row+nz->n;row!=end;) { \ + ulong row_id = row->id; int any=0; \ + for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \ + any=2, ++count; \ + count+=any, row=other; \ + } while(0) + if(ignore_flagged) DO_COUNT(other->flag==0); else DO_COUNT(1); +#undef DO_COUNT + p = map = tmalloc(uint,count); +#define DO_SET(cond) do \ + for(row=nz->ptr,end=row+nz->n;row!=end;) { \ + ulong row_id = row->id; int any=0; \ + *p++ = row->i; \ + for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \ + any=1, *p++ = other->i; \ + if(any) *p++ = -(uint)1; else --p; \ + row=other; \ + } while(0) + if(ignore_flagged) DO_SET(other->flag==0); else DO_SET(1); +#undef DO_SET + *p = -(uint)1; + return map; +} + +static const uint *flagged_primaries_map(const struct array *nz) +{ + uint *map, *p, count=1; + const struct nonzero_id *row, *end; + for(row=nz->ptr,end=row+nz->n;row!=end;++row) + if(row->i==row->primary && row->flag==1) ++count; + p = map = tmalloc(uint,count); + for(row=nz->ptr,end=row+nz->n;row!=end;++row) + if(row->i==row->primary && row->flag==1) *p++ = row->i; + *p = -(uint)1; + return map; +} + +/*------------------------------------------------------------------------------ + Remote execution and setup +------------------------------------------------------------------------------*/ + +typedef void exec_fun( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf); +typedef void fin_fun(void *data); + +struct gs_remote { + uint buffer_size; + void *data; + exec_fun *exec; + fin_fun *fin; +}; + +typedef void setup_fun(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf); + +/*------------------------------------------------------------------------------ + Pairwise Execution +------------------------------------------------------------------------------*/ +struct pw_comm_data { + uint n; /* number of messages */ + uint *p; /* message source/dest proc */ + uint *size; /* size of message */ + uint total; /* sum of message sizes */ + size_t *offsets; +}; + +struct pw_data { + struct pw_comm_data comm[2]; + const uint *map[2]; + comm_req *req; + uint buffer_size; + uint **submap[2]; +}; + +static char *pw_exec_recvs(char *buf, const unsigned unit_size, + const struct comm *comm, + const struct pw_comm_data *c, comm_req *req) +{ + const uint *p=c->p, *size=c->size; + int i; + char *retVal = buf; + +#ifdef MPITHREADS +#pragma omp for +#endif + for (i = 0; i < c->n; i++) { + comm_irecv(&(req[i]),comm,buf+c->offsets[i]*unit_size,size[i]*unit_size,p[i],p[i]); + } + + if (c->n != 0) { + retVal += c->offsets[c->n-1]*unit_size + size[c->n-1]*unit_size; + } + + return retVal; +} + +static char *pw_exec_sends(char *buf, const unsigned unit_size, + const struct comm *comm, + const struct pw_comm_data *c, comm_req *req) +{ + const uint *p=c->p, *size=c->size; + int i; + char *retVal = buf; + +#ifdef MPITHREADS +#pragma omp for +#endif + for(i = 0; i < c->n; i++) { + comm_isend(&(req[i]),comm,buf+c->offsets[i]*unit_size,size[i]*unit_size,p[i],comm->id); + } + + if (c->n != 0) { + retVal += c->offsets[c->n-1]*unit_size + size[c->n-1]*unit_size; + } + + return retVal; +} + +static void pw_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct pw_data *pwd = execdata; + static gs_scatter_fun *const scatter_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_gather_fun *const gather_from_buf[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop }; + const unsigned recv = 0^transpose, send = 1^transpose; + unsigned unit_size = vn*gs_dom_size[dom]; + +#ifdef MPITHREADS + char *sendbuf; +#else + static char *sendbuf; +#endif + + int thd = 0; + int inp = 0; + #ifdef _OPENMP + thd = omp_get_thread_num(); + inp = omp_in_parallel(); + #endif + + if (inp) { + /* post receives */ +#ifndef MPITHREADS + #pragma omp master +#endif + { + sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req); + } + #pragma omp barrier + + /* fill send buffer */ + scatter_to_buf[mode](sendbuf,data,vn,(pwd->submap[send])[thd],dom); + #pragma omp barrier + + /* post sends */ +#ifndef MPITHREADS + #pragma omp master +#endif + { + pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send], + &pwd->req[pwd->comm[recv].n]); + } + #pragma omp barrier + + #pragma omp master + { + comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n); + } + #pragma omp barrier + + /* gather using recv buffer */ + gather_from_buf[mode](data,buf,vn,(pwd->submap[recv])[thd],dom,op); + } else { + /* post receives */ + sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req); + /* fill send buffer */ + scatter_to_buf[mode](sendbuf,data,vn,pwd->map[send],dom); + /* post sends */ + pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send], + &pwd->req[pwd->comm[recv].n]); + comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n); + /* gather using recv buffer */ + gather_from_buf[mode](data,buf,vn,pwd->map[recv],dom,op); + } + +} + +/*------------------------------------------------------------------------------ + Pairwise setup +------------------------------------------------------------------------------*/ +static void pw_comm_setup(struct pw_comm_data *data, struct array *sh, + const unsigned flags_mask, buffer *buf) +{ + uint n=0,count=0, lp=-(uint)1; + struct shared_id *s, *se; + /* sort by remote processor and id (a globally consistent ordering) */ + sarray_sort_2(struct shared_id,sh->ptr,sh->n, p,0, id,1, buf); + /* assign index into buffer */ + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + if(s->flags&flags_mask) { s->bi = -(uint)1; continue; } + s->bi = count++; + if(s->p!=lp) lp=s->p, ++n; + } + data->n = n; + data->p = tmalloc(uint,2*n); + data->size = data->p + n; + data->total = count; + n = 0, lp=-(uint)1; + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + if(s->flags&flags_mask) continue; + if(s->p!=lp) { + lp=s->p; + if(n!=0) data->size[n-1] = count; + count=0, data->p[n++]=lp; + } + ++count; + } + if(n!=0) data->size[n-1] = count; + + data->offsets = malloc(sizeof(size_t)*data->n); + int i; + size_t len = 0; + for (i = 0; i < data->n; i++) { + data->offsets[i] = len; + len += data->size[i]; + } +} + +static void pw_comm_free(struct pw_comm_data *data) { free(data->p); free(data->offsets);} + +/* assumes that the bi field of sh is set */ +static const uint *pw_map_setup(struct array *sh, buffer *buf) +{ + uint count=0, *map, *p; + struct shared_id *s, *se; + sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf); + /* calculate map size */ + count=1; + for(s=sh->ptr,se=s+sh->n;s!=se;) { + uint i=s->i; + if(s->bi==-(uint)1) { ++s; continue; } + count+=3; + for(++s;s!=se&&s->i==i;++s) if(s->bi!=-(uint)1) ++count; + } + /* write map */ + p = map = tmalloc(uint,count); + for(s=sh->ptr,se=s+sh->n;s!=se;) { + uint i=s->i; + if(s->bi==-(uint)1) { ++s; continue; } + *p++ = i, *p++ = s->bi; + for(++s;s!=se&&s->i==i;++s) if(s->bi!=-(uint)1) *p++ = s->bi; + *p++ = -(uint)1; + } + *p = -(uint)1; + return map; +} + + +static struct pw_data *pw_setup_aux(struct array *sh, buffer *buf) +{ + struct pw_data *pwd = tmalloc(struct pw_data,1); + + /* default behavior: receive only remotely unflagged data */ + pw_comm_setup(&pwd->comm[0],sh, FLAGS_REMOTE, buf); + pwd->map[0] = pw_map_setup(sh, buf); + sublist(pwd->map[0], &(pwd->submap[0])); + + /* default behavior: send only locally unflagged data */ + pw_comm_setup(&pwd->comm[1],sh, FLAGS_LOCAL, buf); + pwd->map[1] = pw_map_setup(sh, buf); + sublist(pwd->map[1], &(pwd->submap[1])); + + pwd->req = tmalloc(comm_req,pwd->comm[0].n+pwd->comm[1].n); + pwd->buffer_size = pwd->comm[0].total + pwd->comm[1].total; + + return pwd; +} + +static void pw_free(struct pw_data *data) +{ + pw_comm_free(&data->comm[0]); + pw_comm_free(&data->comm[1]); + free((uint*)data->map[0]); + free((uint*)data->map[1]); + free(data->req); + free(data); + + free((data->submap[0])[0]); + free(data->submap[0]); + free((data->submap[1])[0]); + free(data->submap[1]); +} + +static void pw_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct pw_data *pwd = pw_setup_aux(&top->sh,buf); + r->buffer_size = pwd->buffer_size; + r->data = pwd; + r->exec = (exec_fun*)&pw_exec; + r->fin = (fin_fun*)&pw_free; +} + +/*------------------------------------------------------------------------------ + Crystal-Router Execution +------------------------------------------------------------------------------*/ +struct cr_stage { + const uint *scatter_map, *gather_map; + uint size_r, size_r1, size_r2; + uint size_sk, size_s, size_total; + uint p1, p2; + unsigned nrecvn; +}; + +struct cr_data { + struct cr_stage *stage[2]; + unsigned nstages; + uint buffer_size, stage_buffer_size; +}; + +static void cr_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct cr_data *crd = execdata; + static gs_scatter_fun *const scatter_user_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_scatter_fun *const scatter_buf_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec, &gs_scatter }; + static gs_scatter_fun *const scatter_buf_to_user[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop }; + static gs_gather_fun *const gather_buf_to_user[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop }; + static gs_gather_fun *const gather_buf_to_buf[] = + { &gs_gather, &gs_gather_vec, &gs_gather_vec, &gs_gather }; + const unsigned unit_size = vn*gs_dom_size[dom], nstages=crd->nstages; + unsigned k; + char *sendbuf, *buf_old, *buf_new; + const struct cr_stage *stage = crd->stage[transpose]; + buf_old = buf; + buf_new = buf_old + unit_size*crd->stage_buffer_size; + /* crystal router */ + for(k=0;knp+k); + if(stage[k].nrecvn==2) + comm_irecv(&req[2],comm,buf_new+unit_size*stage[k].size_r1, + unit_size*stage[k].size_r2, stage[k].p2, comm->np+k); + sendbuf = buf_new+unit_size*stage[k].size_r; + if(k==0) + scatter_user_to_buf[mode](sendbuf,data,vn,stage[0].scatter_map,dom); + else + scatter_buf_to_buf[mode](sendbuf,buf_old,vn,stage[k].scatter_map,dom), + gather_buf_to_buf [mode](sendbuf,buf_old,vn,stage[k].gather_map ,dom,op); + + comm_isend(&req[0],comm,sendbuf,unit_size*stage[k].size_s, + stage[k].p1, comm->np+k); + comm_wait(&req[0],1+stage[k].nrecvn); + { char *t = buf_old; buf_old=buf_new; buf_new=t; } + } + scatter_buf_to_user[mode](data,buf_old,vn,stage[k].scatter_map,dom); + gather_buf_to_user [mode](data,buf_old,vn,stage[k].gather_map ,dom,op); +} + +/*------------------------------------------------------------------------------ + Crystal-Router setup +------------------------------------------------------------------------------*/ +static void cr_schedule(struct cr_data *data, const struct comm *comm) +{ + const uint id = comm->id; + uint bl=0, n=comm->np; + unsigned k = 0; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + if(idnstages = k; + data->stage[0] = tmalloc(struct cr_stage,2*(k+1)); + data->stage[1] = data->stage[0] + (k+1); + bl=0, n=comm->np, k=0; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + uint targ; unsigned recvn; + recvn = 1, targ = n-1-(id-bl)+bl; + if(id==targ) targ=bh, recvn=0; + if(n&1 && id==bh) recvn=2; + data->stage[1][k].nrecvn=data->stage[0][k].nrecvn=recvn; + data->stage[1][k].p1 =data->stage[0][k].p1 =targ; + data->stage[1][k].p2 =data->stage[0][k].p2 =comm->id-1; + if(idmax; + struct crl_id *w = cw->ptr; + struct shared_id *s, *se; + +#define CW_ADD(aid,ap,ari,asi) do { \ + if(cw_n==cw_max) \ + array_reserve(struct crl_id,cw,cw_n+1),cw_max=cw->max, \ + w=(struct crl_id*)cw->ptr+cw_n; \ + w->id=aid, w->p=ap, w->ri=ari, w->si=asi; \ + ++w, ++cw_n; \ + } while(0) + + for(s=sh->ptr,se=s+sh->n;s!=se;++s) { + int send = (s->flags&send_mask)==0; + int recv = (s->flags&recv_mask)==0; + if(s->i!=last_i) last_i=s->i, added_myself=0; + if(!added_myself && recv && (s->flags&FLAGS_LOCAL)==0) { + added_myself=1; + CW_ADD(s->id,this_p,s->i,s->i); + } + if(send) CW_ADD(s->id,s->p,s->ri,s->i); + } + cw->n=cw_n; +#undef CW_ADD +} + +static void crl_maps(struct cr_stage *stage, struct array *cw, buffer *buf) +{ + struct crl_id *w, *we, *other; + uint scount=1, gcount=1, *sp, *gp; + sarray_sort_2(struct crl_id,cw->ptr,cw->n, bi,0, si,0, buf); + for(w=cw->ptr,we=w+cw->n;w!=we;w=other) { + uint bi=w->bi,any=0,si=w->si; + scount+=3; + for(other=w+1;other!=we&&other->bi==bi;++other) + if(other->si!=si) si=other->si, any=2, ++gcount; + gcount+=any; + } + stage->scatter_map = sp = tmalloc(uint,scount+gcount); + stage->gather_map = gp = sp + scount; + for(w=cw->ptr,we=w+cw->n;w!=we;w=other) { + uint bi=w->bi,any=0,si=w->si; + *sp++ = w->si, *sp++ = bi; + *gp++ = bi; + for(other=w+1;other!=we&&other->bi==bi;++other) + if(other->si!=si) si=other->si, any=1, *gp++ = si; + if(any) *gp++ = -(uint)1; else --gp; + *sp++ = -(uint)1; + } + *sp=-(uint)1, *gp=-(uint)1; +} + +static uint crl_work_label(struct array *cw, struct cr_stage *stage, + uint cutoff, int send_hi, buffer *buf) +{ + struct crl_id *w, *we, *start; + uint nsend, nkeep = 0, nks = 0, bi=0; + /* here w->send has a reverse meaning */ + if(send_hi) for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p< cutoff; + else for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p>=cutoff; + sarray_sort_2(struct crl_id,cw->ptr,cw->n, id,1, send,0, buf); + for(start=cw->ptr,w=start,we=w+cw->n;w!=we;++w) { + nkeep += w->send; + if(w->id!=start->id) start=w; + if(w->send!=start->send) w->send=0,w->bi=1, ++nks; else w->bi=0; + } + nsend = cw->n-nkeep; + /* assign indices; sent ids have priority (hence w->send is reversed) */ + sarray_sort(struct crl_id,cw->ptr,cw->n, send,0, buf); + for(start=cw->ptr,w=start,we=w+nsend+nks;w!=we;++w) { + if(w->id!=start->id) start=w, ++bi; + if(w->bi!=1) w->send=1; /* switch back to the usual semantics */ + w->bi = bi; + } + stage->size_s = nsend+nks==0 ? 0 : bi+1; + for(we=(struct crl_id*)cw->ptr+cw->n;w!=we;++w) { + if(w->id!=start->id) start=w, ++bi; + w->send = 0; /* switch back to the usual semantics */ + w->bi = bi; + } + stage->size_sk = cw->n==0 ? 0 : bi+1; + crl_maps(stage,cw,buf); + return nsend; +} + +static void crl_bi_to_si(struct crl_id *w, uint n, uint v) { + for(;n;--n) w->si=w->bi+v, ++w; +} + +static void crl_ri_to_bi(struct crl_id *w, uint n) { + for(;n;--n) w->bi=w->ri, ++w; +} + +static uint cr_learn(struct array *cw, struct cr_stage *stage, + const struct comm *comm, buffer *buf) +{ + comm_req req[3]; + const uint id = comm->id; + uint bl=0, n=comm->np; + uint size_max=0; + uint tag = comm->np; + while(n>1) { + uint nl = (n+1)/2, bh = bl+nl; + uint nkeep, nsend[2], nrecv[2][2] = {{0,0},{0,0}}; + struct crl_id *wrecv[2], *wsend; + nsend[0] = crl_work_label(cw,stage,bh,idsize_s; + nkeep = cw->n - nsend[0]; + + if(stage->nrecvn ) comm_irecv(&req[1],comm,nrecv[0],2*sizeof(uint), + stage->p1,tag); + if(stage->nrecvn==2) comm_irecv(&req[2],comm,nrecv[1],2*sizeof(uint), + stage->p2,tag); + comm_isend(&req[0],comm,nsend,2*sizeof(uint),stage->p1,tag); + comm_wait(req,1+stage->nrecvn),++tag; + + stage->size_r1 = nrecv[0][1], stage->size_r2 = nrecv[1][1]; + stage->size_r = stage->size_r1 + stage->size_r2; + stage->size_total = stage->size_r + stage->size_sk; + if(stage->size_total>size_max) size_max=stage->size_total; + + array_reserve(struct crl_id,cw,cw->n+nrecv[0][0]+nrecv[1][0]); + wrecv[0] = cw->ptr, wrecv[0] += cw->n, wrecv[1] = wrecv[0]+nrecv[0][0]; + wsend = cw->ptr, wsend += nkeep; + if(stage->nrecvn ) + comm_irecv(&req[1],comm,wrecv[0],nrecv[0][0]*sizeof(struct crl_id), + stage->p1,tag); + if(stage->nrecvn==2) + comm_irecv(&req[2],comm,wrecv[1],nrecv[1][0]*sizeof(struct crl_id), + stage->p2,tag); + sarray_sort_2(struct crl_id,cw->ptr,cw->n, send,0, bi,0, buf); + comm_isend(&req[0],comm,wsend,nsend[0]*sizeof(struct crl_id),stage->p1,tag); + comm_wait(req,1+stage->nrecvn),++tag; + + crl_bi_to_si(cw->ptr,nkeep,stage->size_r); + if(stage->nrecvn) crl_bi_to_si(wrecv[0],nrecv[0][0],0); + if(stage->nrecvn==2) crl_bi_to_si(wrecv[1],nrecv[1][0],stage->size_r1); + memmove(wsend,wrecv[0],(nrecv[0][0]+nrecv[1][0])*sizeof(struct crl_id)); + cw->n += nrecv[0][0] + nrecv[1][0]; + cw->n -= nsend[0]; + + if(idptr,cw->n); + crl_maps(stage,cw,buf); + return size_max; +} + +static struct cr_data *cr_setup_aux( + struct array *sh, const struct comm *comm, buffer *buf) +{ + uint size_max[2]; + struct array cw = null_array; + struct cr_data *crd = tmalloc(struct cr_data,1); + + /* default behavior: receive only remotely unflagged data */ + /* default behavior: send only locally unflagged data */ + + cr_schedule(crd,comm); + + sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf); + crl_work_init(&cw,sh, FLAGS_LOCAL , comm->id); + size_max[0]=cr_learn(&cw,crd->stage[0],comm,buf); + crl_work_init(&cw,sh, FLAGS_REMOTE, comm->id); + size_max[1]=cr_learn(&cw,crd->stage[1],comm,buf); + + crd->stage_buffer_size = size_max[1]>size_max[0]?size_max[1]:size_max[0]; + + array_free(&cw); + + crd->buffer_size = 2*crd->stage_buffer_size; + return crd; +} + +static void cr_free_stage_maps(struct cr_stage *stage, unsigned kmax) +{ + unsigned k; + for(k=0; kscatter_map); + ++stage; + } + free((uint*)stage->scatter_map); +} + +static void cr_free(struct cr_data *data) +{ + cr_free_stage_maps(data->stage[0],data->nstages); + cr_free_stage_maps(data->stage[1],data->nstages); + free(data->stage[0]); + free(data); +} + +static void cr_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct cr_data *crd = cr_setup_aux(&top->sh,comm,buf); + r->buffer_size = crd->buffer_size; + r->data = crd; + r->exec = (exec_fun*)&cr_exec; + r->fin = (fin_fun*)&cr_free; +} + +/*------------------------------------------------------------------------------ + All-reduce Execution +------------------------------------------------------------------------------*/ +struct allreduce_data { + const uint *map_to_buf[2], *map_from_buf[2]; + uint buffer_size; +}; + +static void allreduce_exec( + void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, const void *execdata, const struct comm *comm, char *buf) +{ + const struct allreduce_data *ard = execdata; + static gs_scatter_fun *const scatter_to_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop }; + static gs_scatter_fun *const scatter_from_buf[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop }; + uint gvn = vn*(ard->buffer_size/2); + unsigned unit_size = gs_dom_size[dom]; + char *ardbuf; + ardbuf = buf+unit_size*gvn; + /* user array -> buffer */ + gs_init_array(buf,gvn,dom,op); + scatter_to_buf[mode](buf,data,vn,ard->map_to_buf[transpose],dom); + /* all reduce */ + comm_allreduce(comm,dom,op, buf,gvn, ardbuf); + /* buffer -> user array */ + scatter_from_buf[mode](data,buf,vn,ard->map_from_buf[transpose],dom); +} + +/*------------------------------------------------------------------------------ + All-reduce setup +------------------------------------------------------------------------------*/ +static const uint *allreduce_map_setup( + struct array *pr, const unsigned flags_mask, int to_buf) +{ + struct primary_shared_id *p, *pe; + uint count=1, *map, *m; + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) count+=3; + m=map=tmalloc(uint,count); + if(to_buf) { + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) + *m++ = p->i, *m++ = p->ord, *m++ = -(uint)1; + } else { + for(p=pr->ptr,pe=p+pr->n;p!=pe;++p) + if((p->flag&flags_mask)==0) + *m++ = p->ord, *m++ = p->i, *m++ = -(uint)1; + } + *m=-(uint)1; + return map; +} + +static struct allreduce_data *allreduce_setup_aux( + struct array *pr, ulong total_shared) +{ + struct allreduce_data *ard = tmalloc(struct allreduce_data,1); + + /* default behavior: reduce only unflagged data, copy to all */ + ard->map_to_buf [0] = allreduce_map_setup(pr,1,1); + ard->map_from_buf[0] = allreduce_map_setup(pr,0,0); + + /* transpose behavior: reduce all data, copy to unflagged */ + ard->map_to_buf [1] = allreduce_map_setup(pr,0,1); + ard->map_from_buf[1] = allreduce_map_setup(pr,1,0); + + ard->buffer_size = total_shared*2; + return ard; +} + +static void allreduce_free(struct allreduce_data *ard) +{ + free((uint*)ard->map_to_buf[0]); + free((uint*)ard->map_to_buf[1]); + free((uint*)ard->map_from_buf[0]); + free((uint*)ard->map_from_buf[1]); + free(ard); +} + +static void allreduce_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + struct allreduce_data *ard = allreduce_setup_aux(&top->pr,top->total_shared); + r->buffer_size = ard->buffer_size; + r->data = ard; + r->exec = (exec_fun*)&allreduce_exec; + r->fin = (fin_fun*)&allreduce_free; +} + +/*------------------------------------------------------------------------------ + Automatic Setup --- dynamically picks the fastest method +------------------------------------------------------------------------------*/ + +static void dry_run_time(double times[3], const struct gs_remote *r, + const struct comm *comm, buffer *buf) +{ + int i; double t; + buffer_reserve(buf,gs_dom_size[gs_double]*r->buffer_size); + for(i= 2;i;--i) + r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr); + comm_barrier(comm); + t = comm_time(); + for(i=10;i;--i) + r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr); + t = (comm_time() - t)/10; + times[0] = t/comm->np, times[1] = t, times[2] = t; + comm_allreduce(comm,gs_double,gs_add, ×[0],1, &t); + comm_allreduce(comm,gs_double,gs_min, ×[1],1, &t); + comm_allreduce(comm,gs_double,gs_max, ×[2],1, &t); +} + +static void auto_setup(struct gs_remote *r, struct gs_topology *top, + const struct comm *comm, buffer *buf) +{ + pw_setup(r, top,comm,buf); + + if(comm->np>1) { + const char *name = "pairwise"; + struct gs_remote r_alt; + double time[2][3]; + + #define DRY_RUN(i,gsr,str) do { \ + if(comm->id==0) printf(" " str ": "); \ + dry_run_time(time[i],gsr,comm,buf); \ + if(comm->id==0) \ + printf("%g %g %g\n",time[i][0],time[i][1],time[i][2]); \ + } while(0) + + #define DRY_RUN_CHECK(str,new_name) do { \ + DRY_RUN(1,&r_alt,str); \ + if(time[1][2]fin(r->data), *r = r_alt; \ + else \ + r_alt.fin(r_alt.data); \ + } while(0) + + DRY_RUN(0, r, "pairwise times (avg, min, max)"); + + cr_setup(&r_alt, top,comm,buf); + DRY_RUN_CHECK( "crystal router ", "crystal router"); + + if(top->total_shared<100000) { + allreduce_setup(&r_alt, top,comm,buf); + DRY_RUN_CHECK( "all reduce ", "allreduce"); + } + + #undef DRY_RUN_CHECK + #undef DRY_RUN + + if(comm->id==0) printf(" used all_to_all method: %s\n",name); + } +} + +/*------------------------------------------------------------------------------ + Main Execution +------------------------------------------------------------------------------*/ +struct gs_data { + struct comm comm; + const uint *map_local[2]; /* 0=unflagged, 1=all */ + const uint *flagged_primaries; + struct gs_remote r; + uint **submap_local[2]; /* 0=unflagged, 1=all */ + uint **subflagged_primaries; +}; + + +static void gs_aux( + void *u, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf) +{ + static gs_scatter_fun *const local_scatter[] = + { &gs_scatter, &gs_scatter_vec, &gs_scatter_many, &scatter_noop }; + static gs_gather_fun *const local_gather [] = + { &gs_gather, &gs_gather_vec, &gs_gather_many, &gather_noop }; + static gs_init_fun *const init[] = + { &gs_init, &gs_init_vec, &gs_init_many, &init_noop }; + + + int thd = 0; + int inp = 0; + #ifdef _OPENMP + thd = omp_get_thread_num(); + inp = omp_in_parallel(); + #endif + + if(!buf) buf = &static_buffer; + + #pragma omp single + { + buffer_reserve(buf,vn*gs_dom_size[dom]*gsh->r.buffer_size); + } + + if (inp) { + local_gather [mode](u,u,vn,(gsh->submap_local[0^transpose])[thd],dom,op); + #pragma omp barrier + + if(transpose==0) init[mode](u,vn,(gsh->subflagged_primaries)[thd],dom,op); + #pragma omp barrier + + gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr); + #pragma omp barrier + + local_scatter[mode](u,u,vn,(gsh->submap_local[1^transpose])[thd],dom); + #pragma omp barrier + + } else { + local_gather [mode](u,u,vn,gsh->map_local[0^transpose],dom,op); + if(transpose==0) init[mode](u,vn,gsh->flagged_primaries,dom,op); + gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr); + local_scatter[mode](u,u,vn,gsh->map_local[1^transpose],dom); + } + +} + +void gs(void *u, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf) +{ + gs_aux(u,mode_plain,1,dom,op,transpose,gsh,buf); +} + +void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf) +{ + gs_aux(u,mode_vec,vn,dom,op,transpose,gsh,buf); +} + +void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf) +{ + gs_aux((void*)u,mode_many,vn,dom,op,transpose,gsh,buf); +} + +/*------------------------------------------------------------------------------ + Main Setup +------------------------------------------------------------------------------*/ +typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce, + gs_auto } gs_method; + + + +static void local_setup(struct gs_data *gsh, const struct array *nz) +{ + gsh->map_local[0] = local_map(nz,1); + gsh->map_local[1] = local_map(nz,0); + gsh->flagged_primaries = flagged_primaries_map(nz); + sublist(gsh->map_local[0], &(gsh->submap_local[0])); + sublist(gsh->map_local[1], &(gsh->submap_local[1])); + subflagged(gsh->flagged_primaries, &(gsh->subflagged_primaries)); +} + +static void gs_setup_aux(struct gs_data *gsh, const slong *id, uint n, + int unique, gs_method method, int verbose) +{ + static setup_fun *const remote_setup[] = + { &pw_setup, &cr_setup, &allreduce_setup, &auto_setup }; + + struct gs_topology top; + struct crystal cr; + + crystal_init(&cr,&gsh->comm); + + get_topology(&top, id,n, &cr); + if(unique) make_topology_unique(&top,0,gsh->comm.id,&cr.data); + + local_setup(gsh,&top.nz); + + if(verbose && gsh->comm.id==0) + printf("gs_setup: %ld unique labels shared\n",(long)top.total_shared); + + remote_setup[method](&gsh->r, &top,&gsh->comm,&cr.data); + + gs_topology_free(&top); + crystal_free(&cr); +} + +struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm, + int unique, gs_method method, int verbose) +{ + struct gs_data *gsh = tmalloc(struct gs_data,1); + comm_dup(&gsh->comm,comm); + gs_setup_aux(gsh,id,n,unique,method,verbose); + return gsh; +} + +void gs_free(struct gs_data *gsh) +{ + comm_free(&gsh->comm); + free((uint*)gsh->map_local[0]), free((uint*)gsh->map_local[1]); + free((uint*)gsh->flagged_primaries); + gsh->r.fin(gsh->r.data); + free((gsh->submap_local[0])[0]); + free(gsh->submap_local[0]); + free((gsh->submap_local[1])[0]); + free(gsh->submap_local[1]); + free((gsh->subflagged_primaries)[0]); + free(gsh->subflagged_primaries); + free(gsh); +} + +void gs_unique(slong *id, uint n, const struct comm *comm) +{ + struct gs_topology top; + struct crystal cr; + crystal_init(&cr,comm); + get_topology(&top, id,n, &cr); + make_topology_unique(&top,id,comm->id,&cr.data); + gs_topology_free(&top); + crystal_free(&cr); +} + +/*------------------------------------------------------------------------------ + FORTRAN interface +------------------------------------------------------------------------------*/ + +#undef gs_op + +#undef gs_free +#undef gs_setup +#undef gs_many +#undef gs_vec +#undef gs +#define cgs PREFIXED_NAME(gs ) +#define cgs_vec PREFIXED_NAME(gs_vec ) +#define cgs_many PREFIXED_NAME(gs_many ) +#define cgs_setup PREFIXED_NAME(gs_setup) +#define cgs_free PREFIXED_NAME(gs_free ) + +#define fgs_setup FORTRAN_NAME(gs_setup ,GS_SETUP ) +#define fgs FORTRAN_NAME(gs_op ,GS_OP ) +#define fgs_vec FORTRAN_NAME(gs_op_vec ,GS_OP_VEC ) +#define fgs_many FORTRAN_NAME(gs_op_many ,GS_OP_MANY ) +#define fgs_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS) +#define fgs_free FORTRAN_NAME(gs_free ,GS_FREE ) + +static struct gs_data **fgs_info = 0; +static int fgs_max = 0; +static int fgs_n = 0; + +void fgs_setup(sint *handle, const slong id[], const sint *n, + const MPI_Fint *comm, const sint *np) +{ + struct gs_data *gsh; + if(fgs_n==fgs_max) fgs_max+=fgs_max/2+1, + fgs_info=trealloc(struct gs_data*,fgs_info,fgs_max); + gsh=fgs_info[fgs_n]=tmalloc(struct gs_data,1); + comm_init_check(&gsh->comm,*comm,*np); + gs_setup_aux(gsh,id,*n,0,gs_pairwise,1); + *handle = fgs_n++; +} + +static void fgs_check_handle(sint handle, const char *func, unsigned line) +{ + if(handle<0 || handle>=fgs_n || !fgs_info[handle]) + fail(1,__FILE__,line,"%s: invalid handle", func); +} + +static void fgs_check_parms(sint handle, sint dom, sint op, + const char *func, unsigned line) +{ + if(dom<1 || dom>4) + fail(1,__FILE__,line,"%s: datatype %d not in valid range 1-4",func,dom); + if(op <1 || op >4) + fail(1,__FILE__,line,"%s: op %d not in valid range 1-4",func,op); + fgs_check_handle(handle,func,line); +} + +void fgs(const sint *handle, void *u, const sint *dom, const sint *op, + const sint *transpose) +{ + fgs_check_parms(*handle,*dom,*op,"gs_op",__LINE__); + cgs(u,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0,fgs_info[*handle],0); +} + +void fgs_vec(const sint *handle, void *u, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + fgs_check_parms(*handle,*dom,*op,"gs_op_vec",__LINE__); + cgs_vec(u,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0, + fgs_info[*handle],0); +} + +void fgs_many(const sint *handle, void *u1, void *u2, void *u3, + void *u4, void *u5, void *u6, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + void *uu[6]; + uu[0]=u1,uu[1]=u2,uu[2]=u3,uu[3]=u4,uu[4]=u5,uu[5]=u6; + fgs_check_parms(*handle,*dom,*op,"gs_op_many",__LINE__); + cgs_many((void *const*)uu,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0, + fgs_info[*handle],0); +} + +static struct array fgs_fields_array = null_array; + +void fgs_fields(const sint *handle, + void *u, const sint *stride, const sint *n, + const sint *dom, const sint *op, const sint *transpose) +{ + size_t offset; + void **p; + uint i; + + fgs_check_parms(*handle,*dom,*op,"gs_op_fields",__LINE__); + if(*n<0) return; + + array_reserve(void*,&fgs_fields_array,*n); + p = fgs_fields_array.ptr; + offset = *stride * gs_dom_size[*dom-1]; + for(i=*n;i;--i) *p++ = u, u = (char*)u + offset; + + cgs_many((void *const*)fgs_fields_array.ptr,*n, + (gs_dom)(*dom-1),(gs_op_t)(*op-1), + *transpose!=0, fgs_info[*handle],0); +} + +void fgs_free(const sint *handle) +{ + fgs_check_handle(*handle,"gs_free",__LINE__); + cgs_free(fgs_info[*handle]); + fgs_info[*handle] = 0; +} + diff --git a/src/jl/gs.h b/src/jl/gs.h new file mode 100644 index 0000000..43fc142 --- /dev/null +++ b/src/jl/gs.h @@ -0,0 +1,141 @@ +#ifndef GS_H +#define GS_H + +#if !defined(COMM_H) || !defined(GS_DEFS_H) || !defined(MEM_H) +#warning "gs.h" requires "comm.h", "gs_defs.h", and "mem.h" +#endif + +/* + Gather/Scatter Library + + The code + + struct comm c; // see "comm.h" + slong id[n]; // the slong type is defined in "types.h" + ... + struct gs_data *g = gs_setup(id,n, &c, 0,gs_auto,1); + + defines a partition of the set of (processor, local index) pairs, + (p,i) \in S_j iff abs(id[i]) == j on processor p + That is, all (p,i) pairs are grouped together (in group S_j) that have the + same id (=j). + S_0 is treated specially --- it is ignored completely + (i.e., when id[i] == 0, local index i does not participate in any + gather/scatter operation + If id[i] on proc p is negative then the pair (p,i) is "flagged". This + determines the non-symmetric behavior. For the simpler, symmetric case, + all id's should be positive. + + The second to last argument to gs_setup is the method to use, one of + gs_pairwise, gs_crystal_router, gs_all_reduce, gs_auto + The method "gs_auto" tries ~10 runs of each and chooses the fastest. + For a single-use handle, it makes more sense to use "gs_crystal_router". + + When "g" is no longer needed, free it with + + gs_free(g); + + A basic gather/scatter operation is, e.g., + + double v[n]; buffer buf; // see "mem.h" for "buffer" + ... + gs(v, gs_double,gs_add, 0, g,&buf); + + The buffer pointer can be null, in which case, a static buffer is used, + shared across all gs handles. + This gs call has the effect, (in the simple, symmetric, unflagged case) + + v[i] <-- \sum_{ (p,j) \in S_{id[i]} } v_(p) [j] + + where v_(p) [j] means v[j] on proc p. In other words, every v[i] is replaced + by the sum of all v[j]'s with the same id, given by id[i]. This accomplishes + "direct stiffness summation" corresponding to the action of QQ^T, where + "Q" is a boolean matrix that copies from a global vector (indexed by id) + to the local vectors indexed by (p,i) pairs. + + Summation on doubles is not the only operation and datatype supported. The + full list is defined in "gs_defs.h", and includes the operations + gs_add, gs_mul, gs_max, gs_min + and datatypes + gs_double, gs_float, gs_int, gs_long, gs_sint, gs_slong. + (The int and long types are the plain C types, whereas sint and slong + are defined in "types.h"). + + For the nonsymmetric behavior, the "transpose" parameter is important: + + gs(v, gs_double,gs_add, transpose, g,&buf); + + When transpose == 0, any "flagged" (p,i) pairs (id[i] negative on p) + do not participate in the sum, but *do* still receive the sum on output. + As a special case, when only one (p,i) pair is unflagged per group this + corresponds to the rectangular "Q" matrix referred to above. + + When transpose == 1, the "flagged" (p,i) pairs *do* participate in the sum, + but do *not* get set on output. In the special case of only one unflagged + (p,i) pair, this corresponds to the transpose of "Q" referred to above. + + + + A version for vectors (contiguously packed) is, e.g., + + double v[n][k]; + gs_vec(v,k, gs_double,gs_add, transpose, g,&buf); + + which is like "gs" operating on the datatype double[k], + with summation here being vector summation. Number of messages sent + is independent of k. + + For combining the communication for "gs" on multiple arrays: + + double v1[n], v2[n], ..., vk[n]; + double (*vs)[k] = {v1, v2, ..., vk}; + + gs_many(vs,k, gs_double,op, t, g,&buf); + + This call is equivalent to + + gs(v1, gs_double,op, t, g, &buf); + gs(v2, gs_double,op, t, g, &buf); + ... + gs(vk, gs_double,op, t, g, &buf); + + except that all communication is done together. + + + + Finally, gs_unique has the same basic signature as gs_setup: + + gs_unique(id,n, &c); + + This call modifies id, "flagging" (by negating id[i]) all (p,i) pairs in + each group except one. The sole "unflagged" member of the group is chosen + in an arbitrary but consistent way. If the "unique" flag is set when + calling gs_setup, the behavior is equivalent to first calling gs_unique, + except that the id array is left unmodified. + + +*/ + +#define gs PREFIXED_NAME(gs ) +#define gs_vec PREFIXED_NAME(gs_vec ) +#define gs_many PREFIXED_NAME(gs_many ) +#define gs_setup PREFIXED_NAME(gs_setup ) +#define gs_free PREFIXED_NAME(gs_free ) +#define gs_unique PREFIXED_NAME(gs_unique) + +struct gs_data; +typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce, + gs_auto } gs_method; + +void gs(void *u, gs_dom dom, gs_op op, unsigned transpose, + struct gs_data *gsh, buffer *buf); +void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf); +void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op, + unsigned transpose, struct gs_data *gsh, buffer *buf); +struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm, + int unique, gs_method method, int verbose); +void gs_free(struct gs_data *gsh); +void gs_unique(slong *id, uint n, const struct comm *comm); + +#endif diff --git a/src/jl/gs_defs.h b/src/jl/gs_defs.h new file mode 100644 index 0000000..df4ad7b --- /dev/null +++ b/src/jl/gs_defs.h @@ -0,0 +1,81 @@ +#ifndef GS_DEFS_H +#define GS_DEFS_H + +/* requires: + , for GS_DEFINE_IDENTITIES() + "types.h" for gs_sint, gs_slong +*/ + +/*------------------------------------------------------------------------------ + Monoid Definitions + + Here are defined the domains and operations, each combination being a + commutative semigroup, as well as the identity element making each a + commutative monoid. +------------------------------------------------------------------------------*/ + +/* the supported domains */ +#define GS_FOR_EACH_DOMAIN(macro) \ + macro(double) \ + macro(float ) \ + macro(int ) \ + macro(long ) \ + WHEN_LONG_LONG(macro(long_long)) + +/* the supported ops */ +#define GS_FOR_EACH_OP(T,macro) \ + macro(T,add) \ + macro(T,mul) \ + macro(T,min) \ + macro(T,max) \ + macro(T,bpr) + +#define GS_DO_add(a,b) a+=b +#define GS_DO_mul(a,b) a*=b +#define GS_DO_min(a,b) if(ba) a=b +#define GS_DO_bpr(a,b) \ + do if(b!=0) { uint a_ = a; uint b_ = b; \ + if(a_==0) { a=b_; break; } \ + for(;;) { if(a_>=1; else if(b_>=1; else break; } \ + a = a_; \ + } while(0) + +/* the monoid identity elements */ +#define GS_DEFINE_MONOID_ID(T,min,max) \ + static const T gs_identity_##T[] = { 0, 1, max, min, 0 }; +#define GS_DEFINE_IDENTITIES() \ + GS_DEFINE_MONOID_ID(double, -DBL_MAX, DBL_MAX) \ + GS_DEFINE_MONOID_ID(float , -FLT_MAX, FLT_MAX) \ + GS_DEFINE_MONOID_ID(int , INT_MIN, INT_MAX) \ + GS_DEFINE_MONOID_ID(long , LONG_MIN, LONG_MAX) \ + WHEN_LONG_LONG(GS_DEFINE_MONOID_ID(long_long,LLONG_MIN,LLONG_MAX)) + +/*------------------------------------------------------------------------------ + Enums and constants +------------------------------------------------------------------------------*/ + +/* domain enum */ +#define LIST GS_FOR_EACH_DOMAIN(ITEM) gs_dom_n +#define ITEM(T) gs_##T, +typedef enum { LIST } gs_dom; +#undef ITEM +#undef LIST + +#define gs_sint TYPE_LOCAL(gs_int,gs_long,gs_long_long) +#define gs_slong TYPE_GLOBAL(gs_int,gs_long,gs_long_long) + +/* domain type size array */ +#define GS_DOM_SIZE_ITEM(T) sizeof(T), +#define GS_DEFINE_DOM_SIZES() \ + static const unsigned gs_dom_size[] = \ + { GS_FOR_EACH_DOMAIN(GS_DOM_SIZE_ITEM) 0 }; + +/* operation enum */ +#define LIST GS_FOR_EACH_OP(T,ITEM) gs_op_n +#define ITEM(T,op) gs_##op, +typedef enum { LIST } gs_op; +#undef ITEM +#undef LIST + +#endif diff --git a/src/jl/gs_local.c b/src/jl/gs_local.c new file mode 100644 index 0000000..2bc246d --- /dev/null +++ b/src/jl/gs_local.c @@ -0,0 +1,336 @@ +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "types.h" + +#define gs_gather_array PREFIXED_NAME(gs_gather_array ) +#define gs_init_array PREFIXED_NAME(gs_init_array ) +#define gs_gather PREFIXED_NAME(gs_gather ) +#define gs_scatter PREFIXED_NAME(gs_scatter ) +#define gs_init PREFIXED_NAME(gs_init ) +#define gs_gather_vec PREFIXED_NAME(gs_gather_vec ) +#define gs_scatter_vec PREFIXED_NAME(gs_scatter_vec ) +#define gs_init_vec PREFIXED_NAME(gs_init_vec ) +#define gs_gather_many PREFIXED_NAME(gs_gather_many ) +#define gs_scatter_many PREFIXED_NAME(gs_scatter_many ) +#define gs_init_many PREFIXED_NAME(gs_init_many ) +#define gs_gather_vec_to_many PREFIXED_NAME(gs_gather_vec_to_many ) +#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec) +#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many) + +#include "gs_defs.h" +GS_DEFINE_IDENTITIES() +GS_DEFINE_DOM_SIZES() + +/*------------------------------------------------------------------------------ + The array gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_array_##T##_##OP( \ + T *restrict out, const T *restrict in, uint n) \ +{ \ + for(;n;--n) { T q = *in++, *p = out++; GS_DO_##OP(*p,q); } \ +} + +/*------------------------------------------------------------------------------ + The array initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_array_##T(T *restrict out, uint n, gs_op op) \ +{ \ + const T e = gs_identity_##T[op]; \ + for(;n;--n) *out++=e; \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_GATHER + +/*------------------------------------------------------------------------------ + The basic gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_##T##_##OP( \ + T *restrict out, const T *restrict in, const unsigned in_stride, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T t=out[i]; \ + j=*map++; do GS_DO_##OP(t,in[j*in_stride]); while((j=*map++)!=-(uint)1); \ + out[i]=t; \ + } \ +} + +/*------------------------------------------------------------------------------ + The basic scatter kernel +------------------------------------------------------------------------------*/ +#define DEFINE_SCATTER(T) \ +static void scatter_##T( \ + T *restrict out, const unsigned out_stride, \ + const T *restrict in, const unsigned in_stride, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T t=in[i*in_stride]; \ + j=*map++; do out[j*out_stride]=t; while((j=*map++)!=-(uint)1); \ + } \ +} + +/*------------------------------------------------------------------------------ + The basic initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_##T(T *restrict out, const uint *restrict map, gs_op op) \ +{ \ + uint i; const T e = gs_identity_##T[op]; \ + while((i=*map++)!=-(uint)1) out[i]=e; \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_SCATTER(T) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_SCATTER +#undef DEFINE_GATHER + +/*------------------------------------------------------------------------------ + The vector gather kernel +------------------------------------------------------------------------------*/ +#define DEFINE_GATHER(T,OP) \ +static void gather_vec_##T##_##OP( \ + T *restrict out, const T *restrict in, const unsigned vn, \ + const uint *restrict map) \ +{ \ + uint i,j; \ + while((i=*map++)!=-(uint)1) { \ + T *restrict p = &out[i*vn], *pe = p+vn; \ + j=*map++; do { \ + const T *restrict q = &in[j*vn]; \ + T *restrict pk=p; do { GS_DO_##OP(*pk,*q); ++pk, ++q; } while(pk!=pe); \ + } while((j=*map++)!=-(uint)1); \ + } \ +} + +/*------------------------------------------------------------------------------ + The vector scatter kernel +------------------------------------------------------------------------------*/ +void gs_scatter_vec( + void *restrict out, const void *restrict in, const unsigned vn, + const uint *restrict map, gs_dom dom) +{ + unsigned unit_size = vn*gs_dom_size[dom]; + uint i,j; + while((i=*map++)!=-(uint)1) { + const char *t = (const char *)in + i*unit_size; + j=*map++; do + memcpy((char *)out+j*unit_size,t,unit_size); + while((j=*map++)!=-(uint)1); + } +} + +/*------------------------------------------------------------------------------ + The vector initialization kernel +------------------------------------------------------------------------------*/ +#define DEFINE_INIT(T) \ +static void init_vec_##T(T *restrict out, const unsigned vn, \ + const uint *restrict map, gs_op op) \ +{ \ + uint i; const T e = gs_identity_##T[op]; \ + while((i=*map++)!=-(uint)1) { \ + T *restrict u = (T*)out + vn*i, *ue = u+vn; \ + do *u++ = e; while(u!=ue); \ + } \ +} + +#define DEFINE_PROCS(T) \ + GS_FOR_EACH_OP(T,DEFINE_GATHER) \ + DEFINE_INIT(T) + +GS_FOR_EACH_DOMAIN(DEFINE_PROCS) + +#undef DEFINE_PROCS +#undef DEFINE_INIT +#undef DEFINE_GATHER + +#undef DO_bpr +#undef DO_max +#undef DO_min +#undef DO_mul +#undef DO_add + +#define SWITCH_DOMAIN_CASE(T) case gs_##T: WITH_DOMAIN(T); break; +#define SWITCH_DOMAIN(dom) do switch(dom) { \ + GS_FOR_EACH_DOMAIN(SWITCH_DOMAIN_CASE) case gs_dom_n: break; } while(0) + +#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break; +#define SWITCH_OP(T,op) do switch(op) { \ + GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0) + +/*------------------------------------------------------------------------------ + Array kernels +------------------------------------------------------------------------------*/ +void gs_gather_array(void *out, const void *in, uint n, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_array_##T##_##OP(out,in,n) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_init_array(void *out, uint n, gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_array_##T(out,n,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Plain kernels; vn parameter ignored but present for consistent signatures +------------------------------------------------------------------------------*/ +void gs_gather(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_##T##_##OP(out,in,1,map) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_scatter(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ +#define WITH_DOMAIN(T) scatter_##T(out,1,in,1,map) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +void gs_init(void *out, const unsigned vn, const uint *map, + gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_##T(out,map,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Vector kernels +------------------------------------------------------------------------------*/ +void gs_gather_vec(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ +#define WITH_OP(T,OP) gather_vec_##T##_##OP(out,in,vn,map) +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_init_vec(void *out, const unsigned vn, const uint *map, + gs_dom dom, gs_op op) +{ +#define WITH_DOMAIN(T) init_vec_##T(out,vn,map,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +/*------------------------------------------------------------------------------ + Multiple array kernels +------------------------------------------------------------------------------*/ +void gs_gather_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ + uint k; + typedef void *ptr_to_void; typedef const void *ptr_to_const_void; + const ptr_to_void *p = out; const ptr_to_const_void *q = in; +#define WITH_OP(T,OP) for(k=0;k multiple arrays + Scatter from multiple arrays -> strided array, + Scatter from strided array -> multiple arrays, +------------------------------------------------------------------------------*/ +void gs_gather_vec_to_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef void *ptr_to_void; + const ptr_to_void *p = out; const char *q = in; +#define WITH_OP(T,OP) \ + for(i=vn;i;--i) gather_##T##_##OP(*p++,(const T*)q,vn,map), q+=unit_size +#define WITH_DOMAIN(T) SWITCH_OP(T,op) + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +#undef WITH_OP +} + +void gs_scatter_many_to_vec(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef const void *ptr_to_const_void; + char *p = out; const ptr_to_const_void *q = in; +#define WITH_DOMAIN(T) \ + for(i=vn;i;--i) scatter_##T((T*)p,vn,*q++,1,map), p+=unit_size + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +void gs_scatter_vec_to_many(void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom) +{ + unsigned i; const unsigned unit_size = gs_dom_size[dom]; + typedef void *ptr_to_void; + const ptr_to_void *p = out; const char *q = in; +#define WITH_DOMAIN(T) \ + for(i=vn;i;--i) scatter_##T(*p++,1,(const T*)q,vn,map), q+=unit_size + SWITCH_DOMAIN(dom); +#undef WITH_DOMAIN +} + +#undef SWITCH_OP +#undef SWITCH_OP_CASE +#undef SWITCH_DOMAIN +#undef SWITCH_DOMAIN_CASE diff --git a/src/jl/gs_local.h b/src/jl/gs_local.h new file mode 100644 index 0000000..fc7c414 --- /dev/null +++ b/src/jl/gs_local.h @@ -0,0 +1,43 @@ +#ifndef GS_LOCAL_H +#define GS_LOCAL_H + +#if !defined(NAME_H) || !defined(TYPES_H) || !defined(GS_DEFS_H) +#warning "gs_local.h" requires "name.h", "types.h", and "gs_defs.h" +#endif + +#define gs_gather_array PREFIXED_NAME(gs_gather_array ) +#define gs_init_array PREFIXED_NAME(gs_init_array ) +#define gs_gather PREFIXED_NAME(gs_gather ) +#define gs_scatter PREFIXED_NAME(gs_scatter ) +#define gs_init PREFIXED_NAME(gs_init ) +#define gs_gather_vec PREFIXED_NAME(gs_gather_vec ) +#define gs_scatter_vec PREFIXED_NAME(gs_scatter_vec ) +#define gs_init_vec PREFIXED_NAME(gs_init_vec ) +#define gs_gather_many PREFIXED_NAME(gs_gather_many ) +#define gs_scatter_many PREFIXED_NAME(gs_scatter_many ) +#define gs_init_many PREFIXED_NAME(gs_init_many ) +#define gs_gather_vec_to_many PREFIXED_NAME(gs_gather_vec_to_many ) +#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec) +#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many) + +void gs_gather_array(void *out, const void *in, uint n, + gs_dom dom, gs_op op); +void gs_init_array(void *out, uint n, gs_dom dom, gs_op op); + +typedef void gs_gather_fun( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom, gs_op op); +typedef void gs_scatter_fun( + void *out, const void *in, const unsigned vn, + const uint *map, gs_dom dom); +typedef void gs_init_fun( + void *out, const unsigned vn, + const uint *map, gs_dom dom, gs_op op); + +extern gs_gather_fun gs_gather, gs_gather_vec, gs_gather_many, + gs_gather_vec_to_many; +extern gs_scatter_fun gs_scatter, gs_scatter_vec, gs_scatter_many, + gs_scatter_many_to_vec, gs_scatter_vec_to_many; +extern gs_init_fun gs_init, gs_init_vec, gs_init_many; + +#endif diff --git a/src/jl/gs_test.c b/src/jl/gs_test.c new file mode 100644 index 0000000..588a52b --- /dev/null +++ b/src/jl/gs_test.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "gs_defs.h" +#include "gs.h" + +typedef double T; +const gs_dom dom = gs_double; + +static void test(const struct comm *comm) +{ + struct gs_data *gsh; + const uint np = comm->np; + slong *id = tmalloc(slong,np+4); + T *v = tmalloc(T,np+4); + uint i; + id[0] = -(slong)(np+10+3*comm->id); + for(i=0;iid+1; + id[np+2] = comm->id+1; + id[np+3] = np-comm->id; + gsh = gs_setup(id,np+4,comm,0,gs_auto,1); + free(id); + + for(i=0;iid==0) for(i=0;iid==0) printf("\n"); + for(i=0;iid==0) for(i=0;i +#include +#include +#ifdef MPI +# include +#else + typedef void MPI_Comm; +#endif +#include "name.h" +#include "types.h" + +typedef long real; +sint datatype = 4; + +#define fgs_setup FORTRAN_NAME(gs_setup ,GS_SETUP ) +#define fgs_op FORTRAN_NAME(gs_op ,GS_OP ) +#define fgs_op_vec FORTRAN_NAME(gs_op_vec ,GS_OP_VEC ) +#define fgs_op_many FORTRAN_NAME(gs_op_many ,GS_OP_MANY ) +#define fgs_op_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS) +#define fgs_free FORTRAN_NAME(gs_free ,GS_FREE ) + +void fgs_setup(sint *handle, const slong id[], const sint *n, + const MPI_Comm *comm, const sint *np); +void fgs_op(const sint *handle, void *u, const sint *dom, const sint *op, + const sint *transpose); +void fgs_op_vec(const sint *handle, void *u, const sint *n, + const sint *dom, const sint *op, const sint *transpose); +void fgs_op_many(const sint *handle, void *u1, void *u2, void *u3, + void *u4, void *u5, void *u6, const sint *n, + const sint *dom, const sint *op, const sint *transpose); +void fgs_free(const sint *handle); + +void assert_is_zero(real v) +{ + if(fabs(v) < 1e-20) return; + printf("test failed\n"); + exit(1); +} + +int main(int narg, char* arg[]) +{ + sint transpose=0; + sint id=0,np=1; + sint i,handle,maxv=3; + real *u; + slong *glindex; +#ifndef MPI + int comm; +#else + MPI_Comm comm; + MPI_Init(&narg,&arg); + MPI_Comm_dup(MPI_COMM_WORLD,&comm); + { int i; + MPI_Comm_rank(comm,&i); id=i; + MPI_Comm_size(comm,&i); np=i; + } +#endif + + glindex = malloc(np*2*sizeof(slong)); + for(i=0;i +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "gs_defs.h" +#include "gs.h" + +static void test(const struct comm *comm) +{ + uint i,np=comm->np,id=comm->id; + slong *glindex = tmalloc(slong,np*2); + char *out, *buf = tmalloc(char,80+np*2*30); + struct gs_data *gsh; + + for(i=0;iid); + for(i=0;iid); + for(i=0;iid); + for(i=0;i for size_t, offsetof + for malloc, calloc, realloc, free + for memcpy + "c99.h" + "fail.h" +*/ + +#if !defined(C99_H) || !defined(FAIL_H) +#error "mem.h" requires "c99.h" and "fail.h" +#endif + +/* + All memory management goes through the wrappers defined in this + header. Diagnostics can be turned on with + -DPRINT_MALLOCS=1 + Then all memory management operations will be printed to stdout. + + Most memory management occurs through use of the "array" type, + defined below, which defines a generic dynamically-sized array + that grows in bursts. The "buffer" type is a "char" array and + is often passed around by code to provide a common area for + scratch work. +*/ + +#ifndef PRINT_MALLOCS +# define PRINT_MALLOCS 0 +#else +# include +# ifndef comm_gbl_id +# define comm_gbl_id PREFIXED_NAME(comm_gbl_id) +# define comm_gbl_np PREFIXED_NAME(comm_gbl_np) +# include "types.h" + extern uint comm_gbl_id, comm_gbl_np; +# endif +#endif + +/*-------------------------------------------------------------------------- + Memory Allocation Wrappers to Catch Out-of-memory + --------------------------------------------------------------------------*/ + +static inline void *smalloc(size_t size, const char *file, unsigned line) +{ + void *restrict res = malloc(size); + #if PRINT_MALLOCS + fprintf(stdout,"MEM: proc %04d: %p = malloc(%ld) @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout); + #endif + if(!res && size) + fail(1,file,line,"allocation of %ld bytes failed\n",(long)size); + return res; +} + +static inline void *scalloc( + size_t nmemb, size_t size, const char *file, unsigned line) +{ + void *restrict res = calloc(nmemb, size); + #if PRINT_MALLOCS + fprintf(stdout,"MEM: proc %04d: %p = calloc(%ld) @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size*nmemb,file,line), fflush(stdout); + #endif + if(!res && nmemb) + fail(1,file,line,"allocation of %ld bytes failed\n", + (long)size*nmemb); + return res; +} + +static inline void *srealloc( + void *restrict ptr, size_t size, const char *file, unsigned line) +{ + void *restrict res = realloc(ptr, size); + #if PRINT_MALLOCS + if(res!=ptr) { + if(ptr) + fprintf(stdout,"MEM: proc %04d: %p freed by realloc @ %s(%u)\n", + (int)comm_gbl_id,ptr,file,line), fflush(stdout); + fprintf(stdout,"MEM: proc %04d: %p = realloc of %p to %lu @ %s(%u)\n", + (int)comm_gbl_id,res,ptr,(long)size,file,line), fflush(stdout); + } else + fprintf(stdout,"MEM: proc %04d: %p realloc'd to %lu @ %s(%u)\n", + (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout); + #endif + if(!res && size) + fail(1,file,line,"allocation of %ld bytes failed\n",(long)size); + return res; +} + +#define tmalloc(type, count) \ + ((type*) smalloc((count)*sizeof(type),__FILE__,__LINE__) ) +#define tcalloc(type, count) \ + ((type*) scalloc((count),sizeof(type),__FILE__,__LINE__) ) +#define trealloc(type, ptr, count) \ + ((type*) srealloc((ptr),(count)*sizeof(type),__FILE__,__LINE__) ) + +#if PRINT_MALLOCS +static inline void sfree(void *restrict ptr, const char *file, unsigned line) +{ + free(ptr); + fprintf(stdout,"MEM: proc %04d: %p freed @ %s(%u)\n", + (int)comm_gbl_id,ptr,file,line), fflush(stdout); +} +#define free(x) sfree(x,__FILE__,__LINE__) +#endif + +/*-------------------------------------------------------------------------- + A dynamic array + --------------------------------------------------------------------------*/ +struct array { void *ptr; size_t n,max; }; +#define null_array {0,0,0} +static void array_init_(struct array *a, size_t max, size_t size, + const char *file, unsigned line) +{ + a->n=0, a->max=max, a->ptr=smalloc(max*size,file,line); +} +static void array_resize_(struct array *a, size_t max, size_t size, + const char *file, unsigned line) +{ + a->max=max, a->ptr=srealloc(a->ptr,max*size,file,line); +} +static void *array_reserve_(struct array *a, size_t min, size_t size, + const char *file, unsigned line) +{ + size_t max = a->max; + if(maxptr; +} + +#define array_free(a) (free((a)->ptr)) +#define array_init(T,a,max) array_init_(a,max,sizeof(T),__FILE__,__LINE__) +#define array_resize(T,a,max) array_resize_(a,max,sizeof(T),__FILE__,__LINE__) +#define array_reserve(T,a,min) array_reserve_(a,min,sizeof(T),__FILE__,__LINE__) + +static void array_cat_(size_t size, struct array *d, const void *s, size_t n, + const char *file, unsigned line) +{ + char *out = array_reserve_(d,d->n+n,size, file,line); + memcpy(out+d->n*size, s, n*size); + d->n+=n; +} + +#define array_cat(T,d,s,n) array_cat_(sizeof(T),d,s,n,__FILE__,__LINE__) + +/*-------------------------------------------------------------------------- + Buffer = char array + --------------------------------------------------------------------------*/ +typedef struct array buffer; +#define null_buffer null_array +#define buffer_init(b,max) array_init(char,b,max) +#define buffer_resize(b,max) array_resize(char,b,max) +#define buffer_reserve(b,max) array_reserve(char,b,max) +#define buffer_free(b) array_free(b) + +/*-------------------------------------------------------------------------- + Alignment routines + --------------------------------------------------------------------------*/ +#define ALIGNOF(T) offsetof(struct { char c; T x; }, x) +static size_t align_as_(size_t a, size_t n) { return (n+a-1)/a*a; } +#define align_as(T,n) align_as_(ALIGNOF(T),n) +#define align_ptr(T,base,offset) ((T*)((char*)(base)+align_as(T,offset))) +#endif + diff --git a/src/jl/name.h b/src/jl/name.h new file mode 100644 index 0000000..b4bcd91 --- /dev/null +++ b/src/jl/name.h @@ -0,0 +1,44 @@ +#ifndef NAME_H +#define NAME_H + +/* establishes some macros to establish + * the FORTRAN naming convention + default gs_setup, etc. + -DUPCASE GS_SETUP, etc. + -DUNDERSCORE gs_setup_, etc. + * a prefix for all external (non-FORTRAN) function names + for example, -DPREFIX=jl_ transforms fail -> jl_fail + * a prefix for all external FORTRAN function names + for example, -DFPREFIX=jlf_ transforms gs_setup_ -> jlf_gs_setup_ +*/ + +/* the following macro functions like a##b, + but will expand a and/or b if they are themselves macros */ +#define TOKEN_PASTE_(a,b) a##b +#define TOKEN_PASTE(a,b) TOKEN_PASTE_(a,b) + +#ifdef PREFIX +# define PREFIXED_NAME(x) TOKEN_PASTE(PREFIX,x) +#else +# define PREFIXED_NAME(x) x +#endif + +#ifdef FPREFIX +# define FPREFIXED_NAME(x) TOKEN_PASTE(FPREFIX,x) +#else +# define FPREFIXED_NAME(x) x +#endif + +#if defined(UPCASE) +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(up) +# define FORTRAN_UNPREFIXED(low,up) up +#elif defined(UNDERSCORE) +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(TOKEN_PASTE(low,_)) +# define FORTRAN_UNPREFIXED(low,up) TOKEN_PASTE(low,_) +#else +# define FORTRAN_NAME(low,up) FPREFIXED_NAME(low) +# define FORTRAN_UNPREFIXED(low,up) low +#endif + +#endif + diff --git a/src/jl/odep_info.py b/src/jl/odep_info.py new file mode 100755 index 0000000..620d0ec --- /dev/null +++ b/src/jl/odep_info.py @@ -0,0 +1,50 @@ +#!/usr/bin/python + +import sys, os, re + +obj_files = sys.argv[1:] + +defined = dict((x,set([])) for x in obj_files) +undefined = dict((x,set([])) for x in obj_files) +nm_re = re.compile("[0-9a-fA-F]*\s*([BCDRTU])\s+([A-Za-z_][A-Za-z_0-9]*)\s*") +def nm_match(x): return ( nm_re.match(line) for line in os.popen('nm -g '+x) ) +def nm_line(x,m): + if m.group(1)=='U': undefined[x].add(m.group(2)) + else: defined[x].add(m.group(2)) +[ [ nm_line(x,m) for m in nm_match(x) if m!=None ] for x in obj_files ] + +def closure(seq,f): + v = [], [x for x in seq], set(x for x in seq) + while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in + f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]] + return v[0] + +needs={} +def get_needs(x): + if not needs.has_key(x): + needs[x]=[y for y in obj_files if len(defined[y]&undefined[x])] + return needs[x] +deps = dict((x,closure(get_needs(x),get_needs)) for x in obj_files) + +for x in deps: + print x,'depends on',reduce((lambda a,b: a+" "+b),deps[x],"") +print + +results = [ os.path.splitext(x)[0] for x in obj_files if 'main' in defined[x] ] +print "RESULTS="+reduce((lambda a,b: a+" "+b),results,"") +print + +def need_X(objs): + for x in objs: + if "XOpenDisplay" in undefined[x]: return True + return False + +for x in results: + objs = deps[x+'.o']; + if not (x+'.o') in objs: objs.append(x+'.o') + sobjs = reduce((lambda a,b: a+" "+b),objs,"") + if need_X(objs): + print x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -lX11 -o $@" + else: + print x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -o $@" + diff --git a/src/jl/rand_elt_test.c b/src/jl/rand_elt_test.c new file mode 100644 index 0000000..1e11dae --- /dev/null +++ b/src/jl/rand_elt_test.c @@ -0,0 +1,169 @@ +#include +#include +#include "c99.h" +#include "types.h" +#include "name.h" +#include "poly.h" +#include "lob_bnd.h" + +static double det_2(const double A[4]) { return A[0]*A[3]-A[1]*A[2]; } + +static double quad_2(const double x0, const double g[2], const double H[3], + const double r[2]) +{ + return x0 + (g[0]*r[0]+g[1]*r[1]) + + ( r[0] * (H[0]*r[0]+H[1]*r[1]) + + r[1] * (H[1]*r[0]+H[2]*r[1]) )/2; +} + +static void quad_2_grad(double grad[2], const double g[2], const double H[3], + const double r[2]) +{ + grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]); + grad[1] = g[1] + (H[1]*r[0]+H[2]*r[1]); +} + +static double quad_2_jac(const double g[4], const double H[6], + const double r[2]) +{ + double J[4]; + quad_2_grad(J ,g ,H ,r); + quad_2_grad(J+2,g+2,H+3,r); + return det_2(J); +} + +static double det_3(const double A[9]) +{ + const double a = A[4]*A[8]-A[5]*A[7], + b = A[5]*A[6]-A[3]*A[8], + c = A[3]*A[7]-A[4]*A[6]; + return A[0]*a+A[1]*b+A[2]*c; +} + +static double quad_3(const double x0, const double g[3], const double H[6], + const double r[3]) +{ + return x0 + (g[0]*r[0]+g[1]*r[1]+g[2]*r[2]) + + ( r[0] * (H[0]*r[0]+H[1]*r[1]+H[2]*r[2]) + + r[1] * (H[1]*r[0]+H[3]*r[1]+H[4]*r[2]) + + r[2] * (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]) )/2; +} + +static void quad_3_grad(double grad[3], const double g[3], const double H[6], + const double r[3]) +{ + grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]+H[2]*r[2]); + grad[1] = g[1] + (H[1]*r[0]+H[3]*r[1]+H[4]*r[2]); + grad[2] = g[2] + (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]); +} + +static double quad_3_jac(const double g[9], const double H[18], + const double r[3]) +{ + double J[9]; + quad_3_grad(J ,g ,H ,r); + quad_3_grad(J+3,g+3,H+ 6,r); + quad_3_grad(J+6,g+6,H+12,r); + return det_3(J); +} + +void rand_elt_2(double *x, double *y, + const double *zr, unsigned nr, + const double *zs, unsigned ns) +{ + static int init=0; + static double z4[4], lob_bnd_data[16+3*4*(2*16+1)], + work[2*16*(4+16+1)]; + unsigned i,j; + double x0[2], g[4], H[6], jac[4*4], r[2]; + struct dbl_range jr; + if(!init) { + init=1; + lobatto_nodes(z4,4); + lob_bnd_setup(lob_bnd_data,4,16); + } + do { + for(i=0;i<4;++i) g[i] = -1+2*(rand()/(double)RAND_MAX); + for(i=0;i<6;++i) H[i] =.5*(-1+2*(rand()/(double)RAND_MAX)); + for(j=0;j<4;++j) { r[1] = z4[j]; + for(i=0;i<4;++i) { r[0] = z4[i]; + jac[j*4+i] = quad_2_jac(g,H,r); + } + } + jr = lob_bnd_2(lob_bnd_data,4,16, lob_bnd_data,4,16, jac, work); + /*printf("Jacobian range %g, %g\n", jr.min, jr.max);*/ + } while(jr.max*jr.min<=0); + for(i=0;i< 2;++i) x0[i] = -1+2*(rand()/(double)RAND_MAX); + for(j=0;j +#include +#include +#include "c99.h" +#include "name.h" +#include "types.h" +#include "fail.h" +#include "mem.h" +#include "sort.h" + +#define sarray_permute_ PREFIXED_NAME(sarray_permute_) +#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_) + +void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work) +{ + char *const ar = A, *const item = work; + sint *const fperm = (sint*)perm; + uint i; + for(i=0;iptr, + (char*)buf->ptr + align_as_(align,n*sizeof(uint))); +} diff --git a/src/jl/sarray_sort.h b/src/jl/sarray_sort.h new file mode 100644 index 0000000..77dc653 --- /dev/null +++ b/src/jl/sarray_sort.h @@ -0,0 +1,89 @@ +#ifndef SARRAY_SORT_H +#define SARRAY_SORT_H + +#if !defined(SORT_H) +#warning "sarray_sort.h" requires "sort.h" +#endif + +/*------------------------------------------------------------------------------ + + Array of Structs Sort + + buffer *buf; + typedef struct { ... } T; + T A[n]; + + sarray_sort(T,A,n, field_name,is_long, buf) + - sort A according to the struct field "field_name", + which is a ulong/uint field according as is_long is true/false + + sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf) + - sort A by field1 then field2 + + sarray_permute(T,A,n, perm, work) + - permute A (in-place) + A[0] <- A[perm[0]], etc. + work needs to hold sizeof(T) bytes (i.e., 1 T) + + sarray_permute_buf(T,A,n, buf); + - permute A according to the permutation in buf + A[0] <- A[perm[0]], etc. + where uint *perm = buf->ptr (see "sort.h") + + ----------------------------------------------------------------------------*/ + + +#define sarray_permute_ PREFIXED_NAME(sarray_permute_) +#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_) + +void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work); +void sarray_permute_buf_( + size_t align, size_t size, void *A, size_t n, buffer *buf); + +#define sarray_permute(T,A,n, perm, work) \ + sarray_permute_(sizeof(T),A,n, perm, work) +#define sarray_permute_buf(T,A,n, buf) \ + sarray_permute_buf_(ALIGNOF(T),sizeof(T),A,n,buf) + +#define sarray_sort_field(T,A,n, field,is_long, buf,keep) do { \ + if(is_long) \ + sortp_long(buf,keep, (ulong*)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \ + else \ + sortp (buf,keep, (uint *)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \ +} while (0) + +#define sarray_sort(T,A,n, field,is_long, buf) do { \ + sarray_sort_field(T,A,n, field,is_long, buf,0); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf) do { \ + sarray_sort_field(T,A,n, field2,is_long2, buf,0); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_3(T,A,n, field1,is_long1, field2,is_long2, \ + field3,is_long3, buf) do { \ + sarray_sort_field(T,A,n, field3,is_long3, buf,0); \ + sarray_sort_field(T,A,n, field2,is_long2, buf,1); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +#define sarray_sort_4(T,A,n, field1,is_long1, field2,is_long2, \ + field3,is_long3, field4,is_long4, buf) do { \ + sarray_sort_field(T,A,n, field4,is_long4, buf,0); \ + sarray_sort_field(T,A,n, field3,is_long3, buf,1); \ + sarray_sort_field(T,A,n, field2,is_long2, buf,1); \ + sarray_sort_field(T,A,n, field1,is_long1, buf,1); \ + sarray_permute_buf(T,A,n, buf); \ +} while (0) + +static void sarray_perm_invert( + uint *const pinv, const uint *const perm, const uint n) +{ + uint i; for(i=0;i +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" +#include "sarray_sort.h" + +int main() +{ + struct rec { double d; slong l; sint i; float f; }; + buffer buf = {0,0,0}; + struct rec rec[500]; + uint i; + + for(i=0;i<500;++i) { + sint num1 = rand() & 0xff; + slong num2 = rand(); + num2<<=(CHAR_BIT)*sizeof(int)-1; + num2|=rand(); + num2<<=(CHAR_BIT)*sizeof(int)-1; + num2|=rand(); + num2= num2<0?-num2:num2; + rec[i].d = num2; + rec[i].f = num2; + rec[i].l = num2; + rec[i].i = num1; + } + sarray_sort_2(struct rec,rec,500, i,0, l,1, &buf); + for(i=0;i<500;++i) + printf("%g\t%g\t%ld\t%d\n", + rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i); + + printf("\n"); + sarray_sort(struct rec,rec,500, l,1, &buf); + for(i=0;i<500;++i) + printf("%g\t%g\t%ld\t%d\n", + rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i); + buffer_free(&buf); + return 0; +} + diff --git a/src/jl/sarray_transfer.c b/src/jl/sarray_transfer.c new file mode 100644 index 0000000..9eed6ba --- /dev/null +++ b/src/jl/sarray_transfer.c @@ -0,0 +1,197 @@ +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "crystal.h" +#include "sort.h" + +#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many) +#define sarray_transfer_ PREFIXED_NAME(sarray_transfer_ ) +#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_) + +static void pack_int( + buffer *const data, const unsigned row_size, const uint id, + const char *const restrict input, const uint n, const unsigned size, + const unsigned p_off, const uint *const restrict perm) +{ + const unsigned after = p_off + sizeof(uint), after_len = size-after; + +#define GET_P() memcpy(&p,row+p_off,sizeof(uint)) +#define COPY_ROW() memcpy(out,row,p_off), \ + memcpy((char*)out + p_off,row+after,after_len) + +#define PACK_BODY() do { \ + uint dummy, *len_ptr=&dummy; \ + uint i, p,lp = -(uint)1, len=0; \ + uint *restrict out = buffer_reserve(data, n*(row_size+3)*sizeof(uint)); \ + for(i=0;in = out - (uint*)data->ptr; \ +} while(0) + PACK_BODY(); +#undef COPY_ROW +#undef GET_P +} + +static void pack_ext( + buffer *const data, const unsigned row_size, const uint id, + const char *const restrict input, const uint n, const unsigned size, + const uint *const restrict proc, const unsigned proc_stride, + const uint *const restrict perm) +{ + #define GET_P() p=*(const uint*)((const char*)proc+proc_stride*perm[i]) + #define COPY_ROW() memcpy(out,row,size) + PACK_BODY(); + #undef PACK_BODY + #undef COPY_ROW + #undef GET_P +} + +static void pack_more( + buffer *const data, const unsigned off, const unsigned row_size, + const char *const restrict input, const unsigned size, + const uint *restrict perm) +{ + uint *restrict buf = data->ptr, *buf_end = buf+data->n; + while(buf!=buf_end) { + uint *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) + memcpy((char*)buf+off, input+size*(*perm++), size), buf+=row_size; + } +} + +static void unpack_more( + char *restrict out, const unsigned size, + const buffer *const data, const unsigned off, const unsigned row_size) +{ + const uint *restrict buf = data->ptr, *buf_end = buf+data->n; + while(buf!=buf_end) { + const uint *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) + memcpy(out, (char*)buf+off, size), out+=size, buf+=row_size; + } +} + +static void unpack_int( + char *restrict out, const unsigned size, const unsigned p_off, + const buffer *const data, const unsigned row_size, int set_src) +{ + const unsigned after = p_off + sizeof(uint), after_len = size-after; + const uint *restrict buf = data->ptr, *buf_end = buf+data->n; + const unsigned pi = set_src ? 1:0; + while(buf!=buf_end) { + const uint p=buf[pi], *msg_end = buf+3+buf[2]; buf+=3; + while(buf!=msg_end) { + memcpy(out,buf,p_off); + memcpy(out+p_off,&p,sizeof(uint)); + memcpy(out+after,(const char *)buf+p_off,after_len); + out+=size, buf+=row_size; + } + } +} + +static uint num_rows(const buffer *const data, const unsigned row_size) +{ + const uint *buf = data->ptr, *buf_end = buf + data->n; + uint n=0; + while(buf!=buf_end) { uint len=buf[2]; n+=len, buf+=len+3; } + return n/row_size; +} + +static uint cap_rows(buffer *const data, const unsigned row_size,const uint max) +{ + uint *buf = data->ptr, *buf_end = buf + data->n; + const uint maxn = max*row_size; + uint n=0; + while(buf!=buf_end) { + uint len=buf[2]; n+=len; + if(nn = (buf-(uint*)data->ptr)+3+buf[2]; + buf+=len+3; + while(buf!=buf_end) { uint len=buf[2]; n+=len, buf+=len+3; } + break; + } + } + return n/row_size; +} + +/* An must be >= 1 */ +uint sarray_transfer_many( + struct array *const *const A, const unsigned *const size, const unsigned An, + const int fixed, const int ext, const int set_src, const unsigned p_off, + const uint *const restrict proc, const unsigned proc_stride, + struct crystal *const cr) +{ + uint n, *perm; + unsigned i,row_size,off,off1; + + off1 = size[0]; + if(!ext) off1 -= sizeof(uint); + row_size=off1; for(i=1;iwork,0, proc,A[0]->n,proc_stride); + + if(!ext) pack_int(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0], + p_off, perm); + else pack_ext(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0], + proc,proc_stride, perm); + for(off=off1,i=1;idata,off,row_size, A[i]->ptr,size[i], perm),off+=size[i]; + + crystal_router(cr); + + if(!fixed) { + n = num_rows(&cr->data,row_size); + for(i=0;in=n; + } else { + uint max=A[0]->max, an; + for(i=1;imaxmax; + n = cap_rows(&cr->data,row_size, max); + an = n>max?max:n; + for(i=0;in=an; + } + + if(!ext) unpack_int (A[0]->ptr,size[0],p_off, &cr->data, row_size, set_src); + else unpack_more(A[0]->ptr,size[0], &cr->data,0,row_size); + for(off=off1,i=1;iptr,size[i], &cr->data,off,row_size),off+=size[i]; + + return n; +} + + +void sarray_transfer_(struct array *const A, const unsigned size, + const unsigned p_off, const int set_src, + struct crystal *const cr) +{ + sarray_transfer_many(&A,&size,1, 0,0,set_src,p_off, + (uint*)((char*)A->ptr+p_off),size, cr); +} + +void sarray_transfer_ext_(struct array *const A, const unsigned size, + const uint *const proc, const unsigned proc_stride, + struct crystal *const cr) +{ + sarray_transfer_many(&A,&size,1, 0,1,0,0, proc,proc_stride, cr); +} + diff --git a/src/jl/sarray_transfer.h b/src/jl/sarray_transfer.h new file mode 100644 index 0000000..c195e21 --- /dev/null +++ b/src/jl/sarray_transfer.h @@ -0,0 +1,95 @@ +#ifndef SARRAY_TRANSFER_H +#define SARRAY_TRANSFER_H + +#if !defined(CRYSTAL_H) +#warning "sarray_transfer.h" requires "crystal.h" +#endif + +/* + High-level interface for the crystal router. + Given an array of structs, transfers each to the process indicated + by a field of the struct, which gets set to the source process on output. + + For the dynamic "array" type, see "mem.h". + + Requires a "crystal router" object: + + struct comm c; + struct crystal cr; + + comm_init(&c, MPI_COMM_WORLD); + crystal_init(&cr, &c); + + Example sarray_transfer usage: + + struct T { ...; uint proc; ...; }; + struct array A = null_array; + struct T *p, *e; + + // resize A to 100 struct T's, fill up with data + p = array_reserve(struct T, &A, 100), A.n=100; + for(e=p+A.n;p!=e;++p) { + ... + p->proc = ...; + ... + } + + // array A represents the array + // struct T ar[A.n] where &ar[0] == A.ptr + // transfer ar[i] to processor ar[i].proc for each i=0,...,A.n-1: + + sarray_transfer(struct T, A, proc,set_src, &cr); + + // now array A represents a different array with a different size + // struct T ar[A.n] where &ar[0] == A.ptr + // the ordering is arbitrary + // if set_src != 0, ar[i].proc is set to the proc where ar[i] came from + // otherwise ar[i].proc is unchanged (and == this proc id) + + // note: two calls of + sarray_transfer(struct T, A, proc,1, &cr); + // in a row should return A to its original state, up to ordering + + Cleanup: + array_free(&A); + crystal_free(&cr); + comm_free(&c); + + Example sarray_transfer_ext usage: + + struct T { ... }; + struct array A; + uint proc[A.n]; + + // array A represents the array + // struct T ar[A.n] where &ar[0] == A.ptr + // transfer ar[i] to processor proc[i] for each i=0,...,A.n-1: + sarray_transfer_ext(struct T, &A, proc, &cr); + + // no information is available now on where each struct came from + +*/ + +#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many) +#define sarray_transfer_ PREFIXED_NAME(sarray_transfer_ ) +#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_) + +uint sarray_transfer_many( + struct array *const *const A, const unsigned *const size, const unsigned An, + const int fixed, const int ext, const int set_src, const unsigned p_off, + const uint *const restrict proc, const unsigned proc_stride, + struct crystal *const cr); +void sarray_transfer_(struct array *const A, const unsigned size, + const unsigned p_off, const int set_src, + struct crystal *const cr); +void sarray_transfer_ext_(struct array *const A, const unsigned size, + const uint *const proc, const unsigned proc_stride, + struct crystal *const cr); + +#define sarray_transfer(T,A,proc_field,set_src,cr) \ + sarray_transfer_(A,sizeof(T),offsetof(T,proc_field),set_src,cr) + +#define sarray_transfer_ext(T,A,proc,proc_stride,cr) \ + sarray_transfer_ext_(A,sizeof(T),proc,proc_stride,cr) + +#endif diff --git a/src/jl/sarray_transfer_test.c b/src/jl/sarray_transfer_test.c new file mode 100644 index 0000000..aaf3b7f --- /dev/null +++ b/src/jl/sarray_transfer_test.c @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "comm.h" +#include "mem.h" +#include "sort.h" +#include "sarray_sort.h" +#include "crystal.h" +#include "sarray_transfer.h" + +typedef struct { + double d; + ulong l,l2; + uint i; + uint p; +} r_work; + +int main(int narg, char *arg[]) +{ + comm_ext world; int np; + struct comm comm; + struct crystal crystal; + struct array A, A0=null_array; r_work *row, *row_0; + uint i; +#ifdef MPI + MPI_Init(&narg,&arg); + world = MPI_COMM_WORLD; + MPI_Comm_size(world,&np); +#else + world=0, np=1; +#endif + + comm_init(&comm,world); + crystal_init(&crystal,&comm); + + array_init(r_work,&A,np*3), A.n=np*3, row=A.ptr; + for(i=0;i %02d: %08x %08x %d %g\n", + (int)comm.id,(int)row[i].p,(int)row[i].i, + (int)row[i].l,(int)row[i].p,row[i].d); + + array_cat(r_work,&A0, row,A.n); + + sarray_transfer(r_work,&A, p,1, &crystal); + + row=A.ptr; + for(i=0;i +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" + +#define T unsigned int +#define SORT_SUFFIX _ui +#include "sort_imp.h" +#undef SORT_SUFFIX +#undef T + +#if defined(USE_LONG) || defined(GLOBAL_LONG) +# define T unsigned long +# define SORT_SUFFIX _ul +# include "sort_imp.h" +# undef SORT_SUFFIX +# undef T +#endif + +#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG) +# define T unsigned long long +# define SORT_SUFFIX _ull +# include "sort_imp.h" +# undef SORT_SUFFIX +# undef T +#endif diff --git a/src/jl/sort.h b/src/jl/sort.h new file mode 100644 index 0000000..eaeeb95 --- /dev/null +++ b/src/jl/sort.h @@ -0,0 +1,76 @@ +#ifndef SORT_H +#define SORT_H + +#if !defined(TYPES_H) || !defined(MEM_H) +#warning "sort.h" requires "types.h" and "mem.h" +/* types.h defines uint, ulong + mem.h defines buffer */ +#endif + +/*------------------------------------------------------------------------------ + + Sort + + O(n) stable sort with good performance for all n + + sortv (uint *out, const uint *A, uint n, uint stride, buffer *buf) + sortv_long(ulong *out, const ulong *A, uint n, uint stride, buffer *buf) + + sortp (buffer *buf, int perm_start, const uint *A, uint n, uint stride) + sortp_long(buffer *buf, int perm_start, const ulong *A, uint n, uint stride) + + A, n, stride : specifices the input (stride is in bytes!) + out : the sorted values on output + + For the value sort, (sortv*) + A and out may alias (A == out) exactly when stride == sizeof(T) + + For the permutation sort, (sortp*) + The permutation can be both input (when start_perm!=0) and output, + following the convention that it is always at the start of the buffer buf: + uint *perm = buf->ptr; + The permutation denotes the ordering + A[perm[0]], A[perm[1]], ..., A[perm[n-1]] + (assuming stride == sizeof(uint) or sizeof(ulong) as appropriate) + and is re-arranged stably to give a sorted ordering. + Specifying start_perm==0 is equivalent to specifying + perm[i] = i, i=0,...,n-1 + for an initial permutation (but may be faster). + The buffer will be expanded as necessary to accomodate the permutation + and the required scratch space. + + Most code calls these routines indirectly via the higher-level routine + sarray_sort for sorting arrays of structs (see "sarray_sort.h"). + + ----------------------------------------------------------------------------*/ + +#define sortv_ui PREFIXED_NAME(sortv_ui) +#define sortv_ul PREFIXED_NAME(sortv_ul) +#define sortv_ull PREFIXED_NAME(sortv_ull) +#define sortp_ui PREFIXED_NAME(sortp_ui) +#define sortp_ul PREFIXED_NAME(sortp_ul) +#define sortp_ull PREFIXED_NAME(sortp_ull) + +#define sortv TYPE_LOCAL(sortv_ui,sortv_ul,sortv_ull) +#define sortp TYPE_LOCAL(sortp_ui,sortp_ul,sortp_ull) +#define sortv_long TYPE_GLOBAL(sortv_ui,sortv_ul,sortv_ull) +#define sortp_long TYPE_GLOBAL(sortp_ui,sortp_ul,sortp_ull) + +void sortv_ui(unsigned *out, const unsigned *A, uint n, unsigned stride, + buffer *restrict buf); +void sortv_ul(unsigned long *out, + const unsigned long *A, uint n, unsigned stride, + buffer *restrict buf); +uint *sortp_ui(buffer *restrict buf, int start_perm, + const unsigned *restrict A, uint n, unsigned stride); +uint *sortp_ul(buffer *restrict buf, int start_perm, + const unsigned long *restrict A, uint n, unsigned stride); +#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG) +void sortv_ull(unsigned long long *out, + const unsigned long long *A, uint n, unsigned stride, + buffer *restrict buf); +uint *sortp_ull(buffer *restrict buf, int start_perm, + const unsigned long long *restrict A, uint n, unsigned stride); +#endif + +#endif diff --git a/src/jl/sort_imp.h b/src/jl/sort_imp.h new file mode 100644 index 0000000..08b05d1 --- /dev/null +++ b/src/jl/sort_imp.h @@ -0,0 +1,543 @@ +#if !defined(T) || !defined(SORT_SUFFIX) +#error sort_imp.h not meant to be compiled by itself +#endif + +#define sort_data TOKEN_PASTE(sort_data ,SORT_SUFFIX) +#define radix_count TOKEN_PASTE(radix_count ,SORT_SUFFIX) +#define radix_offsets TOKEN_PASTE(radix_offsets ,SORT_SUFFIX) +#define radix_zeros TOKEN_PASTE(radix_zeros ,SORT_SUFFIX) +#define radix_passv TOKEN_PASTE(radix_passv ,SORT_SUFFIX) +#define radix_sortv TOKEN_PASTE(radix_sortv ,SORT_SUFFIX) +#define radix_passp0_b TOKEN_PASTE(radix_passp0_b ,SORT_SUFFIX) +#define radix_passp_b TOKEN_PASTE(radix_passp_b ,SORT_SUFFIX) +#define radix_passp_m TOKEN_PASTE(radix_passp_m ,SORT_SUFFIX) +#define radix_passp_e TOKEN_PASTE(radix_passp_e ,SORT_SUFFIX) +#define radix_passp0_be TOKEN_PASTE(radix_passp0_be,SORT_SUFFIX) +#define radix_passp_be TOKEN_PASTE(radix_passp_be, SORT_SUFFIX) +#define radix_sortp TOKEN_PASTE(radix_sortp ,SORT_SUFFIX) +#define merge_sortv TOKEN_PASTE(merge_sortv ,SORT_SUFFIX) +#define merge_copy_perm TOKEN_PASTE(merge_copy_perm,SORT_SUFFIX) +#define merge_sortp0 TOKEN_PASTE(merge_sortp0 ,SORT_SUFFIX) +#define merge_sortp TOKEN_PASTE(merge_sortp ,SORT_SUFFIX) +#define heap_sortv TOKEN_PASTE(heap_sortv ,SORT_SUFFIX) + +#define sortv PREFIXED_NAME(TOKEN_PASTE(sortv,SORT_SUFFIX)) +#define sortp PREFIXED_NAME(TOKEN_PASTE(sortp,SORT_SUFFIX)) + +typedef struct { T v; uint i; } sort_data; + +#define INC_PTR(A,stride) ((A)=(T*)((char*)(A)+(stride))) +#define INDEX_PTR(A,stride,i) (*(T*)((char*)(A)+(i)*(stride))) + +/*------------------------------------------------------------------------------ + + Radix Sort + + stable; O(n+k) time and extra storage + where k = (digits in an int) * 2^(bits per digit) + (e.g. k = 4 * 256 = 1024 for 32-bit ints with 8-bit digits) + + brief description: + input sorted stably on each digit, starting with the least significant + counting sort is used for each digit: + a pass through the input counts the occurences of each digit value + on a second pass, each input has a known destination + + tricks: + all counting passes are combined into one + the counting pass also computes the inclusive bit-wise or of all inputs, + which is used to skip digit positions for which all inputs have zeros + + ----------------------------------------------------------------------------*/ + +#define STATIC_DIGIT_BUCKETS 1 + +#define DIGIT_BITS 8 +#define DIGIT_VALUES (1<i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS +#define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1) +#define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2) +#define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4) +#define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8) +#define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16) +#define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32) + +static T radix_count( + uint (*restrict count)[DIGIT_VALUES], + const T *restrict A, const T *const end, const unsigned stride) +{ + T bitorkey = 0; + memset(count,0,COUNT_SIZE*sizeof(uint)); + do { + T val=*A; + bitorkey|=val; + COUNT_DIGIT_64(DIGITS,0); + /* above macro expands to: + if(DIGITS> 0) count[ 0][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + if(DIGITS> 1) count[ 1][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + ... + if(DIGITS>63) count[63][val&DIGIT_MASK]++, val>>=DIGIT_BITS; + */ + } while(INC_PTR(A,stride),A!=end); + return bitorkey; +} + +#undef COUNT_DIGIT_01 +#undef COUNT_DIGIT_02 +#undef COUNT_DIGIT_04 +#undef COUNT_DIGIT_08 +#undef COUNT_DIGIT_16 +#undef COUNT_DIGIT_32 +#undef COUNT_DIGIT_64 + +static void radix_offsets(uint *restrict c) +{ + uint *const ce = c+DIGIT_VALUES; + uint sum = 0; + do { + const uint c0=c[0], c1=c[1], c2=c[2], c3=c[3]; + const uint o1=sum+c0, o2=o1+c1, o3=o2+c2; + c[0]=sum, c[1]=o1, c[2]=o2, c[3]=o3; + sum = o3+c3; + c+=4; + } while(c!=ce); +} + +static unsigned radix_zeros( + T bitorkey, uint (*restrict count)[DIGIT_VALUES], + unsigned *restrict shift, uint **restrict offsets) +{ + unsigned digits=0, sh=0; uint *c = &count[0][0]; + do { + if(bitorkey&DIGIT_MASK) *shift++ = sh, *offsets++ = c, ++digits, + radix_offsets(c); + } while(bitorkey>>=DIGIT_BITS,sh+=DIGIT_BITS,c+=DIGIT_VALUES,sh!=VALUE_BITS); + return digits; +} + +static void radix_passv( + const T *restrict A, const T *const end, const unsigned stride, + const unsigned sh, uint *const restrict off, T *const restrict out) +{ + do out[off[(*A>>sh)&DIGIT_MASK]++] = *A; while(INC_PTR(A,stride),A!=end); +} + +static void radix_sortv( + T *out, const T *A, const uint n, const unsigned stride, + T *work, uint (*restrict count)[DIGIT_VALUES]) +{ + const T *const end = &INDEX_PTR(A,stride,n); + T bitorkey = radix_count(count, A,end,stride); + unsigned shift[DIGITS]; uint *offsets[DIGITS]; + const unsigned digits = radix_zeros(bitorkey,count,shift,offsets); + if(digits==0) { + memset(out,0,n*sizeof(T)); + } else { + T *src, *dst; unsigned d; + if(out==A || (digits&1)==0) dst=out,src=work; + else src=out,dst=work; + radix_passv(A,end,stride,shift[0],offsets[0],src); + for(d=1;d!=digits;++d) { + T *t; + radix_passv(src,src+n,sizeof(T),shift[d],offsets[d],dst); + t=src,src=dst,dst=t; + } + if(src!=out) memcpy(out,src,n*sizeof(T)); + } +} + +static void radix_passp0_b( + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *const restrict out) +{ + uint i=0; + do { + T v = *A; + sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++]; + d->v=v, d->i=i++; + } while(INC_PTR(A,stride),i!=n); +} + +static void radix_passp_b( + const uint *restrict p, + const T *const restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *const out) +{ + const uint *const pe = p+n; + do { + uint j = *p++; + T v = INDEX_PTR(A,stride,j); + sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++]; + d->v=v, d->i=j; + } while(p!=pe); +} + +static void radix_passp_m( + const sort_data *restrict src, const sort_data *const end, + const unsigned sh, uint *const restrict off, + sort_data *const restrict out) +{ + do { + sort_data *d = &out[off[(src->v>>sh)&DIGIT_MASK]++]; + d->v=src->v,d->i=src->i; + } while(++src!=end); +} + +static void radix_passp_e( + const sort_data *restrict src, const sort_data *const end, + const unsigned sh, uint *const restrict off, + uint *const restrict out) +{ + do out[off[(src->v>>sh)&DIGIT_MASK]++]=src->i; while(++src!=end); +} + +static void radix_passp0_be( + uint *const restrict out, + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off) +{ + uint i=0; + do out[off[(*A>>sh)&DIGIT_MASK]++]=i++; while(INC_PTR(A,stride),i!=n); +} + +static void radix_passp_be( + uint *restrict p, + const T *restrict A, const uint n, const unsigned stride, + const unsigned sh, uint *const restrict off, + sort_data *restrict work) +{ + uint *q = p, *const qe = p+n; + uint *w = &work[0].i; + do { + uint j = *q++; + T v = INDEX_PTR(A,stride,j); + w[off[(v>>sh)&DIGIT_MASK]++]=j; + } while(q!=qe); + memcpy(p,w,n*sizeof(uint)); +} + +static void radix_sortp( + uint *restrict idx, uint perm_start, + const T *restrict A, const uint n, const unsigned stride, + sort_data *restrict work, + uint (*restrict count)[DIGIT_VALUES]) +{ + T bitorkey = radix_count(count, A,&INDEX_PTR(A,stride,n),stride); + unsigned shift[DIGITS]; uint *offsets[DIGITS]; + unsigned digits = radix_zeros(bitorkey,count,shift,offsets); + if(digits==0) { + if(!perm_start) { uint i=0; do *idx++=i++; while(i!=n); } + } else if(digits==1) { + if(perm_start) radix_passp_be (idx,A,n,stride,shift[0],offsets[0],work); + else radix_passp0_be(idx,A,n,stride,shift[0],offsets[0]); + } else { + sort_data *src, *dst; unsigned d; + if((digits&1)==0) dst=work,src=dst+n; + else src=work,dst=src+n; + if(perm_start) radix_passp_b (idx,A,n,stride,shift[0],offsets[0],src); + else radix_passp0_b( A,n,stride,shift[0],offsets[0],src); + for(d=1;d!=digits-1;++d) { + sort_data *t; + radix_passp_m(src,src+n,shift[d],offsets[d],dst); + t=src,src=dst,dst=t; + } + radix_passp_e(src,src+n,shift[d],offsets[d],idx); + } +} + +/*------------------------------------------------------------------------------ + + Merge Sort + + stable; O(n log n) time + + ----------------------------------------------------------------------------*/ + +#define MERGE_2(p,v) \ + if(VAL(v[1])3) odd<<=1,odd|=(n&1),n>>=1,c<<=1,b^=1; \ + } else \ + base-=n-(odd&1),n<<=1,n-=(odd&1),odd>>=1,c>>=1; \ + if(c==0) break; \ + p = buf[b]+base; \ + if(n==2) { \ + DATA v[2]; SETVAL(v[0],i), SETVAL(v[1],i+1); \ + MERGE_2(p,v); \ + i+=2; \ + } else if(n==3) { \ + DATA v[3]; SETVAL(v[0],i), SETVAL(v[1],i+1), SETVAL(v[2],i+2); \ + MERGE_3(p,v); \ + i+=3; \ + } else { \ + const uint na = n>>1, nb = (n+1)>>1; \ + const DATA *restrict ap = buf[b^1]+base, *const ae = ap+na; \ + DATA *restrict bp = p+na, *const be = bp+nb; \ + for(;;) { \ + if(VAL((*bp))i; while(p!=pe);*/ + uint n_by_8 = (n+7)/8; + switch(n%8) { + case 0: do { *idx++ = (p++)->i; + case 7: *idx++ = (p++)->i; + case 6: *idx++ = (p++)->i; + case 5: *idx++ = (p++)->i; + case 4: *idx++ = (p++)->i; + case 3: *idx++ = (p++)->i; + case 2: *idx++ = (p++)->i; + case 1: *idx++ = (p++)->i; + } while (--n_by_8 > 0); + } +} + +static void merge_sortp0( + uint *restrict idx, + const T *restrict A, const uint An, const unsigned stride, + sort_data *restrict work) +{ + sort_data *buf[2]; buf[0]=work+An,buf[1]=work; +#define DATA sort_data +#define VAL(x) x.v +#define SETVAL(x,ai) x.v=*A,INC_PTR(A,stride),x.i=ai + MERGE_SORT(); +#undef SETVAL +#undef VAL +#undef DATA + merge_copy_perm(idx,buf[0],An); +} + +static void merge_sortp( + uint *restrict idx, + const T *const restrict A, const uint An, const unsigned stride, + sort_data *restrict work) +{ + sort_data *buf[2]; buf[0]=work+An,buf[1]=work; +#define DATA sort_data +#define VAL(x) x.v +#define SETVAL(x,ai) x.i=idx[ai],x.v=INDEX_PTR(A,stride,x.i) + MERGE_SORT(); +#undef SETVAL +#undef VAL +#undef DATA + merge_copy_perm(idx,buf[0],An); +} + +#undef MERGE_SORT +#undef MERGE_3 +#undef MERGE_2 + +/*------------------------------------------------------------------------------ + + Heap Sort + + in-place, stability unobservable; O(n log n) time + + ----------------------------------------------------------------------------*/ +static void heap_sortv(T *const restrict A, unsigned n) +{ + unsigned i; + /* build heap */ + for(i=1;i>1; + if(A[p] >= item) continue; + do A[h]=A[p], h=p, p=(p-1)>>1; while(h && A[p] < item); + A[h] = item; + } + /* extract */ + for(i=n-1;i;--i) { + T item = A[i]; + unsigned h = 0; + A[i] = A[0]; + for(;;) { + unsigned ch = 1+(h<<1), r = ch+1; + if(r=i || item >= A[ch]) break; + A[h]=A[ch], h=ch; + } + A[h] = item; + } +} + + +/*------------------------------------------------------------------------------ + + Hybrid Stable Sort + + low-overhead merge sort when n is small, + otherwise asymptotically superior radix sort + + result = O(n) sort with good performance for all n + + A, n, stride : specifices the input, stride in bytes + out : the sorted values on output + + For the value sort, + A and out may alias (A == out) exactly when stride == sizeof(T), + in which case heap sort is used for small sizes + + For the permutation sort, + the permutation can be both input (when start_perm!=0) and output, + following the convention that it is always at the start of the buffer buf; + the buffer will be expanded as necessary to accomodate the permutation + and the required scratch space + + ----------------------------------------------------------------------------*/ + +void sortv(T *out, const T *A, uint n, unsigned stride, buffer *restrict buf) +{ + if(nptr); + } + } + } else if(STATIC_DIGIT_BUCKETS) { + static uint count[DIGITS][DIGIT_VALUES]; + buffer_reserve(buf,n*sizeof(T)); + radix_sortv(out, A,n,stride, (T*)buf->ptr,count); + } else { + T *restrict work; + uint (*restrict count)[DIGIT_VALUES]; + const size_t count_off=align_as(uint,n*sizeof(T)); + buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES])); + work = buf->ptr; + count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off); + radix_sortv(out, A,n,stride, work,count); + } +} + +uint *sortp(buffer *restrict buf, int start_perm, + const T *restrict A, uint n, unsigned stride) +{ + uint *restrict perm; + sort_data *restrict work; + size_t work_off=align_as(sort_data,n*sizeof(uint)); + if(nptr; + work = (sort_data*)((char*)buf->ptr+work_off); + if(n<2) { + if(n==1) *perm=0; + } else { + if(start_perm) merge_sortp (perm, A,n,stride, work); + else merge_sortp0(perm, A,n,stride, work); + } + } else if(STATIC_DIGIT_BUCKETS){ + static uint count[DIGITS][DIGIT_VALUES]; + buffer_reserve(buf,work_off+2*n*sizeof(sort_data)); + perm = buf->ptr; + work = (sort_data*)((char*)buf->ptr+work_off); + radix_sortp(perm,start_perm, A,n,stride, work,count); + } else { + uint (*restrict count)[DIGIT_VALUES]; + const size_t count_off=align_as(uint,work_off+2*n*sizeof(sort_data)); + buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES])); + perm = buf->ptr; + work = (sort_data*)((char*)buf->ptr+work_off); + count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off); + radix_sortp(perm,start_perm, A,n,stride, work,count); + } + return perm; +} + +#undef STATIC_DIGIT_BUCKETS + +#undef DIGIT_BITS +#undef DIGIT_VALUES +#undef DIGIT_MASK +#undef CEILDIV +#undef DIGITS +#undef VALUE_BITS +#undef COUNT_SIZE + +#undef INDEX_PTR +#undef INC_PTR + +#undef sortp +#undef sortv + +#undef merge_sortp +#undef merge_sortp0 +#undef merge_sortv +#undef radix_sortp +#undef radix_passp_be +#undef radix_passp0_be +#undef radix_passp_e +#undef radix_passp_m +#undef radix_passp_b +#undef radix_passp0_b +#undef radix_sortv +#undef radix_passv +#undef radix_zeros +#undef radix_offsets +#undef radix_count +#undef sort_data + diff --git a/src/jl/sort_test.c b/src/jl/sort_test.c new file mode 100644 index 0000000..acd0bb3 --- /dev/null +++ b/src/jl/sort_test.c @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" + +#define SMALL 22 +#define NUM 500 +#define SI 9 + +ulong A[NUM][SI], Av[NUM]; +uint B[NUM][SI], Bv[NUM]; + +uint P[NUM], Q[NUM]; + +int main() +{ + buffer buf = {0,0,0}; + uint i; + + /*buffer_init(&buf, sortp_long_worksize(NUM,0));*/ + +#if 0 + printf("\nsource:\n"); +#endif + for(i=0;i!=NUM;++i) { + A[i][0]=rand(); + A[i][0]<<=CHAR_BIT*sizeof(int)-1; + A[i][0]^=rand(); + A[i][0]<<=CHAR_BIT*sizeof(int)-1; + A[i][0]^=rand(); + if(0) A[i][0]&=0x000ff00; + B[i][0]=A[i][0]; +#if 0 + printf("%016lx\t%016lx\n",(unsigned long)A[i][0],(unsigned long)B[i][0]); +#endif + } +#if 0 + printf("\n"); +#endif + printf("merge sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv_long(Av, &A[0][0],SMALL,sizeof(ulong[SI]), &buf); + sortp_long(&buf,0, &A[0][0],SMALL,sizeof(ulong[SI])); + memcpy(P,buf.ptr,SMALL*sizeof(uint)); + memcpy(buf.ptr,Q,SMALL*sizeof(uint)); + sortp_long(&buf,1, &A[0][0],SMALL,sizeof(ulong[SI])); + memcpy(Q,buf.ptr,SMALL*sizeof(uint)); + for(i=0;i!=SMALL;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)A[P[i]][0], + A[P[i]][0]==A[Q[i]][0], + Av[i]==A[P[i]][0]); + printf("\n"); + printf("radix sort:\n"); + for(i=0;i!=NUM;++i) Q[i]=NUM-1-i; + sortv_long(Av, &A[0][0],NUM,sizeof(ulong[SI]), &buf); + sortp_long(&buf,0, &A[0][0],NUM,sizeof(ulong[SI])); + memcpy(P,buf.ptr,NUM*sizeof(uint)); + memcpy(buf.ptr,Q,NUM*sizeof(uint)); + sortp_long(&buf,1, &A[0][0],NUM,sizeof(ulong[SI])); + memcpy(Q,buf.ptr,NUM*sizeof(uint)); + for(i=0;i!=NUM;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)A[P[i]][0], + A[P[i]][0]==A[Q[i]][0], + Av[i]==A[P[i]][0]); + + printf("\nsmall integers:\n"); + printf("\n"); + + printf("heap sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv(Q, Q,SMALL,sizeof(uint), &buf); + for(i=0;i!=SMALL;++i) printf("\t%u\n",(unsigned)Q[i]); + + printf("merge sort:\n"); + for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; + sortv(Bv, &B[0][0],SMALL,sizeof(uint[SI]), &buf); + sortp(&buf,0, &B[0][0],SMALL,sizeof(uint[SI])); + memcpy(P,buf.ptr,SMALL*sizeof(uint)); + memcpy(buf.ptr,Q,SMALL*sizeof(uint)); + sortp(&buf,1, &B[0][0],SMALL,sizeof(uint[SI])); + memcpy(Q,buf.ptr,SMALL*sizeof(uint)); + for(i=0;i!=SMALL;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)B[P[i]][0], + B[P[i]][0]==B[Q[i]][0], + B[P[i]][0]==Bv[i]); + printf("\n"); + printf("radix sort:\n"); + for(i=0;i!=NUM;++i) Q[i]=NUM-1-i; + sortv(Bv, &B[0][0],NUM,sizeof(uint[SI]), &buf); + sortp(&buf,0, &B[0][0],NUM,sizeof(uint[SI])); + memcpy(P,buf.ptr,NUM*sizeof(uint)); + memcpy(buf.ptr,Q,NUM*sizeof(uint)); + sortp(&buf,1, &B[0][0],NUM,sizeof(uint[SI])); + memcpy(Q,buf.ptr,NUM*sizeof(uint)); + for(i=0;i!=NUM;++i) + printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i], + (unsigned long)B[P[i]][0], + B[P[i]][0]==B[Q[i]][0], + B[P[i]][0]==Bv[i]); + buffer_free(&buf); + return 0; +} + diff --git a/src/jl/sort_test2.c b/src/jl/sort_test2.c new file mode 100644 index 0000000..4481a16 --- /dev/null +++ b/src/jl/sort_test2.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sort.h" +#include "rdtsc.h" + +#if 1 + +DEFINE_HW_COUNTER() + +#define N (1<<20) + +ulong A[N], out[N]; +uint P[N]; + +int main() +{ + buffer buf = null_buffer; + uint i; + unsigned long long tic, toc; + unsigned r; + #define TIME(t, repeat, what) do { \ + for(r=repeat;r;--r) { what; } \ + tic = getticks(); \ + for(r=repeat;r;--r) { what; } \ + toc = getticks(); \ + t = toc-tic; \ + } while(0) + + for(i=0;i!=N;++i) { + A[i]=rand(); + A[i]<<=CHAR_BIT*sizeof(int)-1; + A[i]^=rand(); + A[i]<<=CHAR_BIT*sizeof(int)-1; + A[i]^=rand(); + if(0) A[i]&=0x000ff00; + } + + for(i=N;i;i>>=1) { + unsigned long long t; + TIME(t, (N/i), + sortv_long(out, A,i,sizeof(ulong), &buf)); + printf("sortv %d : %g cycles per item\n", + (int)i, t/(double)(N/i)/(double)i); + } + + for(i=N;i;i>>=1) { + unsigned long long t; + TIME(t, (N/i), + sortp_long(&buf,0, A,i,sizeof(ulong))); + printf("sortp %d : %g cycles per item\n", + (int)i, t/(double)(N/i)/(double)i); + } + + buffer_free(&buf); + return 0; +} + +#else + +int main() +{ + return 0; +} + +#endif + diff --git a/src/jl/spchol_test.c b/src/jl/spchol_test.c new file mode 100644 index 0000000..5fc6596 --- /dev/null +++ b/src/jl/spchol_test.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include "c99.h" +#include "name.h" +#include "fail.h" +#include "types.h" +#include "mem.h" +#include "sparse_cholesky.h" + +int main() +{ +#define x -1 + + uint i,n=7; + uint Aj [] = {0,2, 1,2,6, 0,1,2, 3,5,6, 4,5, 3,4,5, 1,3,6}; + double A[] = {2,x, 2,x,x, x,x,2, 2,x,x, 2,x, x,x,2, x,x,2}; +#undef x + uint Arp[] = {0,2,5,8,11,13,16,19}; + double x[7], b[7] = {0,0,0,0, 0,0,0}; + uint o[7] = {0,2,1,6,3,5,4}; +/* + uint i,n=10; + uint Aj [] = {0,2,7, 1,4,9, 0,2,6, 3,8,9, 1,4,8,9, 5,6,7, 2,5,6, 0,5,7,8,9, 3,4,7,8, 1,3,4,7,9}; + real A [] = {3,x,x, 2,x,x, x,2,x, 2,x,x, x,3,x,x, 2,x,x, x,x,2, x,x,4,x,x, x,x,x,3, x,x,x,x,4}; +#undef x + uint Arp[] = {0, 3, 6, 9, 12, 16, 19, 22, 27, 31, 36}; + real b[] = {1,2,3,4,5, 6,7,8,9,10}; +*/ + struct sparse_cholesky data; + buffer buf; + buffer_init(&buf,4); + sparse_cholesky_factor(n,Arp,Aj,A,&data,&buf); + + for(i=0;i +# define tensor_dot(a,b,n) cblas_ddot((int)(n),a,1,b,1) +# define tensor_mxv(y,ny,A,x,nx) \ + cblas_dgemv(CblasColMajor,CblasNoTrans,(int)ny,(int)nx, \ + 1.0,A,(int)ny,x,1,0.0,y,1) +# define tensor_mtxv(y,ny,A,x,nx) \ + cblas_dgemv(CblasColMajor,CblasTrans,(int)nx,(int)ny, \ + 1.0,A,(int)nx,x,1,0.0,y,1) +# define tensor_mxm(C,nc,A,na,B,nb) \ + cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans, \ + (int)nc,(int)nb,(int)na,1.0, \ + A,(int)nc,B,(int)na,0.0,C,(int)nc) +# define tensor_mtxm(C,nc,A,na,B,nb) \ + cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans, \ + (int)nc,(int)nb,(int)na,1.0, \ + A,(int)na,B,(int)na,0.0,C,(int)nc) +#else +# define tensor_dot PREFIXED_NAME(tensor_dot ) +# define tensor_mtxm PREFIXED_NAME(tensor_mtxm) +double tensor_dot(const double *a, const double *b, uint n); + +/* C (nc x nb) = [A (na x nc)]^T * B (na x nb); all column-major */ +void tensor_mtxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb); +# if defined(USE_NAIVE_BLAS) +# define tensor_mxv PREFIXED_NAME(tensor_mxv ) +# define tensor_mtxv PREFIXED_NAME(tensor_mtxv) +# define tensor_mxm PREFIXED_NAME(tensor_mxm ) +/* y = A x */ +void tensor_mxv(double *y, uint ny, const double *A, const double *x, uint nx); + +/* y = A^T x */ +void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx); + +/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */ +void tensor_mxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb); +# else +# define nek_mxm FORTRAN_UNPREFIXED(mxm,MXM) +/* C (na x nc) = A (na x nb) * B (nb x nc); all column-major */ +void nek_mxm(const double *A, const uint *na, + const double *B, const uint *nb, + double *C, const uint *nc); +/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */ +static void tensor_mxm(double *C, uint nc, + const double *A, uint na, const double *B, uint nb) +{ nek_mxm(A,&nc,B,&na,C,&nb); } + +/* y = A x */ +static void tensor_mxv(double *y, uint ny, + const double *A, const double *x, uint nx) +{ uint one=1; nek_mxm(A,&ny,x,&nx,y,&one); } + +/* y = A^T x */ +static void tensor_mtxv(double *y, uint ny, + const double *A, const double *x, uint nx) +{ uint one=1; nek_mxm(x,&one,A,&nx,y,&ny); } + +# endif +#endif + +/*-------------------------------------------------------------------------- + 1-,2-,3-d Tensor Application of Row Vectors (for Interpolation) + + the 3d case: + v = tensor_i3(Jr,nr, Js,ns, Jt,nt, u, work) + gives v = [ Jr (x) Js (x) Jt ] u + where Jr, Js, Jt are row vectors (interpolation weights) + u is nr x ns x nt in column-major format (inner index is r) + v is a scalar + --------------------------------------------------------------------------*/ + +static double tensor_i1(const double *Jr, uint nr, const double *u) +{ + return tensor_dot(Jr,u,nr); +} + +/* work holds ns doubles */ +static double tensor_i2(const double *Jr, uint nr, + const double *Js, uint ns, + const double *u, double *work) +{ + tensor_mtxv(work,ns, u, Jr,nr); + return tensor_dot(Js,work,ns); +} + +/* work holds ns*nt + nt doubles */ +static double tensor_i3(const double *Jr, uint nr, + const double *Js, uint ns, + const double *Jt, uint nt, + const double *u, double *work) +{ + double *work2 = work+nt; + tensor_mtxv(work2,ns*nt, u, Jr,nr); + tensor_mtxv(work ,nt , work2, Js,ns); + return tensor_dot(Jt,work,nt); +} + +/*-------------------------------------------------------------------------- + 1-,2-,3-d Tensor Application of Row Vectors + for simultaneous Interpolation and Gradient computation + + the 3d case: + v = tensor_ig3(g, wtr,nr, wts,ns, wtt,nt, u, work) + gives v = [ Jr (x) Js (x) Jt ] u + g_0 = [ Dr (x) Js (x) Jt ] u + g_1 = [ Jr (x) Ds (x) Jt ] u + g_2 = [ Jr (x) Js (x) Dt ] u + where Jr,Dr,Js,Ds,Jt,Dt are row vectors, + Jr=wtr, Dr=wtr+nr, etc. + (interpolation & derivative weights) + u is nr x ns x nt in column-major format (inner index is r) + v is a scalar, g is an array of 3 doubles + --------------------------------------------------------------------------*/ + +static double tensor_ig1(double g[1], + const double *wtr, uint nr, + const double *u) +{ + g[0] = tensor_dot(wtr+nr,u,nr); + return tensor_dot(wtr ,u,nr); +} + +/* work holds 2*nr doubles */ +static double tensor_ig2(double g[2], + const double *wtr, uint nr, + const double *wts, uint ns, + const double *u, double *work) +{ + tensor_mxm(work,nr, u,ns, wts,2); + g[0] = tensor_dot(wtr+nr,work ,nr); + g[1] = tensor_dot(wtr ,work+nr,nr); + return tensor_dot(wtr ,work ,nr); +} + +/* work holds 2*nr*ns + 3*nr doubles */ +static double tensor_ig3(double g[3], + const double *wtr, uint nr, + const double *wts, uint ns, + const double *wtt, uint nt, + const double *u, double *work) +{ + const uint nrs = nr*ns; + double *a = work, *b = work+2*nrs, *c=b+2*nr; + tensor_mxm(a,nrs, u,nt, wtt,2); + tensor_mxm(b,nr, a,ns, wts,2); + tensor_mxv(c,nr, a+nrs, wts,ns); + g[0] = tensor_dot(b , wtr+nr, nr); + g[1] = tensor_dot(b+nr, wtr , nr); + g[2] = tensor_dot(c , wtr , nr); + return tensor_dot(b , wtr , nr); +} + +/* + out - nr x ns + u - mr x ms + Jrt - mr x nr, Jst - ms x ns + work - nr x ms +*/ +static void tensor_2t(double *out, + const double *Jrt, uint nr, uint mr, + const double *Jst, uint ns, uint ms, + const double *u, double *work) +{ + tensor_mtxm(work,nr, Jrt,mr, u,ms); + tensor_mxm(out,nr, work,ms, Jst,ns); +} + +/* + out - nr x ns x nt + u - mr x ms x mt + Jrt - mr x nr, Jst - ms x ns, Jtt - mt x nt + work - nr*ms*mt + nr*ns*mt = nr*(ms+ns)*mt +*/ +static void tensor_3t(double *out, + const double *Jrt, uint nr, uint mr, + const double *Jst, uint ns, uint ms, + const double *Jtt, uint nt, uint mt, + const double *u, double *work) +{ + const uint nrs=nr*ns, mst=ms*mt, nrms=nr*ms; + uint k; + double *work2 = work+nr*mst; + double *p; const double *q; + tensor_mtxm(work,nr, Jrt,mr, u,mst); + for(k=0,p=work2,q=work;k/dev/null +if [ $? -ne 0 ]; then + echo "FATAL ERROR: Cannot find $F77!" + exit 1 +fi +\rm test_f77.o 2>/dev/null + +# basic compiler test +cat > test_f77.f << _ACEOF + subroutine test + end +_ACEOF +$F77 -c test_f77.f >/dev/null +if [ ! -f test_f77.o ]; then + echo "FATAL ERROR: Basic compiler test for $F77 failed!" + exit 1 +fi +\rm test_f77.* 2>/dev/null + +# test C compiler +which `echo $CC | awk '{print $1}'` 1>/dev/null +if [ $? -ne 0 ]; then + echo "FATAL ERROR: Cannot find $CC!" + exit 1 +fi +\rm test_cc.o 2>/dev/null + +cat > test_cc.c << _ACEOF + void function(){} +_ACEOF +$CC -c test_cc.c 1>/dev/null +if [ ! -f test_cc.o ]; then + echo "FATAL ERROR: Basic compiler test for $CC failed!" + exit 1 +fi +\rm test_cc.* 2>/dev/null + +# initial clean-up +rm -f nekbone 2>/dev/null + +# Check if the compiler adds an underscore to external functions +UNDERSCORE=false +cat > test_underscore.f << _ACEOF + subroutine underscore_test + call byte_write + end +_ACEOF +$F77 -c test_underscore.f 2>&1 >/dev/null +nm test_underscore.o | grep byte_write_ 1>/dev/null +if [ $? -eq 0 ] +then + UNDERSCORE=true +fi +\rm test_underscore.* 2>/dev/null + +# trying to figure which compiler the wrapper is using +F77ok=0 + +F77comp_=`$F77 -showme 2>/dev/null 1>.tmp || true` +F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` +if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 +fi + +if [ $F77ok -eq 0 ]; then + F77comp_=`$F77 -show 2>/dev/null 1>.tmp || true` + F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +if [ $F77ok -eq 0 ]; then + F77comp_=`$F77 -craype-verbose 2>/dev/null 1>.tmp || true` + F77comp=`cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +if [ $F77ok -eq 0 ]; then + F77comp=`echo $F77 | awk '{print $1}'` + if [ -f "`which $F77comp 2>/dev/null`" ]; then + F77ok=1 + fi +fi + +\rm -f .tmp +if [ $F77ok -eq 0 ]; then + F77comp="unknown" +fi + +CFE_FLAG="" +# assign F77 compiler specific flags +case $F77comp in + *pgf*) P="-r8 -Mpreprocess" + ;; + *gfortran*) P="-fcray-pointer -fdefault-real-8 -x f77-cpp-input" + ;; + *ifort*) P="-r8 -fpconstant -fpp" + CFE_FLAG="-xHost" + ;; + *pathf*) P="-r8 -cpp -fno-second-underscore" + ;; + *xlf*) P="-qrealsize=8 -qdpc=e -qsuffix=cpp=f" + PPPO="-WF," + F77="${F77} -qsuppress=cmpmsg" + ;; + *ftn*) P="-r8 -Mpreprocess" + ;; + *sunf*) P="-r8const -xtypemap=real:64 -fpp" + ;; + *open*) P="-r8 -cpp -fno-second-underscore" + ;; + *) echo "ERROR: Unable to detect compiler!" + echo " - don't know how to promote datatype REAL to 8 bytes" + echo " - don't know how to invoke the C pre-processor (CPP) before compilation" + echo " Please edit the makefile and specify the requested compiler flags using the P variable." + echo "" + P="" + NOCOMPILE=1 + read;; +esac +export PPPO + +# Check ptr size +cat > tmp.c << _ACEOF +#include +#include +int main() +{ + int *p;printf("%li\n",sizeof(p)); +} +_ACEOF +$CC $CFE_FLAG tmp.c 2>&1>/dev/null +ptrSize=`./a.out` +rm tmp.c a.out +if [ "$ptrSize" == "8" ] +then + PPLIST="${PPLIST} PTRSIZE8" +fi + +# set preprocessor symbols +if [ "$IFMPI" == "false" -o "$IFMPI" == "no" ]; then + IFMPI=false +else + # default + IFMPI=true + PPLIST="${PPLIST} MPI" +fi +export IFMPI + +# Check size of long int +cat > tmp.c << _ACEOF +#include +#include +int main() +{ + int i; + i=sizeof(long int); + printf("%i\n",i); +} +_ACEOF +$CC $CFE_FLAG tmp.c 2>&1>/dev/null +longIntTest=`./a.out` +rm tmp.c a.out +if [ "$longIntTest" == "8" ] +then + PPLIST="${PPLIST} LONGINT8" +fi + +if [ "$UNDERSCORE" == "true" ]; then + PPLIST="${PPLIST} UNDERSCORE" +fi + +PPLIST="${PPLIST} GLOBAL_LONG_LONG" + +MXM_USER="mxm_std.o blas.o" +echo $PPLIST | grep 'BGP' >/dev/null +if [ $? -eq 0 ]; then + MXM_USER="mxm_std.o bg_aligned3.o bg_mxm44.o bg_mxm44_uneven.o bg_mxm3.o blas.o" + OPT_FLAGS_STD="-qarch=450 -qtune=450" + OPT_FLAGS_MAG="-O5 -qarch=450d -qtune=450" +fi +echo $PPLIST | grep 'BLAS_MXM' >/dev/null +if [ $? -eq 0 ]; then + MXM_USER="mxm_std.o" +fi + +# set optimization flags +L0="\$(G) -O0" +L2="\$(G) -O2" +L3="\$(G) -O3" +L4="\$(L3)" + +# user specified opt flags +if [ "$OPT_FLAGS_STD" != "" ]; then + echo $OPT_FLAGS_STD | grep "\-O." 1>/dev/null + if [ $? -eq 0 ]; then + L2="\$(G) $OPT_FLAGS_STD" + L3="\$(G) $OPT_FLAGS_STD" + else + L2="\$(G) -O2 $OPT_FLAGS_STD" + L3="\$(G) -O3 $OPT_FLAGS_STD" + fi +fi + +if [ "$OPT_FLAGS_MAG" != "" ]; then + L4="\$(G) $OPT_FLAGS_MAG" +fi + +if [ "$USR_LIB" != "" ]; then + USR_LFLAGS="${USR_LFLAGS} ${USR_LIB}" +fi + +# tweak makefile template +echo "generating makefile ..." +rm -rf makefile 2>/dev/null + +sed -e "s:^F77[ ]*=.*:F77\:=$F77:" \ +-e "s:^CC[ ]*=.*:CC\:=$CC:" \ +-e "s:^G[ ]*=.*:G\:=$G:" \ +-e "s:^OPT_FLAGS[ ]*=.*:OPT_FLAGS\:=$OPT_FLAGS:" \ +-e "s/^P[ ]*=.*/P:=$P/" \ +-e "s/^L0[ ]*=.*/L0=$L0/" \ +-e "s/^L2[ ]*=.*/L2=$L2/" \ +-e "s/^L3[ ]*=.*/L3=$L3/" \ +-e "s/^L4[ ]*=.*/L4=$L4/" \ +-e "s/^PPPO[ ]*=.*/PPPO=$PPPO/" \ +-e "s/^PPS[ ]*=.*/PPS=$PPLIST/" \ +-e "s:^MXM[ ]*=.*:MXM=$MXM_USER:" \ +-e "s/^IFMPI[ ]*=.*/IFMPI:=$IFMPI/" \ +-e "s:^USR[ ]*=.*:USR\:=$USR:" \ +-e "s:^USR_LFLAGS[ ]*=.*:USR_LFLAGS\:=$USR_LFLAGS:" \ +-e "s:^S[ ]*=.*:S\:=${SOURCE_ROOT}:" ./makefile.template >.makefile + +echo $G | grep '\-g' 1>/dev/null +if [ $? -eq 0 ]; then + sed 's/-O[1-4]/-O0/g' .makefile > .makefile.tmp + mv .makefile.tmp .makefile + echo "Activate DEBUG mode" +fi + +if [ "$USR" != "" ]; then + echo "###########################################################" >> makefile + echo "include makefile_usr.inc" >> .makefile +fi + +if [ -f .makefile ]; then + sed -e "1i\\ +### makefile automatically created by makenek `date +"%m/%d/%Y %T"` ###" \ +-e "s:^CASEDIR[ ]*=.*:CASEDIR\:=${CASEDIR}:" \ +-e "s:^CASENAME[ ]*=.*:CASENAME\:=${CASENAME}:" .makefile > makefile +else + echo "ERROR: Nek Makefile could not be created!" + exit 1 +fi +\rm .makefile 2>/dev/null + +# tweak SIZE file +if [ -f "./SIZE" ]; then + cat SIZE | grep -i 'lxo' >/dev/null +else + echo "FATAL ERROR: Cannot find SIZE" + exit 1 +fi + +if [ $NOCOMPILE -eq 1 ]; then + exit 0 +fi diff --git a/src/math.f b/src/math.f new file mode 100644 index 0000000..48a24c3 --- /dev/null +++ b/src/math.f @@ -0,0 +1,1402 @@ +c----------------------------------------------------------------------- + SUBROUTINE BLANK(A,N) + CHARACTER*1 A(1) + CHARACTER*1 BLNK + SAVE BLNK + DATA BLNK /' '/ + + DO 10 I=1,N + A(I)=BLNK + 10 CONTINUE + RETURN + END +c----------------------------------------------------------------------- + SUBROUTINE VSQ (A,N) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = A(I)**2 + RETURN + END +c----------------------------------------------------------------------- + SUBROUTINE VSQRT(A,N) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = SQRT(A(I)) + RETURN + END +c----------------------------------------------------------------------- + subroutine invers2(a,b,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=1./B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol1(a,n) + REAL A(1) + + DO 100 I=1,N + A(I)=1./A(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol2(a,b,n) + + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=A(I)/B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine invcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + + DO 100 I=1,N + A(I)=B(I)/C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine col4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine Xaddcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine addcol4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine ascol5 (a,b,c,d,e,n) + REAL A(1),B(1),C(1),D(1),E(1) + + DO 100 I=1,N + A(I) = B(I)*C(I)-D(I)*E(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine sub2(a,b,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=A(I)-B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine sub3(a,b,c,n) + REAL A(1),B(1),C(1) + + DO 100 I=1,N + A(I)=B(I)-C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine subcol3(a,b,c,n) + REAL A(1),B(1),C(1) + + + DO 100 I=1,N + A(I)=A(I)-B(I)*C(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine subcol4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + + DO 100 I=1,N + A(I)=A(I)-B(I)*C(I)*D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine rzero(a,n) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I ) = 0.0 + return + END +c----------------------------------------------------------------------- + subroutine izero(a,n) + INTEGER A(1) + + DO 100 I = 1, N + 100 A(I ) = 0 + return + END +c----------------------------------------------------------------------- + subroutine ione(a,n) + INTEGER A(1) + DO 100 I = 1, N + 100 A(I ) = 1 + return + END +c----------------------------------------------------------------------- + subroutine rone(a,n) + DIMENSION A(1) + DO 100 I = 1, N + 100 A(I ) = 1.0 + return + END +c----------------------------------------------------------------------- + subroutine cfill(a,b,n) + DIMENSION A(1) + + DO 100 I = 1, N + 100 A(I) = B + return + END +c----------------------------------------------------------------------- + subroutine ifill(ia,ib,n) + DIMENSION IA(1) + + DO 100 I = 1, N + 100 IA(I) = IB + return + END +c----------------------------------------------------------------------- + subroutine copy(a,b,n) + real a(1),b(1) + + do i=1,n + a(i)=b(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine chcopy(a,b,n) + CHARACTER*1 A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END + + subroutine icopy(a,b,n) + INTEGER A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END +c----------------------------------------------------------------------- + subroutine i8copy(a,b,n) + INTEGER*8 A(1), B(1) + + DO 100 I = 1, N + 100 A(I) = B(I) + return + END +c----------------------------------------------------------------------- + subroutine chsign(a,n) + REAL A(1) + + DO 100 I=1,N + A(I) = -A(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine cmult(a,const,n) + REAL A(1) + + DO 100 I=1,N + A(I)=A(I)*CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine cadd(a,const,n) + REAL A(1) + + DO 100 I=1,N + A(I)=A(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine iadd(i1,iscal,n) + DIMENSION I1(1) + + DO 10 I=1,N + I1(I)=I1(I)+ISCAL + 10 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine cadd2(a,b,const,n) + REAL A(1),B(1) + + DO 100 I=1,N + A(I)=B(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + real function vlmin(vec,n) + REAL VEC(1) + TMIN = 99.0E20 + + DO 100 I=1,N + TMIN = MIN(TMIN,VEC(I)) + 100 CONTINUE + VLMIN = TMIN + return + END +c----------------------------------------------------------------------- + integer function ivlmin(vec,n) + integer vec(1),tmin + if (n.eq.0) then + ivlmin=0 + return + endif + tmin = 8888888 + do i=1,n + tmin = min(tmin,vec(i)) + enddo + ivlmin = tmin + return + end +c----------------------------------------------------------------------- + integer function ivlmax(vec,n) + integer vec(1),tmax + if (n.eq.0) then + ivlmax=0 + return + endif + TMAX =-8888888 + do i=1,n + TMAX = MAX(TMAX,VEC(I)) + enddo + Ivlmax = tmax + return + end +c----------------------------------------------------------------------- + real function vlmax(vec,n) + REAL VEC(1) + TMAX =-99.0E20 + do i=1,n + TMAX = MAX(TMAX,VEC(I)) + enddo + VLMAX = TMAX + return + END +c----------------------------------------------------------------------- + real function vlamax(vec,n) + REAL VEC(1) + TAMAX = 0.0 + + DO 100 I=1,N + TAMAX = MAX(TAMAX,ABS(VEC(I))) + 100 CONTINUE + VLAMAX = TAMAX + return + END +c----------------------------------------------------------------------- + real function vlsum(vec,n) + REAL VEC(1) + + SUM = 0. + + DO 100 I=1,N + SUM=SUM+VEC(I) + 100 CONTINUE + VLSUM = SUM + return + END +c----------------------------------------------------------------------- + subroutine vcross (u1,u2,u3,v1,v2,v3,w1,w2,w3,n) +C +C Compute a Cartesian vector cross product. +C + DIMENSION U1(1),U2(1),U3(1) + DIMENSION V1(1),V2(1),V3(1) + DIMENSION W1(1),W2(1),W3(1) + + + DO 100 I=1,N + U1(I) = V2(I)*W3(I) - V3(I)*W2(I) + U2(I) = V3(I)*W1(I) - V1(I)*W3(I) + U3(I) = V1(I)*W2(I) - V2(I)*W1(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine vdot2 (dot,u1,u2,v1,v2,n) +C +C Compute a Cartesian vector dot product. 2-d version +C + DIMENSION DOT(1) + DIMENSION U1(1),U2(1) + DIMENSION V1(1),V2(1) + + + DO 100 I=1,N + DOT(I) = U1(I)*V1(I) + U2(I)*V2(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine vdot3 (dot,u1,u2,u3,v1,v2,v3,n) +C +C Compute a Cartesian vector dot product. 3-d version +C + DIMENSION DOT(1) + DIMENSION U1(1),U2(1),U3(1) + DIMENSION V1(1),V2(1),V3(1) + + + DO 100 I=1,N + DOT(I) = U1(I)*V1(I) + U2(I)*V2(I) + U3(I)*V3(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine addtnsr(s,h1,h2,h3,nx,ny,nz) +C +C Map and add to S a tensor product form of the three functions H1,H2,H3. +C This is a single element routine used for deforming geometry. +C + DIMENSION H1(1),H2(1),H3(1) + DIMENSION S(NX,NY,NZ) + + DO 200 IZ=1,NZ + DO 200 IY=1,NY + HH = H2(IY)*H3(IZ) + DO 100 IX=1,NX + S(IX,IY,IZ)=S(IX,IY,IZ)+HH*H1(IX) + 100 CONTINUE + 200 CONTINUE + return + END + function ltrunc(string,l) + CHARACTER*1 STRING(L) + CHARACTER*1 BLNK + DATA BLNK/' '/ + + DO 100 I=L,1,-1 + L1=I + IF (STRING(I).NE.BLNK) GOTO 200 + 100 CONTINUE + L1=0 + 200 CONTINUE + LTRUNC=L1 + return + END +c----------------------------------------------------------------------- + function mod1(i,n) +C +C Yields MOD(I,N) with the exception that if I=K*N, result is N. +C + MOD1=0 + IF (I.EQ.0) THEN + return + ENDIF + IF (N.EQ.0) THEN + WRITE(6,*) + $ 'WARNING: Attempt to take MOD(I,0) in function mod1.' + return + ENDIF + II = I+N-1 + MOD1 = MOD(II,N)+1 + return + END +c----------------------------------------------------------------------- + integer function log2(k) + RK=(K) + RLOG=LOG10(RK) + RLOG2=LOG10(2.0) + RLOG=RLOG/RLOG2+0.5 + LOG2=INT(RLOG) + return + END +c----------------------------------------------------------------------- + subroutine iflip(i1,n) + DIMENSION I1(1) + N1=N+1 + N2=N/2 + DO 10 I=1,N2 + ILAST=N1-I + ITMP=I1(ILAST) + I1(ILAST)=I1(I) + I1(I)=ITMP + 10 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine iswap(b,ind,n,temp) + INTEGER B(1),IND(1),TEMP(1) +C*** +C*** SORT ASSOCIATED ELEMENTS BY PUTTING ITEM(JJ) +C*** INTO ITEM(I), WHERE JJ=IND(I). +C*** + DO 20 I=1,N + JJ=IND(I) + TEMP(I)=B(JJ) + 20 CONTINUE + DO 30 I=1,N + 30 B(I)=TEMP(I) + return + END +c----------------------------------------------------------------------- + subroutine col2(a,b,n) + real a(1),b(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)*b(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col2c(a,b,c,n) + real a(1),b(1),c + + do i=1,n + a(i)=a(i)*b(i)*c + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2(a,b,n) + real a(1),b(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)+b(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=b(i)+c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine addcol3(a,b,c,n) + real a(1),b(1),c(1) + +!xbm* unroll (10) + do i=1,n + a(i)=a(i)+b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2s1(a,b,c1,n) + real a(1),b(1) + + DO 100 I=1,N + A(I)=C1*A(I)+B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add2s2(a,b,c1,n) + real a(1),b(1) + + DO 100 I=1,N + A(I)=A(I)+C1*B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add3s2(a,b,c,c1,c2,n) + real a(1),b(1),c(1) + + DO 100 I=1,N + A(I)=C1*B(I)+C2*C(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine add4(a,b,c,d,n) + REAL A(1),B(1),C(1),D(1) + + DO 100 I=1,N + A(I)=B(I)+C(I)+D(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + real function vlsc2(x,y,n) + REAL X(1),Y(1) + + s = 0. + do i=1,n + s = s + x(i)*y(i) + enddo + vlsc2=s + return + end +c----------------------------------------------------------------------- + real function vlsc21(x,y,n) + real x(1),y(1) + + s = 0. + do i=1,n + s = s + x(i)*x(i)*y(i) + enddo + vlsc21=s + return + end + + +C---------------------------------------------------------------------------- +C +C Vector reduction routines which require communication +C on a parallel machine. These routines must be substituted with +C appropriate routines which take into account the specific architecture. +C +C---------------------------------------------------------------------------- + function glsc3(a,b,mult,n) +C +C Perform inner-product in double precision +C + real a(1),b(1),mult(1) + real tmp,work(1) + + tmp = 0.0 + do 10 i=1,n + tmp = tmp + a(i)*b(i)*mult(i) + 10 continue + call gop(tmp,work,'+ ',1) + glsc3 = tmp + return + end +c----------------------------------------------------------------------- + function glsc2(x,y,n) +C +C Perform inner-product in double precision +C + real x(1), y(1) + real tmp,work(1) + + tmp=0.0 + do 10 i=1,n + tmp = tmp+ x(i)*y(i) + 10 continue + CALL GOP(TMP,WORK,'+ ',1) + GLSC2 = TMP + return + END +c----------------------------------------------------------------------- + function glsc23(x,y,z,n) +c +C Perform inner-product x*x*y*z +c + real x(1), y(1),z(1) + real tmp,work(1) + + ds = 0.0 + do 10 i=1,n + ds=ds+x(i)*x(i)*y(i)*z(i) + 10 continue + tmp=ds + call gop(tmp,work,'+ ',1) + glsc23 = tmp + return + end +c----------------------------------------------------------------------- +c real function gl2norm(a,n) + +c include 'SIZE' +c include 'MASS' + +c real a(1) + +c common /scrsf/ w1 (lx1,ly1,lz1,lelt) + +c call col3 (w1,a,a,n) +c call col2 (w1,bm1,n) +c gl2norm = sqrt(glsum (w1,n)/volvm1) + +c return +c end +c----------------------------------------------------------------------- + function glsum (x,n) + DIMENSION X(1) + DIMENSION TMP(1),WORK(1) + TSUM = 0. + DO 100 I=1,N + TSUM = TSUM+X(I) + 100 CONTINUE + TMP(1)=TSUM + CALL GOP(TMP,WORK,'+ ',1) + GLSUM = TMP(1) + return + END +c----------------------------------------------------------------------- + real function glamax(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMAX = 0.0 + DO 100 I=1,N + TMAX = MAX(TMAX,ABS(A(I))) + 100 CONTINUE + TMP(1)=TMAX + CALL GOP(TMP,WORK,'M ',1) + GLAMAX=ABS(TMP(1)) + return + END +c----------------------------------------------------------------------- + real function glamin(a,n) + real a(1) + dimension tmp(1),work(1) + tmin = 9.e28 + do 100 i=1,n + tmin = min(tmin,abs(a(i))) + 100 continue + tmp(1)=tmin + call gop(tmp,work,'m ',1) + glamin=abs(tmp(1)) + return + end +c----------------------------------------------------------------------- + function iglmin(a,n) + integer a(1),tmin + integer tmp(1),work(1) + tmin= 999999999 + do i=1,n + tmin=min(tmin,a(i)) + enddo + tmp(1)=tmin + call igop(tmp,work,'m ',1) + iglmin=tmp(1) + return + end +c----------------------------------------------------------------------- + function iglmax(a,n) + integer a(1),tmax + integer tmp(1),work(1) + tmax= -999999999 + do i=1,n + tmax=max(tmax,a(i)) + enddo + tmp(1)=tmax + call igop(tmp,work,'M ',1) + iglmax=tmp(1) + return + end +c----------------------------------------------------------------------- + function iglsum(a,n) + integer a(1),tsum + integer tmp(1),work(1) + tsum= 0 + do i=1,n + tsum=tsum+a(i) + enddo + tmp(1)=tsum + call igop(tmp,work,'+ ',1) + iglsum=tmp(1) + return + end +C----------------------------------------------------------------------- + integer*8 function i8glsum(a,n) + integer*8 a(1),tsum + integer*8 tmp(1),work(1) + tsum= 0 + do i=1,n + tsum=tsum+a(i) + enddo + tmp(1)=tsum + call i8gop(tmp,work,'+ ',1) + i8glsum=tmp(1) + return + end +C----------------------------------------------------------------------- + function glmax(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMAX=-99.0e20 + DO 100 I=1,N + TMAX=MAX(TMAX,A(I)) + 100 CONTINUE + TMP(1)=TMAX + CALL GOP(TMP,WORK,'M ',1) + GLMAX=TMP(1) + return + END +c----------------------------------------------------------------------- + function glmin(a,n) + REAL A(1) + DIMENSION TMP(1),WORK(1) + TMIN=99.0e20 + DO 100 I=1,N + TMIN=MIN(TMIN,A(I)) + 100 CONTINUE + TMP(1)=TMIN + CALL GOP(TMP,WORK,'m ',1) + GLMIN = TMP(1) + return + END +c----------------------------------------------------------------------- + subroutine gllog(la,lb) +C +C If ANY LA=LB, then ALL LA=LB. +C + LOGICAL LA,LB + DIMENSION TMP(1),WORK(1) + + TMP(1)=1.0 + IF (LB) THEN + IF (LA) TMP(1)=0.0 + ELSE + IF (.NOT.LA) TMP(1)=0.0 + ENDIF + CALL GOP(TMP,WORK,'* ',1) + IF (TMP(1).EQ.0.0) LA=LB + return + END +c----------------------------------------------------------------------- + function fmdian(a,n,ifok) +C find the Median of the (global) set A + include 'SIZE' + DIMENSION A(1) + DIMENSION WORK1(5),WORK2(5) + DIMENSION GUES(100) + LOGICAL IFOK + + AMP =1.5 + AFAC =1.5 + GMIN =GLMIN(A,N) + GMAX =GLMAX(A,N) + GMIN0=GLMIN(A,N) + GMAX0=GLMAX(A,N) + GUESS=(GMAX+GMIN)/2.0 + EPS =(GMAX-GMIN) + IF (EPS.EQ.0.0) THEN + FMDIAN=GMAX + return + ENDIF + WORK1(1)=N + CALL GOP(WORK1,WORK2,'+ ',1) + NTOT=WORK1(1) + N2 = (NTOT+1)/2 + IF (.NOT.IFOK) THEN + WRITE(6,8) NID,N,(A(I),I=1,N) + WRITE(6,9) NID,NTOT,N2,N,GMIN,GMAX + 8 FORMAT(I5,'N,A:',I5,10(6F10.5,/)) + 9 FORMAT(I5,'mnx:',3I6,2F10.5) + ENDIF +C +C This is the trial loop +C + ITRY=-1 + 10 CONTINUE + ITRY=ITRY+1 + II=ITRY+1 + IF (II.LE.100) GUES(II)=GUESS +C error check for infinite loop + IF (ITRY.GT.2*NTOT) GOTO 9000 + CALL RZERO(WORK1,5) + NLT=0 + NGT=0 + CLT=GMIN0 + CGT=GMAX0 + DO 100 I=1,N + AA=A(I) + IF (AA.NE.GUESS) THEN + IF (AA.LT.GUESS) THEN + NLT=NLT+1 +C CLT - closest value to GUESS Less Than GUESS + IF (AA.GT.CLT) CLT=AA + ENDIF + IF (AA.GT.GUESS) THEN + NGT=NGT+1 +C CGT - closest value to GUESS Greater Than GUESS + IF (AA.LT.CGT) CGT=AA + ENDIF + DUM=1./(EPS+ABS(AA-GUESS)) + WORK1(1)=WORK1(1)+DUM + WORK1(2)=WORK1(2)+DUM*AA + ELSE +C detected values equaling the guess. + WORK1(5)=WORK1(5)+1.0 + ENDIF + 100 CONTINUE +C Invoke vector reduction across processors: + WORK2(1)=CLT + CLT=GLMAX(WORK2,1) + WORK2(1)=CGT + CGT=GLMIN(WORK2,1) + WORK1(3)=NLT + WORK1(4)=NGT + CALL GOP(WORK1,WORK2,'+ ',5) + NLT=WORK1(3) + NGT=WORK1(4) + IF (.NOT.IFOK) THEN + WRITE(6,101) NID,GUESS,CLT,CGT + WRITE(6,102) NID,(WORK1(I),I=1,5) + 101 FORMAT(I5,'Glg:',3F12.5) + 102 FORMAT(I5,'WORK1:',5F12.5) + ENDIF +C +C Done? +C + IF (NLT.GT.N2.OR.NGT.GT.N2) THEN +C we're not done..... + IF (NGT.GT.NLT) THEN +C guess is too low + GMIN=CGT + G2=CGT+MAX(0.,WORK1(2)/WORK1(1)-GUESS)*AMP + IF (G2.GT.GMAX) G2=0.5*(GUESS+GMAX) + EPS=AFAC*ABS(G2-GUESS) +C see that we move at least as far as the next closest value. + GUESS=MAX(G2,CGT) + GOTO 10 + ELSE IF (NLT.GT.NGT) THEN +C guess is too high + GMAX=CLT + G2=CLT+MIN(0.,WORK1(2)/WORK1(1)-GUESS)*AMP + IF (G2.LT.GMIN) G2=0.5*(GUESS+GMIN) + EPS=AFAC*ABS(G2-GUESS) +C see that we move at least as far as the next closest value. + GUESS=MIN(G2,CLT) + GOTO 10 + ENDIF + ELSE +C +C we're done.... + IF (WORK1(5).NE.0) THEN +C the median is (usually) one of the values + FMDIAN=GUESS + IF (WORK1(5).EQ.1.0) THEN + IF (MOD(NTOT,2).EQ.0) THEN + IF (NGT.GT.NLT) THEN + FMDIAN=0.5*(GUESS+CGT) + ELSE + FMDIAN=0.5*(GUESS+CLT) + ENDIF + ELSE + IF (NGT.EQ.NLT) THEN + FMDIAN=GUESS + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=CGT + ELSE + FMDIAN=CLT + ENDIF + ENDIF + ENDIF + ELSE + IF (MOD(NTOT,2).EQ.0) THEN + IF (NGT.EQ.NLT) THEN + FMDIAN=0.5*(CLT+CGT) + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=0.5*(GUESS+CGT) + ELSE + FMDIAN=0.5*(GUESS+CLT) + ENDIF + ELSE + IF (NGT.EQ.NLT) THEN + FMDIAN=GUESS + ELSE IF(NGT.GT.NLT) THEN + FMDIAN=CGT + ELSE + FMDIAN=CLT + ENDIF + ENDIF + ENDIF + + ENDIF + IF (.NOT.IFOK) WRITE(6,*) NID,'FMDIAN2',FMDIAN,(A(I),I=1,N) + return +C +C Error handling +C + 9000 CONTINUE + WRITE(6,11) NTOT,GMIN0,GMAX0,GUESS + 11 FORMAT('ABORTING IN FMDIAN: N,AMIN,AMAX:',I6,3G14.6) + DO 13 I1=1,N,5 + IN=I1+5 + IN=MIN(IN,N) + WRITE(6,12) NID,(A(I),I=I1,IN) + 12 FORMAT(I4,' FMA:',5G14.6) + 13 CONTINUE + DO 15 I1=1,ITRY,5 + IN=I1+5 + IN=MIN(IN,ITRY) + WRITE(6,14) NID,(GUES(I),I=I1,IN) + 14 FORMAT(I4,' FMG:',5G14.6) + 15 CONTINUE + call exitt + END + +C======================================================================== +C Double precision matrix and vector routines +C======================================================================== + +c----------------------------------------------------------------------- + subroutine dcadd(a,const,n) + real*8 A(1),CONST + + DO 100 I=1,N + A(I)=A(I)+CONST + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine dsub2(a,b,n) + real*8 A(1), B(1) + + DO 100 I=1,N + A(I)=A(I)-B(I) + 100 CONTINUE + return + END + +c----------------------------------------------------------------------- + subroutine dadd2(a,b,n) + real*8 A(1), B(1) + + DO 100 I=1,N + A(I)=A(I)+B(I) + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine chswapr(b,L,ind,n,temp) + INTEGER IND(1) + CHARACTER*6 B(1),TEMP(1) + +C*** SORT ASSOCIATED ELEMENTS BY PUTTING ITEM(JJ) +C*** INTO ITEM(I), WHERE JJ=IND(I). +C*** + DO 20 I=1,N + JJ=IND(I) + TEMP(I)=B(JJ) + 20 CONTINUE + DO 30 I=1,N + 30 B(I)=TEMP(I) + return + END +c----------------------------------------------------------------------- + subroutine drcopy(r,d,N) + real*8 d(1) + dimension r(1) + do 10 i=1,n + r(i)=d(i) + 10 continue + return + end + subroutine sorts(xout,xin,work,n) + real xout(1),xin(1),work(1) + call copy(xout,xin,n) + call sort(xout,work,n) + return + end +C +c----------------------------------------------------------------------- + function ivlsum(a,n) + INTEGER A(1) + INTEGER TSUM + if (n.eq.0) then + ivlsum = 0 + return + endif + TSUM=A(1) + DO 100 I=2,N + TSUM=TSUM+A(I) + 100 CONTINUE + IVLSUM=TSUM + return + END +c----------------------------------------------------------------------- + subroutine icadd(a,c,n) + INTEGER A(1),C + DO 100 I = 1, N + 100 A(I) = A(I) + C + return + END + subroutine isort(a,ind,n) +C +C Use Heap Sort (p 231 Num. Rec., 1st Ed.) +C + integer a(1),ind(1) + integer aa + + dO 10 j=1,n + ind(j)=j + 10 continue + + if (n.le.1) return + L=n/2+1 + ir=n + 100 continue + if (l.gt.1) then + l=l-1 + aa = a (l) + ii = ind(l) + else + aa = a(ir) + ii = ind(ir) + a(ir) = a( 1) + ind(ir) = ind( 1) + ir=ir-1 + if (ir.eq.1) then + a(1) = aa + ind(1) = ii + return + endif + endif + i=l + j=l+l + 200 continue + if (j.le.ir) then + if (j.lt.ir) then + if ( a(j).lt.a(j+1) ) j=j+1 + endif + if (aa.lt.a(j)) then + a(i) = a(j) + ind(i) = ind(j) + i=j + j=j+j + else + j=ir+1 + endif + GOTO 200 + endif + a(i) = aa + ind(i) = ii + GOTO 100 + end + subroutine sort(a,ind,n) +C +C Use Heap Sort (p 231 Num. Rec., 1st Ed.) +C + real a(1),aa + integer ind(1) + + dO 10 j=1,n + ind(j)=j + 10 continue + + if (n.le.1) return + L=n/2+1 + ir=n + 100 continue + if (l.gt.1) then + l=l-1 + aa = a (l) + ii = ind(l) + else + aa = a(ir) + ii = ind(ir) + a(ir) = a( 1) + ind(ir) = ind( 1) + ir=ir-1 + if (ir.eq.1) then + a(1) = aa + ind(1) = ii + return + endif + endif + i=l + j=l+l + 200 continue + if (j.le.ir) then + if (j.lt.ir) then + if ( a(j).lt.a(j+1) ) j=j+1 + endif + if (aa.lt.a(j)) then + a(i) = a(j) + ind(i) = ind(j) + i=j + j=j+j + else + j=ir+1 + endif + GOTO 200 + endif + a(i) = aa + ind(i) = ii + GOTO 100 + end +c----------------------------------------------------------------------- + subroutine iswap_ip(x,p,n) + integer x(1),xstart + integer p(1) +c +c In-place permutation: x' = x(p) +c + do k=1,n + if (p(k).gt.0) then ! not swapped + xstart = x(k) + loop_start = k + last = k + do j=k,n + next = p(last) + if (next.lt.0) then + write(6,*) 'Hey! iswap_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(last) = xstart + p(last) = -p(last) + goto 10 + else + x(last) = x(next) + p(last) = -p(last) + last = next + endif + enddo + 10 continue + endif + enddo +c + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine iswapt_ip(x,p,n) + integer x(1),t1,t2 + integer p(1) +c +c In-place permutation: x'(p) = x +c + + do k=1,n + if (p(k).gt.0) then ! not swapped + loop_start = k + next = p(loop_start) + t1 = x(loop_start) + do j=1,n + if (next.lt.0) then + write(6,*) 'Hey! iswapt_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(next) = t1 + p(next) = -p(next) + goto 10 + else + t2 = x(next) + x(next) = t1 + t1 = t2 + nextp = p(next) + p(next) = -p(next) + next = nextp + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine swap_ip(x,p,n) + real x(1),xstart + integer p(1) +c +c In-place permutation: x' = x(p) +c + do k=1,n + if (p(k).gt.0) then ! not swapped + xstart = x(k) + loop_start = k + last = k + do j=k,n + next = p(last) + if (next.lt.0) then + write(6,*) 'Hey! swap_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(last) = xstart + p(last) = -p(last) + goto 10 + else + x(last) = x(next) + p(last) = -p(last) + last = next + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine swapt_ip(x,p,n) + real x(1),t1,t2 + integer p(1) +c +c In-place permutation: x'(p) = x +c + + do k=1,n + if (p(k).gt.0) then ! not swapped + loop_start = k + next = p(loop_start) + t1 = x(loop_start) + do j=1,n + if (next.lt.0) then + write(6,*) 'Hey! swapt_ip problem.',j,k,n,next + call exitt + elseif (next.eq.loop_start) then + x(next) = t1 + p(next) = -p(next) + goto 10 + else + t2 = x(next) + x(next) = t1 + t1 = t2 + nextp = p(next) + p(next) = -p(next) + next = nextp + endif + enddo + 10 continue + endif + enddo + + do k=1,n + p(k) = -p(k) + enddo + return + end +c----------------------------------------------------------------------- + subroutine glvadd(x,w,n) + real x(1),w(1) + call gop(x,w,'+ ',1) + return + end +c----------------------------------------------------------------------- + subroutine add3s12(x,y,z,c1,c2,n) + real x(1),y(1),z(1),c1,c2 + do i=1,n + x(i) = c1*y(i)+c2*z(i) + enddo + return + end +c----------------------------------------------------------------------- + integer*8 function i8glmax(a,n) + integer*8 a(1),tmax + integer*8 tmp(1),work(1) + tmax= -999999 + do i=1,n + tmax=max(tmax,a(i)) + enddo + tmp(1)=tmax + call i8gop(tmp,work,'M ',1) + i8glmax=tmp(1) + if (i8glmax .eq. -999999) i8glmax=0 + return + end +c----------------------------------------------------------------------- + subroutine admcol3(a,b,c,d,n) + REAL A(1),B(1),C(1),D +C + DO 100 I=1,N + A(I)=A(I)+B(I)*C(I)*D + 100 CONTINUE + return + END +c----------------------------------------------------------------------- + subroutine add2col2(a,b,c,n) + real a(1),b(1),c(1) + + do i=1,n + a(i) = a(i) + b(i)*c(i) + enddo + return + end +c----------------------------------------------------------------------- + subroutine add2sxy(x,a,y,b,n) + real x(1),y(1) + + do i=1,n + x(i) = a*x(i) + b*y(i) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine col2s2(x,y,s,n) + real x(n),y(n) + + do i=1,n + x(i)=s*x(i)*y(i) + enddo + + return + end +c----------------------------------------------------------------------- + INTEGER FUNCTION INDX1(S1,S2,L2) + CHARACTER*132 S1,S2 + + N1=132-L2+1 + INDX1=0 + IF (N1.LT.1) return + + DO 100 I=1,N1 + I2=I+L2-1 + IF (S1(I:I2).EQ.S2(1:L2)) THEN + INDX1=I + return + ENDIF + 100 CONTINUE + + return + END +c----------------------------------------------------------------------- + diff --git a/src/mpi_dummy.f b/src/mpi_dummy.f new file mode 100644 index 0000000..f6257df --- /dev/null +++ b/src/mpi_dummy.f @@ -0,0 +1,1053 @@ +c*********************************************************************72 + subroutine mpi_scan(data1, data2, n, datatype, + & operation, comm, ierror ) + + integer data1,data2 ! currently hardwired only for integer + + data2 = data1 + + return + end + +c*********************************************************************72 + subroutine mpi_abort ( comm, errorcode, ierror ) + +c*********************************************************************72 +c +cc MPI_ABORT shuts down the processes in a given communicator. +c + implicit none + + integer comm + integer errorcode + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_ABORT:' + write ( *, '(a,i12)' ) + & ' Shut down with error code = ', errorcode + + stop + end + subroutine mpi_allgather ( data1, nsend, sendtype, data2, + & nrecv, recvtype, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLGATHER gathers data from all the processes in a communicator. +c + implicit none + + include "mpi_dummy.h" + + integer nsend + + integer comm + integer data1(nsend) + integer data2(nsend) + integer ierror + integer nrecv + integer recvtype + integer sendtype + + ierror = MPI_SUCCESS + + if ( sendtype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, nsend, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_allgatherv ( data1, nsend, sendtype, + & data2, nrecv, ndispls, recvtype, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLGATHERV gathers data from all the processes in a communicator. +c + implicit none + + include "mpi_dummy.h" + + integer nsend + + integer comm + integer data1(nsend) + integer data2(nsend) + integer ierror + integer ndispls + integer nrecv + integer recvtype + integer sendtype + + ierror = MPI_SUCCESS + + if ( sendtype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, nsend, ierror ) + else if ( sendtype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, nsend, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_allreduce ( data1, data2, n, datatype, + & operation, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_ALLREDUCE carries out a reduction operation. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2(n) + integer datatype + integer ierror + integer operation + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + + call mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer ) then + + call mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer8 ) then + + call mpi_reduce_integer8( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_real ) then + + call mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + + else + + ierror = MPI_FAILURE + + end if + + return + end + + subroutine mpi_barrier ( comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BARRIER forces processes within a communicator to wait together. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + return + end + subroutine mpi_bcast ( data, n, datatype, node, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BCAST broadcasts data from one process to all others. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer node + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_bsend ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_BSEND sends data from one process to another, using buffering. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_BSEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_cart_create ( comm, ndims, dims, periods, + & reorder, comm_cart, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_CREATE creates a communicator for a Cartesian topology. +c + implicit none + + integer ndims + + integer comm + integer comm_cart + integer dims(*) + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + logical periods(*) + logical reorder + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_cart_get ( comm, ndims, dims, periods, + & coords, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_GET returns the "Cartesian coordinates" of the calling process. +c + implicit none + + integer ndims + + integer comm + integer coords(*) + integer dims(*) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + logical periods(*) + + ierror = MPI_SUCCESS + + do i = 1, ndims + coords(i) = 0 + end do + + return + end + subroutine mpi_cart_shift ( comm, idir, idisp, isource, + & idest, ierror ) + +c*********************************************************************72 +c +cc MPI_CART_SHIFT finds the destination and source for Cartesian shifts. +c + implicit none + + integer comm + integer idest + integer idir + integer idisp + integer ierror + integer isource + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + isource = 0 + idest = 0 + + return + end + subroutine mpi_comm_dup ( comm, comm_out, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_DUP duplicates a communicator. +c + implicit none + + integer comm + integer comm_out + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + comm_out = comm + + return + end + subroutine mpi_comm_free ( comm, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_FREE "frees" a communicator. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_comm_rank ( comm, me, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_RANK reports the rank of the calling process. +c + implicit none + + integer comm + integer ierror + integer me + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + me = 0 + + return + end + subroutine mpi_comm_size ( comm, nprocs, ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_SIZE reports the number of processes in a communicator. +c + implicit none + + integer comm + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer nprocs + + ierror = MPI_SUCCESS + nprocs = 1 + + return + end + subroutine mpi_comm_split ( comm, icolor, ikey, comm_new, + & ierror ) + +c*********************************************************************72 +c +cc MPI_COMM_SPLIT splits up a communicator based on a key. +c + implicit none + + integer comm + integer comm_new + integer icolor + integer ierror + integer ikey + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_copy_double_precision ( data1, data2, n, ierror ) + +c*********************************************************************72 +c +cc MPI_COPY_DOUBLE copies a double precision vector. +c + implicit none + + integer n + + double precision data1(n) + double precision data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_copy_integer ( data1, data2, n, ierror ) + +c*********************************************************************72 +c +cc MPI_COPY_INTEGER copies an integer vector. +c + implicit none + + integer n + + integer data1(n) + integer data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_copy_real ( data1, data2, n, ierror ) + +c*********************************************************************72 +c + implicit none + + integer n + + real data1(n) + real data2(n) + integer i + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_finalize ( ierror ) + +c*********************************************************************72 +c +cc MPI_FINALIZE shuts down the MPI library. +c + implicit none + + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_get_count ( istatus, datatype, icount, ierror ) + +c*********************************************************************72 +c +cc MPI_GET_COUNT reports the actual number of items transmitted. +c + implicit none + + integer datatype + integer icount + integer ierror + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_GET_COUNT - Error!' + write ( *, '(a)' ) ' Should not query message from self.' + + return + end + subroutine mpi_init ( ierror ) + +c*********************************************************************72 +c +cc MPI_INIT initializes the MPI library. +c + implicit none + + integer ierror + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_SUCCESS + + return + end + subroutine mpi_irecv ( data, n, datatype, iproc, itag, + & comm, irequest, ierror ) + +c*********************************************************************72 +c +cc MPI_IRECV receives data from another process. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer irequest + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_IRECV - Error!' + write ( *, '(a)' ) ' Should not recv message from self.' + + return + end + subroutine mpi_isend ( data, n, datatype, iproc, itag, + & comm, request, ierror ) + +c*********************************************************************72 +c +cc MPI_ISEND sends data from one process to another using nonblocking transmission. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + integer request + + request = 0 + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_ISEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_recv ( data, n, datatype, iproc, itag, + & comm, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_RECV receives data from another process within a communicator. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer istatus + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_RECV - Error!' + write ( *, '(a)' ) ' Should not recv message from self.' + + return + end + subroutine mpi_reduce ( data1, data2, n, datatype, operation, + & receiver, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE carries out a reduction operation. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2 + integer datatype + integer ierror + integer operation + integer receiver + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + + call mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_integer ) then + + call mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + + else if ( datatype .eq. mpi_real ) then + + call mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + + else + + ierror = MPI_FAILURE + + end if + + return + end + subroutine mpi_reduce_double_precision ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_DOUBLE_PRECISION carries out a reduction operation on double precision values. +c + implicit none + + include "mpi_dummy.h" + + integer n + + double precision data1(n) + double precision data2(n) + integer i + integer ierror + integer operation + + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + + subroutine mpi_reduce_integer8 ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer*8 data1(n) + integer*8 data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + ierror = MPI_FAILURE + + return + end + + subroutine mpi_reduce_integer ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer data1(n) + integer data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + ierror = MPI_FAILURE + + return + end + + subroutine mpi_reduce_real ( + & data1, data2, n, operation, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_REAL carries out a reduction operation on reals. +c +c Discussion: +c + implicit none + + include "mpi_dummy.h" + + integer n + + real data1(n) + real data2(n) + integer i + integer ierror + integer operation + + ierror = MPI_SUCCESS + + do i = 1, n + data2(i) = data1(i) + end do + + return + end + subroutine mpi_reduce_scatter ( data1, data2, n, datatype, + & operation, comm, ierror ) + +c*********************************************************************72 +c +cc MPI_REDUCE_SCATTER collects a message of the same length from each process. +c + implicit none + + include "mpi_dummy.h" + + integer n + + integer comm + integer data1(n) + integer data2(n) + integer datatype + integer ierror + integer operation + + ierror = MPI_SUCCESS + + if ( datatype .eq. mpi_double_precision ) then + call mpi_copy_double_precision ( data1, data2, n, ierror ) + else if ( datatype .eq. mpi_integer ) then + call mpi_copy_integer ( data1, data2, n, ierror ) + else if ( datatype .eq. mpi_real ) then + call mpi_copy_real ( data1, data2, n, ierror ) + else + ierror = MPI_FAILURE + end if + + return + end + subroutine mpi_rsend ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_RSEND "ready sends" data from one process to another. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_RSEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_send ( data, n, datatype, iproc, itag, + & comm, ierror ) + +c*********************************************************************72 +c +cc MPI_SEND sends data from one process to another. +c + implicit none + + integer n + + integer comm + integer data(n) + integer datatype + integer ierror + integer iproc + integer itag + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_SEND - Error!' + write ( *, '(a)' ) ' Should not send message to self.' + + return + end + subroutine mpi_wait ( irequest, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAIT waits for an I/O request to complete. +c + implicit none + + integer ierror + integer irequest + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAIT - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + subroutine mpi_waitall ( icount, irequest, istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAITALL waits until all I/O requests have completed. +c + implicit none + + integer icount + integer ierror + integer irequest + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAITALL - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + subroutine mpi_waitany ( icount, array_of_requests, index, + & istatus, ierror ) + +c*********************************************************************72 +c +cc MPI_WAITANY waits until one I/O requests has completed. +c + implicit none + + integer array_of_requests(*) + integer icount + integer ierror + integer index + integer istatus + integer MPI_FAILURE + parameter ( MPI_FAILURE = 1 ) + integer MPI_SUCCESS + parameter ( MPI_SUCCESS = 0 ) + + ierror = MPI_FAILURE + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MPI_WAITANY - Error!' + write ( *, '(a)' ) ' Should not wait on message from self.' + + return + end + function mpi_wtick ( ) + +c*********************************************************************72 +c +cc MPI_WTICK returns the time between clock ticks. +c + implicit none + + double precision mpi_wtick + + mpi_wtick = 1.0D+00 + + return + end + function mpi_wtime ( ) + +c*********************************************************************72 +c +cc MPI_WTIME returns the elapsed wall clock time. +c + implicit none + + real*8 mpi_wtime + real*8 a + integer*8 countval, countrate, countmax + + call system_clock(countval, countrate, countmax) + a = countval + mpi_wtime = a/countrate + + return + end + + subroutine mpi_initialized(mpi_is_initialized, ierr) + + mpi_is_initialized = 0 + ierr = 0 + + return + end + + subroutine mpi_comm_create(icomm,igroup,icommd,ierr) + + icommd = 1 + + return + end + + subroutine mpi_comm_group(icomm,igroup,ierr) + + igroup = 1 + ierr = 0 + + return + end + + subroutine mpi_group_free + + return + end + + subroutine mpi_attr_get(icomm,ikey,ival,iflag,ierr) + + logical iflag + + ival = 999 999 999 ! dummy + + return + end +c----------------------------------------------------------------------- diff --git a/src/mpi_dummy.h b/src/mpi_dummy.h new file mode 100644 index 0000000..0a92b81 --- /dev/null +++ b/src/mpi_dummy.h @@ -0,0 +1,61 @@ +c +c Dummy parameters for MPI F77 stubs +c + integer mpi_comm_world + parameter ( mpi_comm_world = 0 ) +c +c Return values. +c + integer mpi_failure + parameter ( mpi_failure = 1 ) + integer mpi_success + parameter ( mpi_success = 0 ) +c +c recv message status +c + integer mpi_status_size + parameter ( mpi_status_size = 3 ) + integer mpi_source + parameter ( mpi_source = 1 ) + integer mpi_tag + parameter ( mpi_tag = 2 ) + integer mpi_count + parameter ( mpi_count = 3 ) +c +c recv flags +c + integer mpi_any_source + parameter ( mpi_any_source = -1 ) + integer mpi_any_tag + parameter ( mpi_any_tag = -1 ) +c +c data types and sizes +c + integer mpi_integer + parameter ( mpi_integer = 1 ) + integer mpi_integer8 + parameter ( mpi_integer8 = 6 ) + integer mpi_real + parameter ( mpi_real = 2 ) + integer mpi_double_precision + parameter ( mpi_double_precision = 3 ) + integer mpi_logical + parameter ( mpi_logical = 4 ) + integer mpi_character + parameter ( mpi_character = 5 ) +c +c allreduce operations +c + integer mpi_sum + parameter ( mpi_sum = 1 ) + integer mpi_max + parameter ( mpi_max = 2 ) + integer mpi_min + parameter ( mpi_min = 3 ) + integer mpi_product + parameter ( mpi_product = 4 ) +c +c timer +c + external mpi_wtime + real*8 mpi_wtime diff --git a/src/mxm_std.f b/src/mxm_std.f new file mode 100644 index 0000000..5e21cb3 --- /dev/null +++ b/src/mxm_std.f @@ -0,0 +1,4123 @@ + subroutine mxmf2(A,N1,B,N2,C,N3) +c +c unrolled loop version +c + real a(n1,n2),b(n2,n3),c(n1,n3) + + if (n2.le.8) then + if (n2.eq.1) then + call mxf1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxf2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxf3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxf4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxf5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxf6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxf7(a,n1,b,n2,c,n3) + else + call mxf8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxf9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxf10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxf11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxf12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxf13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxf14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxf15(a,n1,b,n2,c,n3) + else + call mxf16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxf17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxf18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxf19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxf20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxf21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxf22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxf23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxf24(a,n1,b,n2,c,n3) + endif + else + call mxm44_0(a,n1,b,n2,c,n3) + endif +c + return + end +c----------------------------------------------------------------------- + subroutine mxf1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxf24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxm44_0(a, m, b, k, c, n) +c +c matrix multiply with a 4x4 pencil +c + real a(m,k), b(k,n), c(m,n) + real s11, s12, s13, s14, s21, s22, s23, s24 + real s31, s32, s33, s34, s41, s42, s43, s44 + + mresid = iand(m,3) + nresid = iand(n,3) + m1 = m - mresid + 1 + n1 = n - nresid + 1 + + do i=1,m-mresid,4 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + s44 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + s14 = s14 + a(i,l)*b(l,j+3) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + s24 = s24 + a(i+1,l)*b(l,j+3) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + s34 = s34 + a(i+2,l)*b(l,j+3) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + s44 = s44 + a(i+3,l)*b(l,j+3) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + c(i,j+2) = s13 + c(i,j+3) = s14 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + + c(i+1,j+3) = s24 + c(i+2,j+3) = s34 + c(i+3,j+3) = s44 + enddo +* Residual when n is not multiple of 4 + if (nresid .ne. 0) then + if (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,n) + s21 = s21 + a(i+1,l)*b(l,n) + s31 = s31 + a(i+2,l)*b(l,n) + s41 = s41 + a(i+3,l)*b(l,n) + enddo + c(i,n) = s11 + c(i+1,n) = s21 + c(i+2,n) = s31 + c(i+3,n) = s41 + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + enddo + c(i,j) = s11 + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + c(i,j+1) = s12 + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + c(i,j+2) = s13 + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + endif + endif + enddo + +* Residual when m is not multiple of 4 + if (mresid .eq. 0) then + return + elseif (mresid .eq. 1) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,j) + s12 = s12 + a(m,l)*b(l,j+1) + s13 = s13 + a(m,l)*b(l,j+2) + s14 = s14 + a(m,l)*b(l,j+3) + enddo + c(m,j) = s11 + c(m,j+1) = s12 + c(m,j+2) = s13 + c(m,j+3) = s14 + enddo +* mresid is 1, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n) + enddo + c(m,n) = s11 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s12 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-1) + s12 = s12 + a(m,l)*b(l,n) + enddo + c(m,n-1) = s11 + c(m,n) = s12 + return + else + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-2) + s12 = s12 + a(m,l)*b(l,n-1) + s13 = s13 + a(m,l)*b(l,n) + enddo + c(m,n-2) = s11 + c(m,n-1) = s12 + c(m,n) = s13 + return + endif + elseif (mresid .eq. 2) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + s21 = 0.0d0 + s22 = 0.0d0 + s23 = 0.0d0 + s24 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,j) + s12 = s12 + a(m-1,l)*b(l,j+1) + s13 = s13 + a(m-1,l)*b(l,j+2) + s14 = s14 + a(m-1,l)*b(l,j+3) + + s21 = s21 + a(m,l)*b(l,j) + s22 = s22 + a(m,l)*b(l,j+1) + s23 = s23 + a(m,l)*b(l,j+2) + s24 = s24 + a(m,l)*b(l,j+3) + enddo + c(m-1,j) = s11 + c(m-1,j+1) = s12 + c(m-1,j+2) = s13 + c(m-1,j+3) = s14 + c(m,j) = s21 + c(m,j+1) = s22 + c(m,j+2) = s23 + c(m,j+3) = s24 + enddo +* mresid is 2, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n) + enddo + c(m-1,n) = s11 + c(m,n) = s21 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-1) + s12 = s12 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-1) + s22 = s22 + a(m,l)*b(l,n) + enddo + c(m-1,n-1) = s11 + c(m-1,n) = s12 + c(m,n-1) = s21 + c(m,n) = s22 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-2) + s12 = s12 + a(m-1,l)*b(l,n-1) + s13 = s13 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-2) + s22 = s22 + a(m,l)*b(l,n-1) + s23 = s23 + a(m,l)*b(l,n) + enddo + c(m-1,n-2) = s11 + c(m-1,n-1) = s12 + c(m-1,n) = s13 + c(m,n-2) = s21 + c(m,n-1) = s22 + c(m,n) = s23 + return + endif + else +* mresid is 3 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + + do l=1,k + s11 = s11 + a(m-2,l)*b(l,j) + s12 = s12 + a(m-2,l)*b(l,j+1) + s13 = s13 + a(m-2,l)*b(l,j+2) + s14 = s14 + a(m-2,l)*b(l,j+3) + + s21 = s21 + a(m-1,l)*b(l,j) + s22 = s22 + a(m-1,l)*b(l,j+1) + s23 = s23 + a(m-1,l)*b(l,j+2) + s24 = s24 + a(m-1,l)*b(l,j+3) + + s31 = s31 + a(m,l)*b(l,j) + s32 = s32 + a(m,l)*b(l,j+1) + s33 = s33 + a(m,l)*b(l,j+2) + s34 = s34 + a(m,l)*b(l,j+3) + enddo + c(m-2,j) = s11 + c(m-2,j+1) = s12 + c(m-2,j+2) = s13 + c(m-2,j+3) = s14 + + c(m-1,j) = s21 + c(m-1,j+1) = s22 + c(m-1,j+2) = s23 + c(m-1,j+3) = s24 + + c(m,j) = s31 + c(m,j+1) = s32 + c(m,j+2) = s33 + c(m,j+3) = s34 + enddo +* mresid is 3, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n) + enddo + c(m-2,n) = s11 + c(m-1,n) = s21 + c(m,n) = s31 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-1) + s12 = s12 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-1) + s22 = s22 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-1) + s32 = s32 + a(m,l)*b(l,n) + enddo + c(m-2,n-1) = s11 + c(m-2,n) = s12 + c(m-1,n-1) = s21 + c(m-1,n) = s22 + c(m,n-1) = s31 + c(m,n) = s32 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-2) + s12 = s12 + a(m-2,l)*b(l,n-1) + s13 = s13 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-2) + s22 = s22 + a(m-1,l)*b(l,n-1) + s23 = s23 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-2) + s32 = s32 + a(m,l)*b(l,n-1) + s33 = s33 + a(m,l)*b(l,n) + enddo + c(m-2,n-2) = s11 + c(m-2,n-1) = s12 + c(m-2,n) = s13 + c(m-1,n-2) = s21 + c(m-1,n-1) = s22 + c(m-1,n) = s23 + c(m,n-2) = s31 + c(m,n-1) = s32 + c(m,n) = s33 + return + endif + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm44_2(a, m, b, k, c, n) + real a(m,2), b(2,n), c(m,n) + + nresid = iand(n,3) + n1 = n - nresid + 1 + + do j=1,n-nresid,4 + do i=1,m + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + c(i,j+1) = a(i,1)*b(1,j+1) + $ + a(i,2)*b(2,j+1) + c(i,j+2) = a(i,1)*b(1,j+2) + $ + a(i,2)*b(2,j+2) + c(i,j+3) = a(i,1)*b(1,j+3) + $ + a(i,2)*b(2,j+3) + enddo + enddo + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + do i=1,m + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + elseif (nresid .eq. 2) then + do i=1,m + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + else + do i=1,m + c(i,n-2) = a(i,1)*b(1,n-2) + $ + a(i,2)*b(2,n-2) + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm_test_all(nid,ivb) +c +c Collect matrix-matrix product statistics +c + external mxms,mxmur2,mxmur3,mxmd,mxmfb,mxmf3,mxmu4,mxmn2 + external mxmk2,mxmtr,mxmrg,madd,mxm,mxm44 +c + parameter (nn=24) + parameter (nt=10) + character*5 c(3,nt) + real s(nn,2,nt,3) + real a(nn,2,nt,3) + + call nekgsync + + do k=1,3 ! 3 tests: N^2 x N, NxN, NxN^2 + call mxmtest(s(1,1, 1,k),nn,c(k, 1),mxm44 ,'mxm44',k,ivb) + call mxmtest(s(1,1, 2,k),nn,c(k, 2),mxms ,' std ',k,ivb) + call mxmtest(s(1,1, 3,k),nn,c(k, 3),mxmur2,'mxmu2',k,ivb) + call mxmtest(s(1,1, 4,k),nn,c(k, 4),mxmur3,'mxmu3',k,ivb) + call mxmtest(s(1,1, 5,k),nn,c(k, 5),mxmd ,'mxmd ',k,ivb) + call mxmtest(s(1,1, 6,k),nn,c(k, 6),mxmfb ,'mxmfb',k,ivb) + call mxmtest(s(1,1, 7,k),nn,c(k, 7),mxmu4 ,'mxmu4',k,ivb) + call mxmtest(s(1,1, 8,k),nn,c(k, 8),mxmf3 ,'mxmf3',k,ivb) + if (k.eq.2) ! Add works only for NxN case + $ call mxmtest(s(1,1, 9,k),nn,c(k, 9),madd ,'madd ',k,ivb) + call mxmtest(s(1,1,10,k),nn,c(k,10),mxm ,'mxm ',k,ivb) + enddo + + call nekgsync + if (nid.eq.0) call mxm_analyze(s,a,nn,c,nt,ivb) + call nekgsync + + return + end +c----------------------------------------------------------------------- + subroutine initab(a,b,n) + real a(1),b(1) + do i=1,n-1 + x = i + k = mod(i,19) + 2 + l = mod(i,17) + 5 + m = mod(i,31) + 3 + a(i) = -.25*(a(i)+a(i+1)) + (x*x + k + l)/(x*x+m) + b(i) = -.25*(b(i)+b(i+1)) + (x*x + k + m)/(x*x+l) + enddo + a(n) = -.25*(a(n)+a(n)) + (x*x + k + l)/(x*x+m) + b(n) = -.25*(b(n)+b(n)) + (x*x + k + m)/(x*x+l) + return + end +c----------------------------------------------------------------------- + subroutine mxms(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + return + end +c----------------------------------------------------------------------- + subroutine mxmu4(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + i1 = n1 - mod(n1,4) + 1 + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO I=1,N1-3,4 + C(I ,J)=C(I ,J)+A(I ,K)*BB + C(I+1,J)=C(I+1,J)+A(I+1,K)*BB + C(I+2,J)=C(I+2,J)+A(I+2,K)*BB + C(I+3,J)=C(I+3,J)+A(I+3,K)*BB + enddo + DO i=i1,N1 + C(I ,J)=C(I ,J)+A(I ,K)*BB + enddo + 100 CONTINUE + return + end +c----------------------------------------------------------------------- + subroutine madd (a,n1,b,n2,c,n3) +c + real a(n1,n2),b(n2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,j)+b(i,j) + enddo + enddo +c + return + end +c----------------------------------------------------------------------- + subroutine mxmUR2(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + if (n2.le.8) then + if (n2.eq.1) then + call mxmur2_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmur2_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmur2_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmur2_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmur2_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmur2_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmur2_7(a,n1,b,n2,c,n3) + else + call mxmur2_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmur2_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmur2_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmur2_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmur2_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmur2_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmur2_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmur2_15(a,n1,b,n2,c,n3) + else + call mxmur2_16(a,n1,b,n2,c,n3) + endif + else + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + endif + return + end +c + subroutine mxmur2_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end + subroutine mxmur2_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end + subroutine mxmur2_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end + subroutine mxmur2_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end + subroutine mxmur2_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end + subroutine mxmur2_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end + subroutine mxmur2_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end + subroutine mxmur2_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end + subroutine mxmur2_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end + subroutine mxmur2_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end + subroutine mxmur2_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end + subroutine mxmur2_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end + subroutine mxmur2_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end + subroutine mxmur2_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end + subroutine mxmur2_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end + subroutine mxmur2_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmUR3(a,n1,b,n2,c,n3) +C---------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + N0=N1*N3 + DO 10 I=1,N0 + C(I,1)=0. + 10 CONTINUE + if (n3.le.8) then + if (n3.eq.1) then + call mxmur3_1(a,n1,b,n2,c,n3) + elseif (n3.eq.2) then + call mxmur3_2(a,n1,b,n2,c,n3) + elseif (n3.eq.3) then + call mxmur3_3(a,n1,b,n2,c,n3) + elseif (n3.eq.4) then + call mxmur3_4(a,n1,b,n2,c,n3) + elseif (n3.eq.5) then + call mxmur3_5(a,n1,b,n2,c,n3) + elseif (n3.eq.6) then + call mxmur3_6(a,n1,b,n2,c,n3) + elseif (n3.eq.7) then + call mxmur3_7(a,n1,b,n2,c,n3) + else + call mxmur3_8(a,n1,b,n2,c,n3) + endif + elseif (n3.le.16) then + if (n3.eq.9) then + call mxmur3_9(a,n1,b,n2,c,n3) + elseif (n3.eq.10) then + call mxmur3_10(a,n1,b,n2,c,n3) + elseif (n3.eq.11) then + call mxmur3_11(a,n1,b,n2,c,n3) + elseif (n3.eq.12) then + call mxmur3_12(a,n1,b,n2,c,n3) + elseif (n3.eq.13) then + call mxmur3_13(a,n1,b,n2,c,n3) + elseif (n3.eq.14) then + call mxmur3_14(a,n1,b,n2,c,n3) + elseif (n3.eq.15) then + call mxmur3_15(a,n1,b,n2,c,n3) + else + call mxmur3_16(a,n1,b,n2,c,n3) + endif + else + DO 100 J=1,N3 + DO 100 K=1,N2 + BB=B(K,J) + DO 100 I=1,N1 + C(I,J)=C(I,J)+A(I,K)*BB + 100 CONTINUE + endif + return + end +c + subroutine mxmur3_16(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,16),c(n1,16) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + tmp15 = b(k,15) + tmp16 = b(k,16) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + c(i,15) = c(i,15) + a(i,k) * tmp15 + c(i,16) = c(i,16) + a(i,k) * tmp16 + enddo +c + enddo +c + return + end + subroutine mxmur3_15(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,15),c(n1,15) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + tmp15 = b(k,15) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + c(i,15) = c(i,15) + a(i,k) * tmp15 + enddo +c + enddo +c + return + end + subroutine mxmur3_14(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,14),c(n1,14) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + tmp14 = b(k,14) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + c(i,14) = c(i,14) + a(i,k) * tmp14 + enddo +c + enddo +c + return + end + subroutine mxmur3_13(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,13),c(n1,13) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + tmp13 = b(k,13) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + c(i,13) = c(i,13) + a(i,k) * tmp13 + enddo +c + enddo +c + return + end + subroutine mxmur3_12(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,12),c(n1,12) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + tmp12 = b(k,12) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + c(i,12) = c(i,12) + a(i,k) * tmp12 + enddo +c + enddo +c + return + end + subroutine mxmur3_11(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,11),c(n1,11) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + tmp11 = b(k,11) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + c(i,11) = c(i,11) + a(i,k) * tmp11 + enddo +c + enddo +c + return + end + subroutine mxmur3_10(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,10),c(n1,10) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + tmp10 = b(k,10) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + c(i,10) = c(i,10) + a(i,k) * tmp10 + enddo +c + enddo +c + return + end + subroutine mxmur3_9(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,9),c(n1,9) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + tmp9 = b(k, 9) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + c(i, 9) = c(i, 9) + a(i,k) * tmp9 + enddo +c + enddo +c + return + end + subroutine mxmur3_8(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,8),c(n1,8) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + tmp8 = b(k, 8) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + c(i, 8) = c(i, 8) + a(i,k) * tmp8 + enddo +c + enddo +c + return + end + subroutine mxmur3_7(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,7),c(n1,7) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + tmp7 = b(k, 7) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + c(i, 7) = c(i, 7) + a(i,k) * tmp7 + enddo +c + enddo +c + return + end + subroutine mxmur3_6(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,6),c(n1,6) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + tmp6 = b(k, 6) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + c(i, 6) = c(i, 6) + a(i,k) * tmp6 + enddo +c + enddo +c + return + end + subroutine mxmur3_5(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,5),c(n1,5) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + tmp5 = b(k, 5) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + c(i, 5) = c(i, 5) + a(i,k) * tmp5 + enddo +c + enddo +c + return + end + subroutine mxmur3_4(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,4),c(n1,4) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + tmp4 = b(k, 4) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + c(i, 4) = c(i, 4) + a(i,k) * tmp4 + enddo +c + enddo +c + return + end + subroutine mxmur3_3(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,3),c(n1,3) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + tmp3 = b(k, 3) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + c(i, 3) = c(i, 3) + a(i,k) * tmp3 + enddo +c + enddo +c + return + end + subroutine mxmur3_2(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,2),c(n1,2) +c + do k=1,n2 + tmp1 = b(k, 1) + tmp2 = b(k, 2) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + c(i, 2) = c(i, 2) + a(i,k) * tmp2 + enddo +c + enddo +c + return + end + subroutine mxmur3_1(a,n1,b,n2,c,n3) + real a(n1,n2),b(n2,1),c(n1,1) +c + do k=1,n2 + tmp1 = b(k, 1) + do i=1,n1 + c(i, 1) = c(i, 1) + a(i,k) * tmp1 + enddo + enddo +c + return + end +C---------------------------------------------------------------------- + subroutine mxmd(a,n1,b,n2,c,n3) +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C--------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) + REAL ONE,ZERO,EPS +C +C +C + one=1.0 + zero=0.0 + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + return + end +c----------------------------------------------------------------------- + subroutine mxmfb(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + integer wdsize + save wdsize + data wdsize/0/ +c +c First call: determine word size for dgemm/sgemm discrimination, below. +c + if (wdsize.eq.0) then + one = 1.0 + eps = 1.e-12 + wdsize = 8 + if (one+eps.eq.1.0) wdsize = 4 + endif +c + if (n2.le.8) then + if (n2.eq.1) then + call mxmfb_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmfb_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmfb_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmfb_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmfb_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmfb_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmfb_7(a,n1,b,n2,c,n3) + else + call mxmfb_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmfb_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmfb_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmfb_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmfb_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmfb_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmfb_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmfb_15(a,n1,b,n2,c,n3) + else + call mxmfb_16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxmfb_17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxmfb_18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxmfb_19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxmfb_20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxmfb_21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxmfb_22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxmfb_23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxmfb_24(a,n1,b,n2,c,n3) + endif + else +c + one=1.0 + zero=0.0 + if (wdsize.eq.4) then + call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + else + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + endif + + endif + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmfb_24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do j=1,n3 + do i=1,n1 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +C + integer wdsize + save wdsize + data wdsize/0/ +c +c First call: determine word size for dgemm/sgemm discrimination, below. +c + if (wdsize.eq.0) then + one = 1.0 + eps = 1.e-12 + wdsize = 8 + if (one+eps.eq.1.0) wdsize = 4 + endif +c + if (n2.le.8) then + if (n2.eq.1) then + call mxmf3_1(a,n1,b,n2,c,n3) + elseif (n2.eq.2) then + call mxmf3_2(a,n1,b,n2,c,n3) + elseif (n2.eq.3) then + call mxmf3_3(a,n1,b,n2,c,n3) + elseif (n2.eq.4) then + call mxmf3_4(a,n1,b,n2,c,n3) + elseif (n2.eq.5) then + call mxmf3_5(a,n1,b,n2,c,n3) + elseif (n2.eq.6) then + call mxmf3_6(a,n1,b,n2,c,n3) + elseif (n2.eq.7) then + call mxmf3_7(a,n1,b,n2,c,n3) + else + call mxmf3_8(a,n1,b,n2,c,n3) + endif + elseif (n2.le.16) then + if (n2.eq.9) then + call mxmf3_9(a,n1,b,n2,c,n3) + elseif (n2.eq.10) then + call mxmf3_10(a,n1,b,n2,c,n3) + elseif (n2.eq.11) then + call mxmf3_11(a,n1,b,n2,c,n3) + elseif (n2.eq.12) then + call mxmf3_12(a,n1,b,n2,c,n3) + elseif (n2.eq.13) then + call mxmf3_13(a,n1,b,n2,c,n3) + elseif (n2.eq.14) then + call mxmf3_14(a,n1,b,n2,c,n3) + elseif (n2.eq.15) then + call mxmf3_15(a,n1,b,n2,c,n3) + else + call mxmf3_16(a,n1,b,n2,c,n3) + endif + elseif (n2.le.24) then + if (n2.eq.17) then + call mxmf3_17(a,n1,b,n2,c,n3) + elseif (n2.eq.18) then + call mxmf3_18(a,n1,b,n2,c,n3) + elseif (n2.eq.19) then + call mxmf3_19(a,n1,b,n2,c,n3) + elseif (n2.eq.20) then + call mxmf3_20(a,n1,b,n2,c,n3) + elseif (n2.eq.21) then + call mxmf3_21(a,n1,b,n2,c,n3) + elseif (n2.eq.22) then + call mxmf3_22(a,n1,b,n2,c,n3) + elseif (n2.eq.23) then + call mxmf3_23(a,n1,b,n2,c,n3) + elseif (n2.eq.24) then + call mxmf3_24(a,n1,b,n2,c,n3) + endif + else +c + one=1.0 + zero=0.0 + if (wdsize.eq.4) then + call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + else + call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1) + endif +c +c N0=N1*N3 +c DO 10 I=1,N0 +c C(I,1)=0. +c 10 CONTINUE +c DO 100 J=1,N3 +c DO 100 K=1,N2 +c BB=B(K,J) +c DO 100 I=1,N1 +c C(I,J)=C(I,J)+A(I,K)*BB +c 100 CONTINUE + + endif + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_1(a,n1,b,n2,c,n3) +c + real a(n1,1),b(1,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_2(a,n1,b,n2,c,n3) +c + real a(n1,2),b(2,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_3(a,n1,b,n2,c,n3) +c + real a(n1,3),b(3,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_4(a,n1,b,n2,c,n3) +c + real a(n1,4),b(4,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_5(a,n1,b,n2,c,n3) +c + real a(n1,5),b(5,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_6(a,n1,b,n2,c,n3) +c + real a(n1,6),b(6,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_7(a,n1,b,n2,c,n3) +c + real a(n1,7),b(7,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_8(a,n1,b,n2,c,n3) +c + real a(n1,8),b(8,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_9(a,n1,b,n2,c,n3) +c + real a(n1,9),b(9,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_10(a,n1,b,n2,c,n3) +c + real a(n1,10),b(10,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_11(a,n1,b,n2,c,n3) +c + real a(n1,11),b(11,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_12(a,n1,b,n2,c,n3) +c + real a(n1,12),b(12,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_13(a,n1,b,n2,c,n3) +c + real a(n1,13),b(13,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_14(a,n1,b,n2,c,n3) +c + real a(n1,14),b(14,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_15(a,n1,b,n2,c,n3) +c + real a(n1,15),b(15,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_16(a,n1,b,n2,c,n3) +c + real a(n1,16),b(16,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_17(a,n1,b,n2,c,n3) +c + real a(n1,17),b(17,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_18(a,n1,b,n2,c,n3) +c + real a(n1,18),b(18,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_19(a,n1,b,n2,c,n3) +c + real a(n1,19),b(19,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_20(a,n1,b,n2,c,n3) +c + real a(n1,20),b(20,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_21(a,n1,b,n2,c,n3) +c + real a(n1,21),b(21,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_22(a,n1,b,n2,c,n3) +c + real a(n1,22),b(22,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_23(a,n1,b,n2,c,n3) +c + real a(n1,23),b(23,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxmf3_24(a,n1,b,n2,c,n3) +c + real a(n1,24),b(24,n3),c(n1,n3) +c + do i=1,n1 + do j=1,n3 + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + $ + a(i,3)*b(3,j) + $ + a(i,4)*b(4,j) + $ + a(i,5)*b(5,j) + $ + a(i,6)*b(6,j) + $ + a(i,7)*b(7,j) + $ + a(i,8)*b(8,j) + $ + a(i,9)*b(9,j) + $ + a(i,10)*b(10,j) + $ + a(i,11)*b(11,j) + $ + a(i,12)*b(12,j) + $ + a(i,13)*b(13,j) + $ + a(i,14)*b(14,j) + $ + a(i,15)*b(15,j) + $ + a(i,16)*b(16,j) + $ + a(i,17)*b(17,j) + $ + a(i,18)*b(18,j) + $ + a(i,19)*b(19,j) + $ + a(i,20)*b(20,j) + $ + a(i,21)*b(21,j) + $ + a(i,22)*b(22,j) + $ + a(i,23)*b(23,j) + $ + a(i,24)*b(24,j) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine mxm44(a,n1,b,n2,c,n3) +C----------------------------------------------------------------------- +C +C NOTE -- this code has been set up with the "mxmf3" routine +c referenced in memtime.f. On most machines, the f2 +c and f3 versions give the same performance (f2 is the +c nekton standard). On the t3e, f3 is noticeably faster. +c pff 10/5/98 +C +C +C Matrix-vector product routine. +C NOTE: Use assembly coded routine if available. +C +C---------------------------------------------------------------------- + REAL A(N1,N2),B(N2,N3),C(N1,N3) +c + if (n2.eq.1) then + call mxm44_2_t(a,n1,b,2,c,n3) + elseif (n2.eq.2) then + call mxm44_2_t(a,n1,b,n2,c,n3) + else + call mxm44_0_t(a,n1,b,n2,c,n3) + endif +c + return + end +c +c----------------------------------------------------------------------- + subroutine mxm44_0_t(a, m, b, k, c, n) +* subroutine matmul44(m, n, k, a, lda, b, ldb, c, ldc) +* real*8 a(lda,k), b(ldb,n), c(ldc,n) + real a(m,k), b(k,n), c(m,n) + real s11, s12, s13, s14, s21, s22, s23, s24 + real s31, s32, s33, s34, s41, s42, s43, s44 +c +c matrix multiply with a 4x4 pencil +c + + mresid = iand(m,3) + nresid = iand(n,3) + m1 = m - mresid + 1 + n1 = n - nresid + 1 + + do i=1,m-mresid,4 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + s44 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + s14 = s14 + a(i,l)*b(l,j+3) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + s24 = s24 + a(i+1,l)*b(l,j+3) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + s34 = s34 + a(i+2,l)*b(l,j+3) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + s44 = s44 + a(i+3,l)*b(l,j+3) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + c(i,j+2) = s13 + c(i,j+3) = s14 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + + c(i+1,j+3) = s24 + c(i+2,j+3) = s34 + c(i+3,j+3) = s44 + enddo +* Residual when n is not multiple of 4 + if (nresid .ne. 0) then + if (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,n) + s21 = s21 + a(i+1,l)*b(l,n) + s31 = s31 + a(i+2,l)*b(l,n) + s41 = s41 + a(i+3,l)*b(l,n) + enddo + c(i,n) = s11 + c(i+1,n) = s21 + c(i+2,n) = s31 + c(i+3,n) = s41 + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + enddo + c(i,j) = s11 + c(i,j+1) = s12 + + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s41 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s42 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + s43 = 0.0d0 + do l=1,k + s11 = s11 + a(i,l)*b(l,j) + s12 = s12 + a(i,l)*b(l,j+1) + s13 = s13 + a(i,l)*b(l,j+2) + + s21 = s21 + a(i+1,l)*b(l,j) + s22 = s22 + a(i+1,l)*b(l,j+1) + s23 = s23 + a(i+1,l)*b(l,j+2) + + s31 = s31 + a(i+2,l)*b(l,j) + s32 = s32 + a(i+2,l)*b(l,j+1) + s33 = s33 + a(i+2,l)*b(l,j+2) + + s41 = s41 + a(i+3,l)*b(l,j) + s42 = s42 + a(i+3,l)*b(l,j+1) + s43 = s43 + a(i+3,l)*b(l,j+2) + enddo + c(i,j) = s11 + c(i+1,j) = s21 + c(i+2,j) = s31 + c(i+3,j) = s41 + c(i,j+1) = s12 + c(i+1,j+1) = s22 + c(i+2,j+1) = s32 + c(i+3,j+1) = s42 + c(i,j+2) = s13 + c(i+1,j+2) = s23 + c(i+2,j+2) = s33 + c(i+3,j+2) = s43 + endif + endif + enddo + +* Residual when m is not multiple of 4 + if (mresid .eq. 0) then + return + elseif (mresid .eq. 1) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,j) + s12 = s12 + a(m,l)*b(l,j+1) + s13 = s13 + a(m,l)*b(l,j+2) + s14 = s14 + a(m,l)*b(l,j+3) + enddo + c(m,j) = s11 + c(m,j+1) = s12 + c(m,j+2) = s13 + c(m,j+3) = s14 + enddo +* mresid is 1, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n) + enddo + c(m,n) = s11 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s12 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-1) + s12 = s12 + a(m,l)*b(l,n) + enddo + c(m,n-1) = s11 + c(m,n) = s12 + return + else + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + do l=1,k + s11 = s11 + a(m,l)*b(l,n-2) + s12 = s12 + a(m,l)*b(l,n-1) + s13 = s13 + a(m,l)*b(l,n) + enddo + c(m,n-2) = s11 + c(m,n-1) = s12 + c(m,n) = s13 + return + endif + elseif (mresid .eq. 2) then + do j=1,n-nresid,4 + s11 = 0.0d0 + s12 = 0.0d0 + s13 = 0.0d0 + s14 = 0.0d0 + s21 = 0.0d0 + s22 = 0.0d0 + s23 = 0.0d0 + s24 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,j) + s12 = s12 + a(m-1,l)*b(l,j+1) + s13 = s13 + a(m-1,l)*b(l,j+2) + s14 = s14 + a(m-1,l)*b(l,j+3) + + s21 = s21 + a(m,l)*b(l,j) + s22 = s22 + a(m,l)*b(l,j+1) + s23 = s23 + a(m,l)*b(l,j+2) + s24 = s24 + a(m,l)*b(l,j+3) + enddo + c(m-1,j) = s11 + c(m-1,j+1) = s12 + c(m-1,j+2) = s13 + c(m-1,j+3) = s14 + c(m,j) = s21 + c(m,j+1) = s22 + c(m,j+2) = s23 + c(m,j+3) = s24 + enddo +* mresid is 2, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n) + enddo + c(m-1,n) = s11 + c(m,n) = s21 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-1) + s12 = s12 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-1) + s22 = s22 + a(m,l)*b(l,n) + enddo + c(m-1,n-1) = s11 + c(m-1,n) = s12 + c(m,n-1) = s21 + c(m,n) = s22 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + do l=1,k + s11 = s11 + a(m-1,l)*b(l,n-2) + s12 = s12 + a(m-1,l)*b(l,n-1) + s13 = s13 + a(m-1,l)*b(l,n) + s21 = s21 + a(m,l)*b(l,n-2) + s22 = s22 + a(m,l)*b(l,n-1) + s23 = s23 + a(m,l)*b(l,n) + enddo + c(m-1,n-2) = s11 + c(m-1,n-1) = s12 + c(m-1,n) = s13 + c(m,n-2) = s21 + c(m,n-1) = s22 + c(m,n) = s23 + return + endif + else +* mresid is 3 + do j=1,n-nresid,4 + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + + s14 = 0.0d0 + s24 = 0.0d0 + s34 = 0.0d0 + + do l=1,k + s11 = s11 + a(m-2,l)*b(l,j) + s12 = s12 + a(m-2,l)*b(l,j+1) + s13 = s13 + a(m-2,l)*b(l,j+2) + s14 = s14 + a(m-2,l)*b(l,j+3) + + s21 = s21 + a(m-1,l)*b(l,j) + s22 = s22 + a(m-1,l)*b(l,j+1) + s23 = s23 + a(m-1,l)*b(l,j+2) + s24 = s24 + a(m-1,l)*b(l,j+3) + + s31 = s31 + a(m,l)*b(l,j) + s32 = s32 + a(m,l)*b(l,j+1) + s33 = s33 + a(m,l)*b(l,j+2) + s34 = s34 + a(m,l)*b(l,j+3) + enddo + c(m-2,j) = s11 + c(m-2,j+1) = s12 + c(m-2,j+2) = s13 + c(m-2,j+3) = s14 + + c(m-1,j) = s21 + c(m-1,j+1) = s22 + c(m-1,j+2) = s23 + c(m-1,j+3) = s24 + + c(m,j) = s31 + c(m,j+1) = s32 + c(m,j+2) = s33 + c(m,j+3) = s34 + enddo +* mresid is 3, check nresid + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n) + enddo + c(m-2,n) = s11 + c(m-1,n) = s21 + c(m,n) = s31 + return + elseif (nresid .eq. 2) then + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-1) + s12 = s12 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-1) + s22 = s22 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-1) + s32 = s32 + a(m,l)*b(l,n) + enddo + c(m-2,n-1) = s11 + c(m-2,n) = s12 + c(m-1,n-1) = s21 + c(m-1,n) = s22 + c(m,n-1) = s31 + c(m,n) = s32 + return + else + s11 = 0.0d0 + s21 = 0.0d0 + s31 = 0.0d0 + s12 = 0.0d0 + s22 = 0.0d0 + s32 = 0.0d0 + s13 = 0.0d0 + s23 = 0.0d0 + s33 = 0.0d0 + do l=1,k + s11 = s11 + a(m-2,l)*b(l,n-2) + s12 = s12 + a(m-2,l)*b(l,n-1) + s13 = s13 + a(m-2,l)*b(l,n) + s21 = s21 + a(m-1,l)*b(l,n-2) + s22 = s22 + a(m-1,l)*b(l,n-1) + s23 = s23 + a(m-1,l)*b(l,n) + s31 = s31 + a(m,l)*b(l,n-2) + s32 = s32 + a(m,l)*b(l,n-1) + s33 = s33 + a(m,l)*b(l,n) + enddo + c(m-2,n-2) = s11 + c(m-2,n-1) = s12 + c(m-2,n) = s13 + c(m-1,n-2) = s21 + c(m-1,n-1) = s22 + c(m-1,n) = s23 + c(m,n-2) = s31 + c(m,n-1) = s32 + c(m,n) = s33 + return + endif + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxm44_2_t(a, m, b, k, c, n) + real a(m,2), b(2,n), c(m,n) + + nresid = iand(n,3) + n1 = n - nresid + 1 + + do j=1,n-nresid,4 + do i=1,m + c(i,j) = a(i,1)*b(1,j) + $ + a(i,2)*b(2,j) + c(i,j+1) = a(i,1)*b(1,j+1) + $ + a(i,2)*b(2,j+1) + c(i,j+2) = a(i,1)*b(1,j+2) + $ + a(i,2)*b(2,j+2) + c(i,j+3) = a(i,1)*b(1,j+3) + $ + a(i,2)*b(2,j+3) + enddo + enddo + if (nresid .eq. 0) then + return + elseif (nresid .eq. 1) then + do i=1,m + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + elseif (nresid .eq. 2) then + do i=1,m + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + else + do i=1,m + c(i,n-2) = a(i,1)*b(1,n-2) + $ + a(i,2)*b(2,n-2) + c(i,n-1) = a(i,1)*b(1,n-1) + $ + a(i,2)*b(2,n-1) + c(i,n) = a(i,1)*b(1,n) + $ + a(i,2)*b(2,n) + enddo + endif + + return + end +c----------------------------------------------------------------------- + subroutine mxmtest(s,nn,cn,mxmt,name,k,ivb) + + real s(nn,2) ! MFLOPS + character*5 cn ! name + character*5 name + external mxmt + + include 'SIZE' + parameter (lt=4*lx1*ly1*lz1*lelt) + common /scrns/ a(lt) + common /scruz/ b(lt) + common /scrmg/ c(lt) + + integer ll,icalld + save ll,icalld + data ll,icalld /1,0/ + + if (icalld.eq.0) then ! Initialize matrices: + icalld = icalld + 1 + time1 = dnekclock() + call initab(a,b,lt) + time2 = dnekclock()-time1 + if (nid.eq.0) write(6,*) 'mxm test init:',lt,time2,name + endif + + + cn = name + +c Rectangular matrix tests + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + m = k + do n=nn0,nn1 + n1 = n + n2 = n + n3 = n + if (m.eq.1) n1 = n*n + if (m.eq.3) n3 = n*n + if (lt.gt.n1*n3) then + n13 = max(n1,n3) + loop = 250000/(n1*n2*n3) + 500 + if (name.eq.'madd ') loop = 200000/(n1*n3) + 5000 + +c------------------------------------------------------- +c mem test +c------------------------------------------------------- + + t0 = dnekclock() + overh = dnekclock()-t0 + time1 = dnekclock() + do l=1,loop + if (ll.ge.lt-n1*n3) ll = 1 + call mxmt(a(ll),n1,b(ll),n2,c(ll),n3) + ll = ll+n1*n3 + enddo + time2 = dnekclock() + time = time2-time1 - overh + iops=loop*n1*n3*(2*n2-1) + if (name.eq.'madd ') iops = loop*n1*n3 +c write(6,*) loop,time,time2,time1,overh + flops=iops/(1.0e6*time) + s(n,1) = flops +c + timel = time/loop + if (nid.eq.0) write(6,199) n,n1,n2,n3,flops,timel,name + 199 format(i3,'m',1x,3i6,f10.4,e16.5,3x,a5,' mem') +c +c------------------------------------------------------- +c fast test +c------------------------------------------------------- +c + call mxmt(a,n1,b,n2,c,n3) + t0 = dnekclock() + overh = dnekclock()-t0 + time1 = dnekclock() + do l=1,loop + call mxmt(a,n1,b,n2,c,n3) + enddo + time2 = dnekclock() + time = time2-time1 - overh + iops=loop*n1*n3*(2*n2-1) + if (name.eq.'madd ') iops = loop*n1*n3 + flops=iops/(1.0e6*time) + s(n,2) = flops + timel = time/loop +c + if (nid.eq.0) write(6,198) n,n1,n2,n3,flops,timel,name + 198 format(i3,'f',1x,3i6,f10.4,e16.5,3x,a5,' fast') +c + endif + enddo +c + return + end +c----------------------------------------------------------------------- + subroutine mxm_analyze(s,a,nn,c,nt,ivb) + include 'SIZE' + + character*5 c(3,nt) + real s(nn,2,nt,3) ! Measured Mflops, 3 cases + real a(nn,2,nt,3) +c ^ ^ ^ |__ N^2xN, NxN, NxN^2 +c matrix order N __| | |__________which mxm +c | +c |__cached vs. noncached data + + + integer itmax(200) + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + do n = nn0,nn1 + fmax = 0. ! Peak mflops + do it=1,nt + ai = 0. + di = 0. + do k=1,3 + if (s(n,1,it,k).gt.0) then ! Take harmonic means of + ai = ai + 1./s(n,1,it,k) ! case I II and III for + di = di + 1. ! mem test, s(n,1...). + endif + enddo + if (ai.gt.0) ai = di/ai + a(n,1,it,1) = di/ai + if (ai.gt.fmax.and.c(2,it).ne.'madd ') then + fmax = ai + itmax(n) = it + endif + enddo + it = itmax(n) + if (nid.eq.0) write(6,3) n,it,c(2,it),(s(n,1,it,k),k=1,3),fmax + 3 format(i3,i2,1x,a5,4f12.0,' Peak harmonic') + enddo + call out_anal(s,a,nn,c,nt,itmax,'Harmonic',1,ivb) +c +c Case by case +c + do k=1,3 + do n = nn0,nn1 + fmax = 0. ! Peak mflops + do it=1,nt + ai = s(n,1,it,k) + if (ai.gt.fmax.and.c(2,it).ne.'madd ') then + fmax = ai + itmax(n) = it + endif + enddo + enddo + if (k.eq.1) call out_anal(s,a,nn,c,nt,itmax,'Case N2N',k,ivb) + if (k.eq.2) call out_anal(s,a,nn,c,nt,itmax,'Case NxN',k,ivb) + if (k.eq.3) call out_anal(s,a,nn,c,nt,itmax,'Case NN2',k,ivb) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine out_anal(s,a,nn,c,nt,itmax,name8,k,ivb) + include 'SIZE' + + character*5 c(3,nt) + real s(nn,2,nt,3) + real a(nn,2,nt,3) + integer itmax(200) + character*8 name8 + + if (nid.ne.0) return + + nn0 = 1 + nn1 = nn + if (ivb.eq.0) then + nn0 = lx1 + nn1 = lx1 + endif + + + do n=nn0,nn1 + it = itmax(n) + write(6,1) n,s(n,1,it,k),c(2,it),name8 + 1 format(i4,f14.0,4x,a5,4x,a8,' MxM MFLOPS') + enddo + + return + end +c----------------------------------------------------------------------- diff --git a/src/mxm_wrapper.f b/src/mxm_wrapper.f new file mode 100644 index 0000000..7c3d111 --- /dev/null +++ b/src/mxm_wrapper.f @@ -0,0 +1,165 @@ + subroutine mxm(a,n1,b,n2,c,n3) + +#if defined(XSMM_DISPATCH) + USE :: LIBXSMM +#endif + +#define LIBXSMM_DMM1(N, a, b, c) LIBXSMM_DMM1_str(N, a, b, c) +#define LIBXSMM_DMM1_str(N, a, b, c) libxsmm_dmm_##N##x##N##_##N##_##N(a, b, c) +#define LIBXSMM_DMM2(N, a, b, c) LIBXSMM_DMM2_str(N, a, b, c) +#define LIBXSMM_DMM2_str(N, a, b, c) libxsmm_dmm_##N##_##N##x##N##_##N(a, b, c) +#define LIBXSMM_DMM3(N, a, b, c) LIBXSMM_DMM3_str(N, a, b, c) +#define LIBXSMM_DMM3_str(N, a, b, c) libxsmm_dmm_##N##_##N##_##N(a, b, c) + +c +c Compute matrix-matrix product C = A*B +c for contiguously packed matrices A,B, and C. +c +#if defined (MKL) +# include "mkl_direct_call.fi" +#endif + real a(n1,n2),b(n2,n3),c(n1,n3) + real alpha, beta +c + include 'SIZE' + include 'TOTAL' +c + integer aligned + integer K10_mxm + integer init, prevn2 + +#if defined(XSMM_DISPATCH) + TYPE(LIBXSMM_DMMFUNCTION) :: xmm1,xmm2,xmm3 +#endif + + data init /0/, prevn2 /0/ + save init, prevn2 +#if defined(XSMM_DISPATCH) + save xsmm1, xsmm2, xsmm3 +#endif + +c write(*,*) "in", init, prevn2, LOC(xsmm1), LOC(xsmm2), LOC(xsmm3) + +#if defined (MKL) + alpha = 1.0 + beta = 0.0 + call dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1) +#elif defined (BLAS_MXM) + alpha = 1.0 + beta = 0.0 + call dgemm('N','N',n1,n3,n2,alpha,a,n1,b,n2,beta,c,n1) +#elif defined (BG) + call bg_aligned3(a,b,c,aligned) + if (n2.eq.2) then + call mxm44_2(a,n1,b,n2,c,n3) + else if ((aligned.eq.1) .and. + $ (n1.ge.8) .and. (n2.ge.8) .and. (n3.ge.8) .and. + $ (modulo(n1,2).eq.0) .and. (modulo(n2,2).eq.0) ) then + if (modulo(n3,4).eq.0) then + call bg_mxm44(a,n1,b,n2,c,n3) + else + call bg_mxm44_uneven(a,n1,b,n2,c,n3) + endif + else if((aligned.eq.1) .and. + $ (modulo(n1,6).eq.0) .and. (modulo(n3,6).eq.0) .and. + $ (n2.ge.4) .and. (modulo(n2,2).eq.0) ) then + call bg_mxm3(a,n1,b,n2,c,n3) + else + call mxm44_0(a,n1,b,n2,c,n3) + endif +#elif defined (K10_MXM) + ! fow now only supported for lx1=8 + ! tuned for AMD K10 + ierr = K10_mxm(a,n1,b,n2,c,n3) + if (ierr.gt.0) call mxmf2(a,n1,b,n2,c,n3) +#elif defined (XSMM_DISPATCH) + if (init == 0) then + CALL libxsmm_init() + init = 1 + write(*,*) "initializing libxsmm" + end if + + if (prevn2 /= n2) then + prevn2 = n2 + + CALL libxsmm_dispatch(xmm1, n2, n2, n2*n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm1" + IF (.NOT. libxsmm_available(xmm1)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + + CALL libxsmm_dispatch(xmm2, n2, n2, n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm2" + IF (.NOT. libxsmm_available(xmm2)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + + CALL libxsmm_dispatch(xmm3, n2*n2, n2, n2, alpha=1D0, beta=0D0) +c write(*,*) "initialized xmm3" + IF (.NOT. libxsmm_available(xmm3)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + end if + + if (n1 .eq. n2*n2) then +c write(*,*) "call to xmm3", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm3)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm3, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_256_16_16(a, b, c) + else if (n3 .eq. n2*n2) then +c write(*,*) "call to xmm1", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm1)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm1, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_16_256_16(a, b, c) + else +c write(*,*) "call to xmm2", n1, n2, n3 + IF (.NOT. libxsmm_available(xmm2)) THEN + write(*,*) " ** Error: unable to dispatch libxsmm call" + STOP + END IF + call libxsmm_call(xmm2, C_LOC(a), C_LOC(b), C_LOC(c)) +c call libxsmm_dmm_16_16_16(a, b, c) + end if +#elif defined (XSMM_FIXED) + if (n2 .eq. NPOLY) then + if (n1 .eq. n2*n2) then + call LIBXSMM_DMM1(NPOLY, a, b, c) + else if (n3 .eq. n2*n2) then + call LIBXSMM_DMM2(NPOLY, a, b, c) + else + call LIBXSMM_DMM3(NPOLY, a, b, c) + end if + else + write(*,*) "Invalid matrix size" + stop + end if +#elif defined (XSMM) + alpha = 1.0 + beta = 0.0 + CALL libxsmm_dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1) +#elif defined (MXMBASIC) + do j=1,n3 + do i=1,n1 + c(i,j) = 0.0 + do k=1,n2 + c(i,j) = c(i,j) + a(i,k)*b(k,j) + enddo + enddo + enddo +#else + call mxmf2(a,n1,b,n2,c,n3) +#endif + +c write(*,*) "out", init, prevn2, xsmm1, xsmm2, xsmm3 + + return + end diff --git a/src/omp.f b/src/omp.f new file mode 100644 index 0000000..1591214 --- /dev/null +++ b/src/omp.f @@ -0,0 +1,128 @@ +#ifdef TIMERS +#define NBTIMER(a) a = dnekclock() +#define STIMER(a) a = dnekclock_sync() +#define ACCUMTIMER(b,a) b = b + (dnekclock()- a) +#else +#define NBTIMER(a) +#define STIMER(a) +#define ACCUMTIMER(a,b) +#endif + + + subroutine rzeroi(a,n,start,fin) + implicit none + + real a(n) + integer n, i, start, fin + + do i = start, fin + a(i) = 0.0 + end do + + return + end subroutine + +c---------------------------------------------------------- + + subroutine copyi(a,b,n, start, fin) + implicit none + + real a(n),b(n) + integer n, i, start, fin + + do i=start,fin + a(i)=b(i) + enddo + + return + end subroutine + +c---------------------------------------------------------- + + subroutine glsc3i(val,a,b,mult,n,find,lind) + implicit none + + include 'TIMER' + + real val,a(n),b(n),mult(n) + real tsum,psum,work(1) + integer n,find,lind + integer i, tmt, thread + integer omp_get_thread_num + + save psum + data psum /0.0/ + + thread = 0 +#ifdef _OPENMP + thread = omp_get_thread_num() +#endif + tmt = thread + 1 + + tsum = 0.0 + do i=find, lind + tsum = tsum + a(i)*b(i)*mult(i) + end do + +c$OMP ATOMIC update + psum = psum + tsum +c$OMP END ATOMIC + +c$OMP BARRIER + NBTIMER(ttemp4) +c$OMP MASTER + call gop(psum,work,'+ ',1) + val = psum + psum = 0.0 +c$OMP END MASTER +c$OMP BARRIER + ACCUMTIMER(tgop(gopi(tmt),tmt), ttemp4) + + + return + end subroutine + +c---------------------------------------------------------- + + subroutine solveMi(z,r,n,start,fin) + implicit none + + real z(n),r(n) + integer n,start,fin + + call copyi(z,r,n,start,fin) + + return + end + +c---------------------------------------------------------- + + subroutine add2s1i(a,b,c1,n,start,fin) + implicit none + + real a(n),b(n),c1 + integer n,start,fin + integer i + + do i= start, fin + a(i)=c1*a(i)+b(i) + end do + + return + end subroutine + +c---------------------------------------------------------- + + subroutine add2s2i(a,b,c1,n,start,fin) + implicit none + + real a(n),b(n),c1 + integer n,start,fin + integer i + + do i= start,fin + a(i)=a(i)+c1*b(i) + end do + + return + end subroutine diff --git a/src/prox_dssum.f b/src/prox_dssum.f new file mode 100644 index 0000000..c3c0402 --- /dev/null +++ b/src/prox_dssum.f @@ -0,0 +1,174 @@ +c----------------------------------------------------------------------- + subroutine dssum(f) + include 'SIZE' + include 'TOTAL' + real f(1) + +c call nekgsync() + call gs_op(gsh,f,1,1,0) ! Gather-scatter operation ! w = QQ w + + return + end +c----------------------------------------------------------------------- + subroutine proxy_setupds(gs_handle) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer gs_handle,dof + integer*8 glo_num(lx1*ly1*lz1*lelt) + + common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal + + t0 = dnekclock() + + call set_vert_box(glo_num) ! Set global-to-local map + + ntot = nx1*ny1*nz1*nelt + call gs_setup(gs_handle,glo_num,ntot,nekcomm,mp) ! Initialize gather-scatter + dof = ntot *mp + t1 = dnekclock() - t0 + if (nid.eq.0) then + write(6,1) t1,gs_handle,nx1,dof + 1 format(' setupds time',1pe11.4,' seconds ',2i3,i12) + endif + + return + end +c----------------------------------------------------------------------- + subroutine set_vert_box(glo_num) + +c Set up global numbering for elements in a box + + include 'SIZE' + include 'PARALLEL' + + integer*8 glo_num(1),ii,kg,jg,ig ! The latter 3 for proper promotion + + integer e,ex,ey,ez,eg + + nn = nx1-1 ! nn := polynomial order + + do e=1,nelt + eg = lglel(e) + call get_exyz(ex,ey,ez,eg,nelx,nely,nelz) + do k=0,nn + do j=0,nn + do i=0,nn + kg = nn*(ez-1) + k + jg = nn*(ey-1) + j + ig = nn*(ex-1) + i + ii = 1 + ig + jg*(nn*nelx+1) + kg*(nn*nelx+1)*(nn*nely+1) + ll = 1 + i + nx1*j + nx1*ny1*k + nx1*ny1*nz1*(e-1) + glo_num(ll) = ii + enddo + enddo + enddo + enddo + + return + end +c----------------------------------------------------------------------- + subroutine get_exyz(ex,ey,ez,eg,nelx,nely,nelz) + integer ex,ey,ez,eg + + nelxy = nelx*nely + + ez = 1 + (eg-1)/nelxy + ey = mod1 (eg,nelxy) + ey = 1 + (ey-1)/nelx + ex = mod1 (eg,nelx) + + return + end +c----------------------------------------------------------------------- + subroutine outmat_glo_num(glo_num) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer*8 glo_num(lx1*ly1*lz1,lelt) + + integer e + + do e=1,nelt + call outmat_e_i8(glo_num(1,e),e) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine outmat_e_i8(gn,e) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + + integer*8 gn(lx1,ly1,lz1) + + integer e + + write(6,*) + write(6,2) e + write(6,*) + + do k0=3,1,-2 + + k1=k0+1 + do j=ny1,1,-1 + write(6,1) ((gn(i,j,k),i=1,4),k=k0,k1) + enddo + write(6,*) + + enddo + 1 format('gn: ',4i8,3x,4i8) + 2 format('gn: element: ',i4) + + return + end +c----------------------------------------------------------------------- + subroutine outmat_r(x,name5) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + character*5 name5 + + real x(lx1*ly1*lz1,lelt) + + integer e + + do e=1,nelt + call outmat_e_r(x(1,e),name5,e) + enddo + + return + end +c----------------------------------------------------------------------- + subroutine outmat_e_r(x,name5,e) + include 'SIZE' + include 'INPUT' + include 'PARALLEL' + character*5 name5 + + real x(lx1,ly1,lz1) + + integer e + + write(6,*) + write(6,2) e,name5 + write(6,*) + + do k0=3,1,-2 + + k1=k0+1 + do j=ny1,1,-1 + write(6,1) ((x(i,j,k),i=1,4),k=k0,k1) + enddo + write(6,*) + + enddo + 1 format('mat: ',4f8.3,3x,4f8.3) + 2 format('mat: element: ',i4,2x,a5) + + return + end +c----------------------------------------------------------------------- diff --git a/src/prox_setup.f b/src/prox_setup.f new file mode 100644 index 0000000..fabe8c5 --- /dev/null +++ b/src/prox_setup.f @@ -0,0 +1,113 @@ +c----------------------------------------------------------------------- + subroutine proxy_setup(a,b,c,d,z,w,g) + + include 'SIZE' + include 'TOTAL' + + real a(lx1*lx1),b(lx1),c(lx1*lx1),d(lx1*lx1),z(lx1) + $ , w(lx1*2),g(6,lx1*ly1*lz1*lelt) + + call semhat(a,b,c,d,z,w,nx1-1) + + n = nx1*nx1 + call copy(dxm1,d,n) + call transpose(dxtm1,nx1,dxm1,nx1) + + call copy(zgm1,z,nx1) ! GLL points + call copy(wxm1,b,nx1) ! GLL weights + + call setup_g(g) + +c m = nx1*ny1*nz1*nelt +c call outmat(g,6,m,'gxyz 1',m) + + return + end +c------------------------------------------------------------------------- + subroutine setup_g(g) + + include 'SIZE' + include 'TOTAL' + real g(6,nx1,ny1,nz1,nelt) + integer e + + n = nx1*ny1*nz1*nelt + + + do e=1,nelt + do k=1,nz1 + do j=1,ny1 + do i=1,nx1 + call rzero(g(1,i,j,k,e),6) + g(1,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(4,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(6,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + g(6,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k) + enddo + enddo + enddo + enddo + + return + end +c------------------------------------------------------------------------- + subroutine transpose(a,lda,b,ldb) + real a(lda,1),b(ldb,1) +c + do j=1,ldb + do i=1,lda + a(i,j) = b(j,i) + enddo + enddo + return + end +c----------------------------------------------------------------------- + subroutine outmat(a,m,n,name6,ie) + real a(m,n) + character*6 name6 +c + n10 = min(n,10) + write(6,*) + write(6,*) ie,' matrix: ',name6,m,n + do i=1,m + write(6,6) ie,name6,(a(i,j),j=1,n10) + enddo + 6 format(i3,1x,a6,1p10e12.4) + write(6,*) + return + end +c----------------------------------------------------------------------- + subroutine outmat1(a,m,n,name6,ie) + real a(m,n) + character*6 name6 +c + n10 = min(n,10) + write(ie,*) + write(ie,*) ie,' matrix: ',name6,m,n + do i=1,m + write(ie,6) ie,name6,(a(i,j),j=1,n10) + enddo + 6 format(i3,1x,a6,1p10e12.4) + write(ie,*) + return + end +c----------------------------------------------------------------------- + function randx(seed) + +#ifdef BGQ +#define M_SIN(X) _sin((X)) +#define M_COS(X) _cos((X)) +#else +#define M_SIN(X) sin((X)) +#define M_COS(X) cos((X)) +#endif + + arg = 1.e9*seed + arg = 1.e9*M_COS(arg) + randx = M_SIN(arg) + seed = randx + seed = randx + + return + end +c----------------------------------------------------------------------- diff --git a/src/semhat.f b/src/semhat.f new file mode 100644 index 0000000..1ad8d23 --- /dev/null +++ b/src/semhat.f @@ -0,0 +1,94 @@ +c----------------------------------------------------------------------- + subroutine semhat(a,b,c,d,z,w,n) +c +c Generate matrices for single element, 1D operators: +c +c a = Laplacian +c b = diagonal mass matrix (GLL weights) +c c = convection operator b*d +c d = derivative matrix +c z = GLL points + + real a(0:n,0:n),b(0:n),c(0:n,0:n),d(0:n,0:n),z(0:n) + real w(0:2*n) + + np = n+1 + + call zwgll (z,b,np) + + do i=0,n + call fd_weights_full(z(i),z,n,1,w) + do j=0,n + d(i,j) = w(j+np) ! Derivative matrix + enddo + enddo + + call rzero(a,np*np) + do j=0,n + do i=0,n + do k=0,n + a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j) + enddo + c(i,j) = b(i)*d(i,j) + enddo + enddo + + return + end +c----------------------------------------------------------------------- + subroutine fd_weights_full(xx,x,n,m,c) +c +c This routine evaluates the derivative based on all points +c in the stencils. It is more memory efficient than "fd_weights" +c +c This set of routines comes from the appendix of +c A Practical Guide to Pseudospectral Methods, B. Fornberg +c Cambridge Univ. Press, 1996. (pff) +c +c Input parameters: +c xx -- point at wich the approximations are to be accurate +c x -- array of x-ordinates: x(0:n) +c n -- polynomial degree of interpolant (# of points := n+1) +c m -- highest order of derivative to be approxxmated at xi +c +c Output: +c c -- set of coefficients c(0:n,0:m). +c c(j,k) is to be applied at x(j) when +c the kth derivative is approxxmated by a +c stencil extending over x(0),x(1),...x(n). +c +c + real x(0:n),c(0:n,0:m) + + c1 = 1. + c4 = x(0) - xx + + do k=0,m + do j=0,n + c(j,k) = 0. + enddo + enddo + c(0,0) = 1. + + do i=1,n + mn = min(i,m) + c2 = 1. + c5 = c4 + c4 = x(i)-xx + do j=0,i-1 + c3 = x(i)-x(j) + c2 = c2*c3 + do k=mn,1,-1 + c(i,k) = c1*(k*c(i-1,k-1)-c5*c(i-1,k))/c2 + enddo + c(i,0) = -c1*c5*c(i-1,0)/c2 + do k=mn,1,-1 + c(j,k) = (c4*c(j,k)-k*c(j,k-1))/c3 + enddo + c(j,0) = c4*c(j,0)/c3 + enddo + c1 = c2 + enddo + return + end +c----------------------------------------------------------------------- diff --git a/src/speclib.f b/src/speclib.f new file mode 100644 index 0000000..6c86b64 --- /dev/null +++ b/src/speclib.f @@ -0,0 +1,1176 @@ +C============================================================================== +C +C LIBRARY ROUTINES FOR SPECTRAL METHODS +C +C March 1989 +C +C For questions, comments or suggestions, please contact: +C +C Einar Malvin Ronquist +C Room 3-243 +C Department of Mechanical Engineering +C Massachusetts Institute of Technology +C 77 Massachusetts Avenue +C Cambridge, MA 0299 +C U.S.A. +C +C------------------------------------------------------------------------------ +C +C ABBRIVIATIONS: +C +C M - Set of mesh points +C Z - Set of collocation/quadrature points +C W - Set of quadrature weights +C H - Lagrangian interpolant +C D - Derivative operator +C I - Interpolation operator +C GL - Gauss Legendre +C GLL - Gauss-Lobatto Legendre +C GJ - Gauss Jacobi +C GLJ - Gauss-Lobatto Jacobi +C +C +C MAIN ROUTINES: +C +C Points and weights: +C +C ZWGL Compute Gauss Legendre points and weights +C ZWGLL Compute Gauss-Lobatto Legendre points and weights +C ZWGJ Compute Gauss Jacobi points and weights (general) +C ZWGLJ Compute Gauss-Lobatto Jacobi points and weights (general) +C +C Lagrangian interpolants: +C +C HGL Compute Gauss Legendre Lagrangian interpolant +C HGLL Compute Gauss-Lobatto Legendre Lagrangian interpolant +C HGJ Compute Gauss Jacobi Lagrangian interpolant (general) +C HGLJ Compute Gauss-Lobatto Jacobi Lagrangian interpolant (general) +C +C Derivative operators: +C +C DGLL Compute Gauss-Lobatto Legendre derivative matrix +C DGLLGL Compute derivative matrix for a staggered mesh (GLL->GL) +C DGJ Compute Gauss Jacobi derivative matrix (general) +C DGLJ Compute Gauss-Lobatto Jacobi derivative matrix (general) +C DGLJGJ Compute derivative matrix for a staggered mesh (GLJ->GJ) (general) +C +C Interpolation operators: +C +C IGLM Compute interpolation operator GL -> M +C IGLLM Compute interpolation operator GLL -> M +C IGJM Compute interpolation operator GJ -> M (general) +C IGLJM Compute interpolation operator GLJ -> M (general) +C +C Other: +C +C PNLEG Compute Legendre polynomial of degree N +C PNDLEG Compute derivative of Legendre polynomial of degree N +C +C Comments: +C +C Note that many of the above routines exist in both single and +C double precision. If the name of the single precision routine is +C SUB, the double precision version is called SUBD. In most cases +C all the "low-level" arithmetic is done in double precision, even +C for the single precsion versions. +C +C Useful references: +C +C [1] Gabor Szego: Orthogonal Polynomials, American Mathematical Society, +C Providence, Rhode Island, 1939. +C [2] Abramowitz & Stegun: Handbook of Mathematical Functions, +C Dover, New York, 1972. +C [3] Canuto, Hussaini, Quarteroni & Zang: Spectral Methods in Fluid +C Dynamics, Springer-Verlag, 1988. +C +C +C============================================================================== +C +C-------------------------------------------------------------------- + SUBROUTINE ZWGL (Z,W,NP) +C-------------------------------------------------------------------- +C +C Generate NP Gauss Legendre points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha=0,beta=0). +C The polynomial degree N=NP-1. +C Z and W are in single precision, but all the arithmetic +C operations are done in double precision. +C +C-------------------------------------------------------------------- + REAL Z(1),W(1) + ALPHA = 0. + BETA = 0. + CALL ZWGJ (Z,W,NP,ALPHA,BETA) + RETURN + END +C + SUBROUTINE ZWGLL (Z,W,NP) +C-------------------------------------------------------------------- +C +C Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha=0,beta=0). +C The polynomial degree N=NP-1. +C Z and W are in single precision, but all the arithmetic +C operations are done in double precision. +C +C-------------------------------------------------------------------- + REAL Z(1),W(1) + ALPHA = 0. + BETA = 0. + CALL ZWGLJ (Z,W,NP,ALPHA,BETA) + RETURN + END +C + SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Single precision version. +C +C-------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD(NZD),WD(NZD) + REAL Z(1),W(1),ALPHA,BETA +C + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Double precision version. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z(1),W(1),ALPHA,BETA +C + N = NP-1 + DN = ((N)) + ONE = 1. + TWO = 2. + APB = ALPHA+BETA +C + IF (NP.LE.0) THEN + WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + IF (NP.EQ.1) THEN + Z(1) = (BETA-ALPHA)/(APB+TWO) + W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) + $ * TWO**(APB+ONE) + RETURN + ENDIF +C + CALL JACG (Z,NP,ALPHA,BETA) +C + NP1 = N+1 + NP2 = N+2 + DNP1 = ((NP1)) + DNP2 = ((NP2)) + FAC1 = DNP1+ALPHA+BETA+ONE + FAC2 = FAC1+DNP1 + FAC3 = FAC2+ONE + FNORM = PNORMJ(NP1,ALPHA,BETA) + RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2) + DO 100 I=1,NP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I)) + W(I) = -RCOEF/(P*PDM1) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Single precision version. +C +C-------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD(NZD),WD(NZD) + REAL Z(1),W(1),ALPHA,BETA +C + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) + 100 CONTINUE + RETURN + END +C + SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) +C associated with Jacobi polynomial P(N)(alpha>-1,beta>-1). +C The polynomial degree N=NP-1. +C Double precision version. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z(NP),W(NP),ALPHA,BETA +C + N = NP-1 + NM1 = N-1 + ONE = 1. + TWO = 2. +C + IF (NP.LE.1) THEN + WRITE (6,*) 'ZWGLJD: Minimum number of Gauss-Lobatto points is 2' + WRITE (6,*) 'ZWGLJD: alpha,beta:',alpha,beta,np + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + IF (NM1.GT.0) THEN + ALPG = ALPHA+ONE + BETG = BETA+ONE + CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG) + ENDIF + Z(1) = -ONE + Z(NP) = ONE + DO 100 I=2,NP-1 + W(I) = W(I)/(ONE-Z(I)**2) + 100 CONTINUE + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1)) + W(1) = ENDW1 (N,ALPHA,BETA)/(TWO*PD) + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP)) + W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD) +C + RETURN + END +C + REAL*8 FUNCTION ENDW1 (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW1 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW1 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) + $ * (APB+THREE)/FOUR + IF (N.EQ.2) THEN + ENDW1 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 + 100 CONTINUE + ENDW1 = F3 + RETURN + END +C + REAL*8 FUNCTION ENDW2 (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW2 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW2 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) + $ * (APB+THREE)/FOUR + IF (N.EQ.2) THEN + ENDW2 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 + 100 CONTINUE + ENDW2 = F3 + RETURN + END +C + REAL*8 FUNCTION GAMMAF (X) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 X + ZERO = 0.0 + HALF = 0.5 + ONE = 1.0 + TWO = 2.0 + FOUR = 4.0 + PI = FOUR*ATAN(ONE) + GAMMAF = ONE + IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI) + IF (X.EQ. HALF) GAMMAF = SQRT(PI) + IF (X.EQ. ONE ) GAMMAF = ONE + IF (X.EQ. TWO ) GAMMAF = ONE + IF (X.EQ. 1.5 ) GAMMAF = SQRT(PI)/2. + IF (X.EQ. 2.5) GAMMAF = 1.5*SQRT(PI)/2. + IF (X.EQ. 3.5) GAMMAF = 0.5*(2.5*(1.5*SQRT(PI))) + IF (X.EQ. 3. ) GAMMAF = 2. + IF (X.EQ. 4. ) GAMMAF = 6. + IF (X.EQ. 5. ) GAMMAF = 24. + IF (X.EQ. 6. ) GAMMAF = 120. + RETURN + END +C + REAL*8 FUNCTION PNORMJ (N,ALPHA,BETA) + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 ALPHA,BETA + ONE = 1. + TWO = 2. + DN = ((N)) + CONST = ALPHA+BETA+ONE + IF (N.LE.1) THEN + PROD = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA) + PROD = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA)) + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + ENDIF + PROD = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE) + PROD = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE)) + PROD = PROD*(ONE+ALPHA)*(TWO+ALPHA) + PROD = PROD*(ONE+BETA)*(TWO+BETA) + DO 100 I=3,N + DINDX = ((I)) + FRAC = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA)) + PROD = PROD*FRAC + 100 CONTINUE + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + END +C + SUBROUTINE JACG (XJAC,NP,ALPHA,BETA) +C-------------------------------------------------------------------- +C +C Compute NP Gauss points XJAC, which are the zeros of the +C Jacobi polynomial J(NP) with parameters ALPHA and BETA. +C ALPHA and BETA determines the specific type of Gauss points. +C Examples: +C ALPHA = BETA = 0.0 -> Legendre points +C ALPHA = BETA = -0.5 -> Chebyshev points +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 XJAC(1) + DATA KSTOP /10/ + DATA EPS/1.0e-12/ + N = NP-1 + one = 1. + DTH = 4.*ATAN(one)/(2.*((N))+2.) + DO 40 J=1,NP + IF (J.EQ.1) THEN + X = COS((2.*(((J))-1.)+1.)*DTH) + ELSE + X1 = COS((2.*(((J))-1.)+1.)*DTH) + X2 = XLAST + X = (X1+X2)/2. + ENDIF + DO 30 K=1,KSTOP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X) + RECSUM = 0. + JM = J-1 + DO 29 I=1,JM + RECSUM = RECSUM+1./(X-XJAC(NP-I+1)) + 29 CONTINUE + DELX = -P/(PD-RECSUM*P) + X = X+DELX + IF (ABS(DELX) .LT. EPS) GOTO 31 + 30 CONTINUE + 31 CONTINUE + XJAC(NP-J+1) = X + XLAST = X + 40 CONTINUE + DO 200 I=1,NP + XMIN = 2. + DO 100 J=I,NP + IF (XJAC(J).LT.XMIN) THEN + XMIN = XJAC(J) + JMIN = J + ENDIF + 100 CONTINUE + IF (JMIN.NE.I) THEN + SWAP = XJAC(I) + XJAC(I) = XJAC(JMIN) + XJAC(JMIN) = SWAP + ENDIF + 200 CONTINUE + RETURN + END +C + SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2, + $ N,ALP,BET,X) +C-------------------------------------------------------------------- +C +C Computes the Jacobi polynomial (POLY) and its derivative (PDER) +C of degree N at X. +C +C-------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + APB = ALP+BET + POLY = 1. + PDER = 0. + IF (N .EQ. 0) RETURN + POLYL = POLY + PDERL = PDER + POLY = (ALP-BET+(APB+2.)*X)/2. + PDER = (APB+2.)/2. + IF (N .EQ. 1) RETURN + DO 20 K=2,N + DK = ((K)) + A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.) + A2 = (2.*DK+APB-1.)*(ALP**2-BET**2) + B3 = (2.*DK+APB-2.) + A3 = B3*(B3+1.)*(B3+2.) + A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB) + POLYN = ((A2+A3*X)*POLY-A4*POLYL)/A1 + PDERN = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1 + PSAVE = POLYL + PDSAVE = PDERL + POLYL = POLY + POLY = POLYN + PDERL = PDER + PDER = PDERN + 20 CONTINUE + POLYM1 = POLYL + PDERM1 = PDERL + POLYM2 = PSAVE + PDERM2 = PDSAVE + RETURN + END +C + REAL FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGJ through +C the NP Gauss Jacobi points ZGJ at the point Z. +C Single precision version. +C +C--------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD,ZGJD(NZD) + REAL Z,ZGJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ZD = Z + DO 100 I=1,NP + ZGJD(I) = ZGJ(I) + 100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGJ = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD) + RETURN + END +C + REAL*8 FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGJD through +C the NZ Gauss-Lobatto Jacobi points ZGJ at the point Z. +C Double precision version. +C +C--------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z,ZGJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGJ(II) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGJD = ONE + RETURN + ENDIF + CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI) + CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z) + HGJD = PZ/(PDZI*(Z-ZI)) + RETURN + END +C + REAL FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGLJ through +C the NZ Gauss-Lobatto Jacobi points ZGLJ at the point Z. +C Single precision version. +C +C--------------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL*8 ZD,ZGLJD(NZD) + REAL Z,ZGLJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call exitt + ENDIF + ZD = Z + DO 100 I=1,NP + ZGLJD(I) = ZGLJ(I) + 100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGLJ = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD) + RETURN + END +C + REAL*8 FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGLJD through +C the NZ Gauss-Lobatto Jacobi points ZJACL at the point Z. +C Double precision version. +C +C--------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 Z,ZGLJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGLJ(I) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGLJD = ONE + RETURN + ENDIF + N = NP-1 + DN = ((N)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI) + CONST = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z) + HGLJD = (ONE-Z**2)*PD/(CONST*(Z-ZI)) + RETURN + END +C + SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss Jacobi points Z. +C Note: D and DT are square matrices. +C Single precision version. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL*8 DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA +C + IF (NZ.LE.0) THEN + WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1' + call exitt + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here Nz=',Nz + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) + 100 CONTINUE + CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss Jacobi points Z. +C Note: D and DT are square matrices. +C Double precision version. +C +C----------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + DO 200 I=1,NZ + DO 200 J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J)) + IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J))) + IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ + $ (TWO*(ONE-Z(I)**2)) + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Jacobi points Z. +C Note: D and DT are square matrices. +C Single precision version. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL*8 DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGLJ: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NZ=',NZ + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) + 100 CONTINUE + CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Jacobi points Z. +C Note: D and DT are square matrices. +C Double precision version. +C +C----------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) +C + IF (NZ.LE.1) THEN + WRITE (6,*) 'DGLJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + DO 200 I=1,NZ + DO 200 J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J)) + CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI + CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ + IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J))) + IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) + $ D(I,J) = (ALPHA*(ONE+Z(I))-BETA*(ONE-Z(I)))/ + $ (TWO*(ONE-Z(I)**2)) + IF ((I.EQ.J).AND.(I.EQ.1)) + $ D(I,J) = (EIGVAL+ALPHA)/(TWO*(BETA+TWO)) + IF ((I.EQ.J).AND.(I.EQ.NZ)) + $ D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO)) + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + SUBROUTINE DGLL (D,DT,Z,NZ,NZD) +C----------------------------------------------------------------- +C +C Compute the derivative matrix D and its transpose DT +C associated with the Nth order Lagrangian interpolants +C through the NZ Gauss-Lobatto Legendre points Z. +C Note: D and DT are square matrices. +C +C----------------------------------------------------------------- + PARAMETER (NMAX=84) + REAL D(NZD,NZD),DT(NZD,NZD),Z(1) + N = NZ-1 + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Subroutine DGLL' + WRITE (6,*) 'Maximum polynomial degree =',NMAX + WRITE (6,*) 'Polynomial degree =',NZ + ENDIF + IF (NZ .EQ. 1) THEN + D(1,1) = 0. + RETURN + ENDIF + FN = (N) + d0 = FN*(FN+1.)/4. + DO 200 I=1,NZ + DO 200 J=1,NZ + D(I,J) = 0. + IF (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ + $ (PNLEG(Z(J),N)*(Z(I)-Z(J))) + IF ((I.EQ.J).AND.(I.EQ.1)) D(I,J) = -d0 + IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) = d0 + DT(J,I) = D(I,J) + 200 CONTINUE + RETURN + END +C + REAL FUNCTION HGLL (I,Z,ZGLL,NZ) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant L through +C the NZ Gauss-Lobatto Legendre points ZGLL at the point Z. +C +C--------------------------------------------------------------------- + REAL ZGLL(1) + EPS = 1.E-5 + DZ = Z - ZGLL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGLL = 1. + RETURN + ENDIF + N = NZ - 1 + ALFAN = (N)*((N)+1.) + HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ + $ (ALFAN*PNLEG(ZGLL(I),N)*(Z-ZGLL(I))) + RETURN + END +C + REAL FUNCTION HGL (I,Z,ZGL,NZ) +C--------------------------------------------------------------------- +C +C Compute the value of the Lagrangian interpolant HGL through +C the NZ Gauss Legendre points ZGL at the point Z. +C +C--------------------------------------------------------------------- + REAL ZGL(1) + EPS = 1.E-5 + DZ = Z - ZGL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGL = 1. + RETURN + ENDIF + N = NZ-1 + HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I))) + RETURN + END +C + REAL FUNCTION PNLEG (Z,N) +C--------------------------------------------------------------------- +C +C Compute the value of the Nth order Legendre polynomial at Z. +C (Simpler than JACOBF) +C Based on the recursion formula for the Legendre polynomials. +C +C--------------------------------------------------------------------- +C +C This next statement is to overcome the underflow bug in the i860. +C It can be removed at a later date. 11 Aug 1990 pff. +C + IF(ABS(Z) .LT. 1.0E-25) Z = 0.0 +C + P1 = 1. + IF (N.EQ.0) THEN + PNLEG = P1 + RETURN + ENDIF + P2 = Z + P3 = P2 + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P1 = P2 + P2 = P3 + 10 CONTINUE + PNLEG = P3 + if (n.eq.0) pnleg = 1. + RETURN + END +C + REAL FUNCTION PNDLEG (Z,N) +C---------------------------------------------------------------------- +C +C Compute the derivative of the Nth order Legendre polynomial at Z. +C (Simpler than JACOBF) +C Based on the recursion formula for the Legendre polynomials. +C +C---------------------------------------------------------------------- + P1 = 1. + P2 = Z + P1D = 0. + P2D = 1. + P3D = 1. + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.) + P1 = P2 + P2 = P3 + P1D = P2D + P2D = P3D + 10 CONTINUE + PNDLEG = P3D + IF (N.eq.0) pndleg = 0. + RETURN + END +C + SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Legendre mesh (M1), and evaluate its +C derivative on a Guass Legendre mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLLGL). +C Note: D and DT are rectangular matrices. +C +C----------------------------------------------------------------------- + REAL D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1) + IF (NZM1.EQ.1) THEN + D (1,1) = 0. + DT(1,1) = 0. + RETURN + ENDIF + EPS = 1.E-6 + NM1 = NZM1-1 + DO 10 IP = 1, NZM2 + DO 10 JQ = 1, NZM1 + ZP = ZM2(IP) + ZQ = ZM1(JQ) + IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN + D(IP,JQ) = 0. + ELSE + D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) + $ -IM12(IP,JQ))/(ZP-ZQ) + ENDIF + DT(JQ,IP) = D(IP,JQ) + 10 CONTINUE + RETURN + END +C + SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its +C derivative on a Guass Jacobi mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLJGJ). +C Note: D and DT are rectangular matrices. +C Single precision version. +C +C----------------------------------------------------------------------- + REAL D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1) + PARAMETER (NMAX=84) + PARAMETER (NDD = NMAX) + REAL*8 DD(NDD,NDD), DTD(NDD,NDD) + REAL*8 ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD) + REAL*8 ALPHAD, BETAD +C + IF (NPGL.LE.1) THEN + WRITE(6,*) 'DGLJGJ: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF (NPGL.GT.NMAX) THEN + WRITE(6,*) 'Polynomial degree too high in DGLJGJ' + WRITE(6,*) 'Maximum polynomial degree is',NMAX + WRITE(6,*) 'Here NPGL=',NPGL + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NPG + ZGD(I) = ZG(I) + DO 100 J=1,NPGL + IGLGD(I,J) = IGLG(I,J) + 100 CONTINUE + DO 200 I=1,NPGL + ZGLD(I) = ZGL(I) + 200 CONTINUE + CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD) + DO 300 I=1,NPG + DO 300 J=1,NPGL + D(I,J) = DD(I,J) + DT(J,I) = DTD(J,I) + 300 CONTINUE + RETURN + END +C + SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) +C----------------------------------------------------------------------- +C +C Compute the (one-dimensional) derivative matrix D and its +C transpose DT associated with taking the derivative of a variable +C expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its +C derivative on a Guass Jacobi mesh (M2). +C Need the one-dimensional interpolation operator IM12 +C (see subroutine IGLJGJ). +C Note: D and DT are rectangular matrices. +C Double precision version. +C +C----------------------------------------------------------------------- + IMPLICIT REAL*8 (A-H,O-Z) + REAL*8 D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2) + REAL*8 IGLG(ND2,ND1), ALPHA, BETA +C + IF (NPGL.LE.1) THEN + WRITE(6,*) 'DGLJGJD: Minimum number of Gauss-Lobatto points is 2' + call exitt + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1' + call exitt + ENDIF +C + EPS = 1.e-6 + ONE = 1. + TWO = 2. + NGL = NPGL-1 + DN = ((NGL)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) +C + DO 100 I=1,NPG + DO 100 J=1,NPGL + DZ = ABS(ZG(I)-ZGL(J)) + IF (DZ.LT.EPS) THEN + D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ + $ (TWO*(ONE-ZG(I)**2)) + ELSE + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J)) + FACI = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)) + FACJ = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J)) + CONST = EIGVAL*PJ+FACJ*PDJ + D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) + $ -(ONE-ZG(I)**2)*PDI)/(CONST*(ZG(I)-ZGL(J))**2) + ENDIF + DT(J,I) = D(I,J) + 100 CONTINUE + RETURN + END +C + SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss Legendre mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss Legendre points. +C Z2 : NZ2 points on mesh M. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +c + SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss-Lobatto Legendre mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss-Lobatto Legendre points. +C Z2 : NZ2 points on mesh M. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGLL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +C + SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss Jacobi mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss Jacobi points. +C Z2 : NZ2 points on mesh M. +C Single precision version. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END +c + SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) +C---------------------------------------------------------------------- +C +C Compute the one-dimensional interpolation operator (matrix) I12 +C ands its transpose IT12 for interpolating a variable from a +C Gauss-Lobatto Jacobi mesh (1) to a another mesh M (2). +C Z1 : NZ1 Gauss-Lobatto Jacobi points. +C Z2 : NZ2 points on mesh M. +C Single precision version. +C +C-------------------------------------------------------------------- + REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2) + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO 10 I=1,NZ2 + ZI = Z2(I) + DO 10 J=1,NZ1 + I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + 10 CONTINUE + RETURN + END diff --git a/src/timers.c b/src/timers.c new file mode 100644 index 0000000..f40bf91 --- /dev/null +++ b/src/timers.c @@ -0,0 +1,24 @@ + +#include /* for struct timespec */ + +double fclock_gettime(void) { + + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + double timeval = (double) (ts.tv_sec + ts.tv_nsec*1.0E-9); + + return timeval; +} + +double fclock_gettime_( void ) __attribute__((alias("fclock_gettime"))); + +#ifdef BGQTIMER +#include + +double ReadTimeBase_Double( void ) { + return (double) GetTimeBase(); +} + +double readtimebase_double( void ) __attribute__((alias("ReadTimeBase_Double"))); +double readtimebase_double_( void ) __attribute__((alias("ReadTimeBase_Double"))); +#endif diff --git a/test/example1/SIZE b/test/example1/SIZE new file mode 100644 index 0000000..941aedc --- /dev/null +++ b/test/example1/SIZE @@ -0,0 +1,17 @@ +C Dimension file to be included + + parameter (ldim=3) ! dimension + parameter (lx1=12,ly1=lx1,lz1=lx1) ! polynomial order + + parameter (lp =49152) ! max number of processors + parameter (lelt= 512) ! max number of elements, per proc + + parameter (lelg=lelt*lp) ! max total elements in a test + parameter (lelx=lelg,lely=1,lelz=1) ! max elements in each direction + + parameter (ldimt=1,ldimt1=ldimt+1) ! used in 'include' files + + + common /dimn/ nelx,nely,nelz,nelt ! local element common block + $ , nx1,ny1,nz1,ndim,nfield,nid + diff --git a/test/example1/data.rea b/test/example1/data.rea new file mode 100644 index 0000000..36077f4 --- /dev/null +++ b/test/example1/data.rea @@ -0,0 +1,5 @@ +.true. = ifbrick ! brick or linear geometry +512 512 1 = iel0,ielN,ielD (per processor) ! range of number of elements per proc. + 9 12 3 = nx0,nxN,nxD ! poly. order range for nx1 + 1 1 1 = npx, npy, npz ! processor distribution in x,y,z + 1 1 1 = mx, my, mz ! local element distribution in x,y,z diff --git a/test/example1/makefile.template b/test/example1/makefile.template new file mode 100644 index 0000000..81d8805 --- /dev/null +++ b/test/example1/makefile.template @@ -0,0 +1,140 @@ +BINNAME=nekbone +CASENAME= +CASEDIR= +S= +J:=$S/jl +OPT_INCDIR:=./ +OBJDIR=obj +IFMPI= +F77= +CC= +P= +PPPO= +PPS= +G= +OPT_FLAGS_STD= +USR= +USR_LFLAGS= + +################################################################################ + +lFLAGS = $(USR_LFLAGS) + +PPS_F = $(patsubst %,$(PPPO)-D%,$(PPS)) +PPS_C = $(patsubst %,-D%,$(PPS)) + +#NEW ######################################################################### +EXTRA = cg.o driver.o math.o omp.o mxm_wrapper.o prox_dssum.o prox_setup.o semhat.o \ +speclib.o timers.o + +################################################################################ +# MXM +MXM = mxm_std.o + +# JL Routines ################################################################### +JO = jl_ +JL := -DPREFIX=jl_ + +JLCORE = $(JO)gs.o $(JO)sort.o $(JO)sarray_transfer.o $(JO)sarray_sort.o \ +$(JO)gs_local.o $(JO)crystal.o $(JO)comm.o $(JO)tensor.o $(JO)fail.o \ +$(JO)fcrystal.o + +COMM_MPI := comm_mpi.o +ifeq ($(IFMPI),false) + COMM_MPI := ${COMM_MPI} mpi_dummy.o +endif + +ifeq ($(IFMPI),false) + DUMMY:= $(shell cp $S/mpi_dummy.h $S/mpif.h) +else + DUMMY:= $(shell rm -rf $S/mpif.h) +endif + +##################################################################################### +TMP0 = $(EXTRA) $(COMM_MPI) $(MXM) +NOBJS_F0 = $(patsubst %,$(OBJDIR)/%,$(TMP0)) +TMP0c = $(JLCORE) +NOBJS_C0 = $(patsubst %,$(OBJDIR)/%,$(TMP0c)) + +NOBJS0 = $(NOBJS_F0) $(NOBJS_C0) +############################################################################## + +L0 = $(G) -O0 +L2 = $(G) $(OPT_FLAGS_STD) +L3 = $(G) $(OPT_FLAGS_STD) +L4 = $(L3) + +FL0 = $(L0) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL2i4 = $(L2) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL2 = $(L2) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL3 = $(L3) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) +FL4 = $(L4) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR) + +cFL0 = $(L0) $(PPS_C) +cFL2 = $(L2) $(PPS_C) +cFL3 = $(L3) $(PPS_C) +cFL4 = $(L4) $(PPS_C) +################################################################################ +all : nekbone + +objdir: + @mkdir $(OBJDIR) 2>/dev/null; cat /dev/null + +nekbone: objdir $(NOBJS0) + $(F77) -o ${BINNAME} $G $(NOBJS0) $(lFLAGS) + @if test -f ${BINNAME}; then \ + echo "#############################################################"; \ + echo "# Compilation successful! #"; \ + echo "#############################################################"; \ + size ${BINNAME}; \ + echo ""; \ + else \ + echo -e "\033[1;31;38m" "ERROR: Compilation failed!"; \ + echo -e "\033[0m"; \ + fi +ifeq ($(IFMPI),false) + @rm -rf $S/mpif.h +endif + +clean: + rm -rf ./obj ${BINNAME} +ifeq ($(IFMPI),false) + @rm -rf $S/mpif.h +endif + +$(NOBJS_F0) : SIZE +# CORE ############################################################################ +$(OBJDIR)/cg.o :$S/cg.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/driver.o :$S/driver.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/math.o :$S/math.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/omp.o :$S/omp.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/prox_dssum.o :$S/prox_dssum.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/prox_setup.o :$S/prox_setup.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/semhat.o :$S/semhat.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/speclib.o :$S/speclib.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/blas.o :$S/blas.f; $(F77) -c $(FL2i4) $< -o $@ +$(OBJDIR)/byte_mpi.o :$S/byte_mpi.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/comm_mpi.o :$S/comm_mpi.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/mpi_dummy.o :$S/mpi_dummy.f; $(F77) -c $(FL2) $< -o $@ +# MXM ############################################################################ +$(OBJDIR)/mxm_wrapper.o :$S/mxm_wrapper.f; $(F77) -c $(FL2) $< -o $@ +$(OBJDIR)/mxm_std.o :$S/mxm_std.f; $(F77) -c $(FL4) $< -o $@ +$(OBJDIR)/bg_aligned3.o :$S/bg_aligned3.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm3.o :$S/bg_mxm3.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm44.o :$S/bg_mxm44.s; $(CC) -c $< -o $@ +$(OBJDIR)/bg_mxm44_uneven.o :$S/bg_mxm44_uneven.s; $(CC) -c $< -o $@ +$(OBJDIR)/k10_mxm.o :$S/k10_mxm.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +# C Files ################################################################################## +$(OBJDIR)/byte.o :$S/byte.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/chelpers.o :$S/chelpers.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/timers.o :$S/timers.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)fail.o :$(J)/fail.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)tensor.o :$(J)/tensor.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sort.o :$(J)/sort.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sarray_sort.o :$(J)/sarray_sort.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)comm.o :$(J)/comm.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)crystal.o :$(J)/crystal.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)sarray_transfer.o :$(J)/sarray_transfer.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)fcrystal.o :$(J)/fcrystal.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)gs.o :$(J)/gs.c; $(CC) -c $(cFL2) $(JL) $< -o $@ +$(OBJDIR)/$(JO)gs_local.o :$(J)/gs_local.c; $(CC) -c $(cFL2) $(JL) $< -o $@ diff --git a/test/example1/makenek b/test/example1/makenek new file mode 100755 index 0000000..228bcd1 --- /dev/null +++ b/test/example1/makenek @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="$HOME/Nekbone/src" + +# Fortran compiler +F77="mpif77" + +# C compiler +CC="mpicc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +# NEKCOMM, NEKDLAY, BG, MGRID +#PPLIST="?" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +#USR_LFLAGS="-L/usr/lib -lfoo" + +# generic compiler flags +#G="-g" + +# optimization flags +#OPT_FLAGS_STD="" +#OPT_FLAGS_MAG="" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-bgq b/test/example1/makenek-bgq new file mode 100755 index 0000000..5210815 --- /dev/null +++ b/test/example1/makenek-bgq @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="mpixlf77_r -qsmp=omp -qnosave" + +# C compiler +CC="mpixlc_r -qsmp=omp" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS BGQTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +#USR_LFLAGS="-L/usr/lib/ -lfoo" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-O3" +OPT_FLAGS_MAG="-O3" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-cray-knl b/test/example1/makenek-cray-knl new file mode 100755 index 0000000..c0658f2 --- /dev/null +++ b/test/example1/makenek-cray-knl @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="ftn" + +# C compiler +CC="cc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS CGTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +USR_LFLAGS="-qopenmp -dynamic -mcmodel=medium -shared-intel" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel " +OPT_FLAGS_MAG="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/makenek-intel b/test/example1/makenek-intel new file mode 100755 index 0000000..209dfc3 --- /dev/null +++ b/test/example1/makenek-intel @@ -0,0 +1,55 @@ +#!/bin/bash +# Nek5000 build config file +# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC + +# source path +SOURCE_ROOT="../../src" + +# Fortran compiler +F77="mpiifort" + +# C compiler +CC="mpiicc" + +# pre-processor symbol list +# (set PPLIST=? to get a list of available symbols) +#PPLSIT="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8" +PPLIST="TIMERS CGTIMERS" + + +# OPTIONAL SETTINGS +# ----------------- + +# enable MPI (default true) +#IFMPI="false" + +# auxilliary files to compile +# NOTE: source files have to located in the same directory as makenek +# a makefile_usr.inc has to be provided containing the build rules +#USR="foo.o" + +# linking flags +USR_LFLAGS="-qopenmp -mcmodel=medium -shared-intel" + +# generic compiler flags +#G="-g" + +# optimization flags +OPT_FLAGS_STD="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel" +OPT_FLAGS_MAG="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel" + +############################################################################### +# DONT'T TOUCH WHAT FOLLOWS !!! +############################################################################### +# assign version tag +mver=1 +# overwrite source path with optional 2nd argument +if [ -d $2 ] && [ $# -eq 2 ]; then + SOURCE_ROOT="$2" + echo "change source code directory to: ", $SOURCE_ROOT +fi +# do some checks and create makefile +source $SOURCE_ROOT/makenek.inc +# compile +make -j4 -f makefile 2>&1 | tee compiler.out +exit 0 diff --git a/test/example1/nekpmpi b/test/example1/nekpmpi new file mode 100755 index 0000000..1c4cc8a --- /dev/null +++ b/test/example1/nekpmpi @@ -0,0 +1,4 @@ +rm -f logfile +mv $1.log.$2 $1.log1.$2 +mpiexec -np $2 ./nekbone $1 > $1.log.$2 +ln $1.log.$2 logfile