From 72b42d711627274117675b83cb60186fdbabc038 Mon Sep 17 00:00:00 2001
From: Abhik Sarkar <sarkar6@llnl.gov>
Date: Fri, 18 Feb 2022 14:24:43 -0800
Subject: [PATCH] Creating nekbone repo from
 https://asc.llnl.gov/coral-2-benchmarks

---
 CHANGES                         |    41 +
 COPYRIGHT                       |    34 +
 README.md                       |     1 +
 readme.pdf                      |   Bin 0 -> 135910 bytes
 src/DXYZ                        |     5 +
 src/INPUT                       |    19 +
 src/MASS                        |     4 +
 src/PARALLEL                    |    31 +
 src/README                      |    66 +
 src/TIMER                       |    19 +
 src/TOTAL                       |     5 +
 src/WZ                          |     7 +
 src/bg_aligned3.s               |    41 +
 src/bg_mxm3.s                   |   406 +
 src/bg_mxm44.s                  |   497 +
 src/bg_mxm44_uneven.s           |    82 +
 src/blas.f                      | 30886 ++++++++++++++++++++++++++++++
 src/byte_mpi.f                  |   209 +
 src/cg.f                        |   335 +
 src/comm_mpi.f                  |  1212 ++
 src/driver.f                    |   660 +
 src/driver_comm.f               |    21 +
 src/jl/Makefile                 |    91 +
 src/jl/README                   |    69 +
 src/jl/c99.h                    |    16 +
 src/jl/cdep.py                  |    33 +
 src/jl/comm.c                   |   175 +
 src/jl/comm.h                   |   255 +
 src/jl/crs.h                    |    24 +
 src/jl/crs_test.c               |   116 +
 src/jl/crystal.c                |   141 +
 src/jl/crystal.h                |    21 +
 src/jl/crystal_test.c           |    88 +
 src/jl/fail.c                   |    53 +
 src/jl/fail.h                   |    52 +
 src/jl/fcrystal.c               |   191 +
 src/jl/gen_poly_imp.c           |   227 +
 src/jl/gs.c                     |  1503 ++
 src/jl/gs.h                     |   141 +
 src/jl/gs_defs.h                |    81 +
 src/jl/gs_local.c               |   336 +
 src/jl/gs_local.h               |    43 +
 src/jl/gs_test.c                |    68 +
 src/jl/gs_test_old.c            |   147 +
 src/jl/gs_unique_test.c         |    72 +
 src/jl/makefile.cdep            |    48 +
 src/jl/mem.h                    |   168 +
 src/jl/name.h                   |    44 +
 src/jl/odep_info.py             |    50 +
 src/jl/rand_elt_test.c          |   169 +
 src/jl/rand_elt_test.h          |    18 +
 src/jl/rdtsc.h                  |    12 +
 src/jl/sarray_sort.c            |    45 +
 src/jl/sarray_sort.h            |    89 +
 src/jl/sarray_sort_test.c       |    47 +
 src/jl/sarray_transfer.c        |   197 +
 src/jl/sarray_transfer.h        |    95 +
 src/jl/sarray_transfer_test.c   |    93 +
 src/jl/sort.c                   |    31 +
 src/jl/sort.h                   |    76 +
 src/jl/sort_imp.h               |   543 +
 src/jl/sort_test.c              |   113 +
 src/jl/sort_test2.c             |    74 +
 src/jl/spchol_test.c            |    54 +
 src/jl/tensor.c                 |    82 +
 src/jl/tensor.h                 |   199 +
 src/jl/types.h                  |    79 +
 src/k10_mxm.c                   |    56 +
 src/makenek.inc                 |   324 +
 src/math.f                      |  1402 ++
 src/mpi_dummy.f                 |  1053 +
 src/mpi_dummy.h                 |    61 +
 src/mxm_std.f                   |  4123 ++++
 src/mxm_wrapper.f               |   165 +
 src/omp.f                       |   128 +
 src/prox_dssum.f                |   174 +
 src/prox_setup.f                |   113 +
 src/semhat.f                    |    94 +
 src/speclib.f                   |  1176 ++
 src/timers.c                    |    24 +
 test/example1/SIZE              |    17 +
 test/example1/data.rea          |     5 +
 test/example1/makefile.template |   140 +
 test/example1/makenek           |    55 +
 test/example1/makenek-bgq       |    55 +
 test/example1/makenek-cray-knl  |    55 +
 test/example1/makenek-intel     |    55 +
 test/example1/nekpmpi           |     4 +
 88 files changed, 50129 insertions(+)
 create mode 100644 CHANGES
 create mode 100644 COPYRIGHT
 create mode 100644 README.md
 create mode 100644 readme.pdf
 create mode 100644 src/DXYZ
 create mode 100644 src/INPUT
 create mode 100644 src/MASS
 create mode 100644 src/PARALLEL
 create mode 100644 src/README
 create mode 100644 src/TIMER
 create mode 100644 src/TOTAL
 create mode 100644 src/WZ
 create mode 100644 src/bg_aligned3.s
 create mode 100644 src/bg_mxm3.s
 create mode 100644 src/bg_mxm44.s
 create mode 100644 src/bg_mxm44_uneven.s
 create mode 100644 src/blas.f
 create mode 100644 src/byte_mpi.f
 create mode 100644 src/cg.f
 create mode 100644 src/comm_mpi.f
 create mode 100644 src/driver.f
 create mode 100644 src/driver_comm.f
 create mode 100644 src/jl/Makefile
 create mode 100644 src/jl/README
 create mode 100644 src/jl/c99.h
 create mode 100755 src/jl/cdep.py
 create mode 100644 src/jl/comm.c
 create mode 100644 src/jl/comm.h
 create mode 100644 src/jl/crs.h
 create mode 100644 src/jl/crs_test.c
 create mode 100644 src/jl/crystal.c
 create mode 100644 src/jl/crystal.h
 create mode 100644 src/jl/crystal_test.c
 create mode 100644 src/jl/fail.c
 create mode 100644 src/jl/fail.h
 create mode 100644 src/jl/fcrystal.c
 create mode 100644 src/jl/gen_poly_imp.c
 create mode 100644 src/jl/gs.c
 create mode 100644 src/jl/gs.h
 create mode 100644 src/jl/gs_defs.h
 create mode 100644 src/jl/gs_local.c
 create mode 100644 src/jl/gs_local.h
 create mode 100644 src/jl/gs_test.c
 create mode 100644 src/jl/gs_test_old.c
 create mode 100644 src/jl/gs_unique_test.c
 create mode 100644 src/jl/makefile.cdep
 create mode 100644 src/jl/mem.h
 create mode 100644 src/jl/name.h
 create mode 100755 src/jl/odep_info.py
 create mode 100644 src/jl/rand_elt_test.c
 create mode 100644 src/jl/rand_elt_test.h
 create mode 100644 src/jl/rdtsc.h
 create mode 100644 src/jl/sarray_sort.c
 create mode 100644 src/jl/sarray_sort.h
 create mode 100644 src/jl/sarray_sort_test.c
 create mode 100644 src/jl/sarray_transfer.c
 create mode 100644 src/jl/sarray_transfer.h
 create mode 100644 src/jl/sarray_transfer_test.c
 create mode 100644 src/jl/sort.c
 create mode 100644 src/jl/sort.h
 create mode 100644 src/jl/sort_imp.h
 create mode 100644 src/jl/sort_test.c
 create mode 100644 src/jl/sort_test2.c
 create mode 100644 src/jl/spchol_test.c
 create mode 100644 src/jl/tensor.c
 create mode 100644 src/jl/tensor.h
 create mode 100644 src/jl/types.h
 create mode 100644 src/k10_mxm.c
 create mode 100644 src/makenek.inc
 create mode 100644 src/math.f
 create mode 100644 src/mpi_dummy.f
 create mode 100644 src/mpi_dummy.h
 create mode 100644 src/mxm_std.f
 create mode 100644 src/mxm_wrapper.f
 create mode 100644 src/omp.f
 create mode 100644 src/prox_dssum.f
 create mode 100644 src/prox_setup.f
 create mode 100644 src/semhat.f
 create mode 100644 src/speclib.f
 create mode 100644 src/timers.c
 create mode 100644 test/example1/SIZE
 create mode 100644 test/example1/data.rea
 create mode 100644 test/example1/makefile.template
 create mode 100755 test/example1/makenek
 create mode 100755 test/example1/makenek-bgq
 create mode 100755 test/example1/makenek-cray-knl
 create mode 100755 test/example1/makenek-intel
 create mode 100755 test/example1/nekpmpi

diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..28b5f6b
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,41 @@
+***********************************************************
+*              Changes in 2.0                             *
+***********************************************************
+ -Subroutine gsync() has changed to nekgsync() to avoid
+	possible conflict on certain architectures
+
+ -Executable is renamed 'nekbone' to replace 'nekproxy' and
+	other naming changes.
+
+ -iel0 and ielN set in the data.rea file are now used to control the
+ 	range of tests run.  Tests range in size from iel0
+	to ielN elements per process.  (Previously, tests 
+	were run from 1 to lelt elements per process.)  The
+	maximum value of ielN is lelt.
+
+ -nx0 and nxN set in data.rea file are now used to control the
+	range of polynomial orders.   Ranging from nx0 to 
+	nxN, where nxN<=lx1 (which is set in SIZE).  Previously
+	tests only ran with nx1=lx1.  The default is set to
+	reflect this, but nekbone now supports a range of
+	polynomial orders without recompiling the code.
+***********************************************************
+*              Changes in 2.1                             *
+***********************************************************
+ -Fixed nx0 and nxN control of polynomial order.  Default is
+ 	now to use lx1 until further notice.  Variable 
+	nx1 caused memory instabilities and needs further 
+	development.
+ -Fixed a memory copy bug in the jl/ array transfer code.  
+	sarray_transfer, used for the tuple transfer, should
+	be fixed now.
+
+***********************************************************
+*              Changes in 2.3                             *
+***********************************************************
+ - added OpenMP parallelism, MPITHREADS preprocessor macro
+   controls if MPI is called from one or multiple threads
+ - added timers controlled by TIMERS preprocessor macro
+ - fixed gather-scatter operation gsop() to always use pairwise
+   method
+ - switched to using system_clock routine in dummy mpi_wtime()
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..9f84af5
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,34 @@
+				  COPYRIGHT
+
+The following is a notice of limited availability of the code, and disclaimer
+which must be included in the prologue of the code and in all source listings
+of the code.
+
+Copyright Notice
+ (c) 2012 University of Chicago
+
+Permission is hereby granted to use, reproduce, prepare derivative works, and
+to redistribute to others.  This software was authored by:
+
+P. Fischer: (630) 252-6018; FAX: (630) 252-5986; email: fischer@mcs.anl.gov
+Mathematics and Computer Science Division
+Argonne National Laboratory, Argonne IL 60439
+
+			      GOVERNMENT LICENSE
+
+Portions of this material resulted from work developed under a U.S.
+Government Contract and are subject to the following license: the Government
+is granted for itself and others acting on its behalf a paid-up, nonexclusive,
+irrevocable worldwide license in this computer software to reproduce, prepare
+derivative works, and perform publicly and display publicly.
+
+				  DISCLAIMER
+
+This computer code material was prepared, in part, as an account of work
+sponsored by an agency of the United States Government.  Neither the United
+States, nor the University of Chicago, nor any of their employees, makes any
+warranty express or implied, or assumes any legal liability or responsibility
+for the accuracy, completeness, or usefulness of any information, apparatus,
+product, or process disclosed, or represents that its use would not infringe
+privately owned rights.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fab3aad
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# nekbone_2_3_5
diff --git a/readme.pdf b/readme.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..8c8aa70dce64d369b042400913963b32eea91d06
GIT binary patch
literal 135910
zcma%?Q*bWavbAH|wr$(CZQEY4ZQHi<##phjVmn!}lmFYh&N=_ZzS&oER^N`U>hV;M
zCRY-Zpl4=agCQTkN+^b5Ct@OUG_ir<<71RFcd&G`BI5X`qQWR)ZRck0Lc}OxXY6J!
zW^U?eW-cHA<Lc&OZfp<ZmFu1|=zzuy7k>MI#%)1P(S|=B&#h4%Dim0G67Catt%B4e
zM2+X=n>?*{DO1Qj-LL)8@{8A%kjOaj+wS(uy1R4L>4izSGv|g9?^pPBn_$>f8ThhL
zqL|dsy|oT1Gt#kGX^?zMcGxjnUxK3tD;b44&0V!ElZbF!!+QeBNa~^anwSbX;Ciu^
z<xjuMAdQh9Acs}SBwkifgT*R!jx?=##p{eklRoTVt{#~dBdS7K3e6>+usdBvjf_B(
z2xt_u{;WWl6OG~SS?*;dZm7)V(;tBw!IM44__Nt&TJ3O${h5gkQ>aVZK!L|X)R;mc
zJk+?${^0@NcfP~5?m81jE4QcKF6=yNknr@cyo&CEX%6X%sb3Dk!xZ<+-R?G8`z)F3
zO}oXiS>Q~Di%M+`eZNkBZG-8-N~OwYXO1)~)QRNx*&w&zW1I1X=7En8nC>*{-onh)
zUqa1I5-@p9GV)2h-&ryUA*T-xe-FC-@h0$vzFhaVRv|Gg?C@&3aRT@$AVUl5IHZ&H
zKE5VrirYaz|D0_xxI4dZ;0)PoAH@$^7i32`i0(UA)Tf{8`+kEQG){Y)JDB~Ci2wZl
zCH%kS{@2UK#tQR4CFcKZ3g-W+E3S3qUC}s^`sNxkQQ32<^D3WKR2+i1LeYr*>==q%
zhz#tYYVGaxl-{1KrCDoMy)1emcn8&@L)2Svq_-PBo^0W7Z5Bl6hx%`BzsujBsaGQ`
z(;WL61l5WGhT3Ji4gU6dK^iMLlT?M2<-h*6`$#+>)Oezgv|ZfHeU}4zil2aN2Q-42
zpDqezaE0yFYYwX=TCLRk{;196_{Nl3wK<&?awC|6Qq?)eW5DPjLCt);q74Sx7=$;8
z9Eu5o_R?D9hBYlo1b>-6xwdsw=bO2fdaxyIF7ZP{%wU7EZ2)mX<Wkzo+0S=~xQ`8(
zoQK&fcbyEHNaurqWgtXbG2yFmAJ#c23}-4-(L)SdM|VuJU4j#v#!;xwJ~#=&r8NTd
z4&B`dxMXo9PN_l7kgq${taPjFnZLOBF+|uC4sK?-Zc|j-&{o+UhvrL=jB*4Ii&|Z{
zw0wPOe2`loLq$j*V)s1CtP~TUE{xmWPrP5-UZ%bx)0qEEIvl$B1<X>TZmdQOqKP8Y
z=2*T_|GE?3lA@I^8uFhx{R?U~P6;YUiwu3Sk{zgC=8c9H3<=H+qZ{09AY)aRCeW9b
zOG4f5z7)=^6Rn3m1QwcDr{d-XgDQQOJqxn<X9s~XjR<J2gH-`T)|#qLPHP%!K}D~E
zSUkJs=g-oQ-QjiusEn#kHUcMkZmb>ByzpN-I?1~v?1Bnr5jG(HSt&etN#Z4g7WDg2
zq{>C5k<F72rc%1Xz)$Ajy4;p>uUj@?$57X(-vc2XNua|u`7EA{W6drG?1YE=6Fi92
zTfDj)#-2reZNwJcA-(3CO0`8Vb!91A5?B$ry`@*IV{21Fyi_Y3mcEPLa&UaoP@iuU
z0-Qh!&N{gVaDZuTBQhSzgtPRPvnGcmq)vg^GLzNrvFQ|*+f@5`OOnl`WP(s=HMa@Q
z^-luDD?O^;zSMYSfxo2)LUSs7WYNEW8OQ+JL-dMO(c@pmuaJcknaft9O~fq%pT>Nt
z`M9Z2@h{-DU_<URFGi6cqkP@aU5{t%jj)S|uWNoZ4~(9Fz6JgS%YgoQm;zr+7h-@V
zt5bQ9Y8GiB5D$k9!bFY9Rd~G@ACf!McU99a1?z`)caC9k1v;~<Dq<kpuW_w<GnQGt
zXIVmrk`*LF>V6|Wsv1lty9;;8S2%^9tgsfR3_boDTsU)*;9X*vA6N6F;xNtQa4W5U
zQBkCT^S1LhKDQc-cj32-@&Lf-JXXHbNa^oOp{qs1C`D>qkVI)lg9ezzxRn=?6)mT^
zqvQD|Yo6;VJ{jDX<bo5pr=~UE^<=EqU94qfXqE4~>^w{#eR7r1+Sh<#q!^m>zq8B$
z1wSL7hH=WVCs&k_Ud^Ms{Tk^>Zj2;7t;dKLqW1O^E8$sWD5z4dl8!@3%QYX7SDL^B
zeoY1vDPRhtD!KI~u0F$8U!953K;Q+R9b}b4O`!eo=CY}Z)Lvc_-54P*&GF?&L$)0m
z4QHe{8P+CJhq&*!YH?4JRmBM#|EUDS(RTu9t2K(M{NPw{#bu-1@XhTpo=nqFA{Dg_
z?Ka}|Lwgwm-Bvo}3%{{!e{!K>Do!IyV|@7{H5`-C7?$EEB+5@3h_Opw#F80UkOyQN
zw2q-=Adz0x42KbbW)^(<W7mdHw@PeNL{8D>)R@q+wxgouwcIwuQaqdx=kTIK`ll^#
zf0d_^IUSj)vKc&ZH%r1614GUphr{LYZZoK0MCT9B!1GE2lUd8K<rPEv>SyU~Yh#Pl
zZqA!5xP(R)6%$_E#f9MIlqrI0n<w6mQR9RTd83psMBpLeMLcn304}zyjWEee2UQuL
zR#Mfyr8P_0wPTp9V&oiRG#2<PV^fZ#19H&&r3wM<n@xYUg^~*U$5$@_4Mg|00e~sb
z-`I<$8w2=aHX4;3YY|8;q3#B~{=U1~q)ES0JG~MrdsDY@nh=ZR;PLfo-A^CQ*2oF*
zC2|m5y&sw;jel$^AH<!2E}^|cTTI4TFa0WaKDtHwBM^Nxzea@5C6HS#^XBMhDr)qs
zc+VV?>=Y_471DXFvG)ELTYKDXLEkJeE0J|##AE`7@}4-#uaB9(mYh4&#Aq}|E|`<)
zGcT=uD@HU|S5+~cddXtZJdGnR=&XLxTm0e6-!}2Nf^RQ$Jow|b_DZnhm_gzIqqM5{
zlYAg1J;(kQ5?|(F)5%Fp7AoO-H@w{lx@q~Hzv$)nO=K0!@Yg^QIyy?jVIBmcn+&+c
zcbY<x5WSCB8X<bc_M`eG%4yDcTE~88AU`&yK8hns@cZMTDTOSM62UdO_r{Wk4}g4$
z8!sXLy<7#+8<Y>0_KG6t7dt4}$(rzUdLsRV7&t3m`5)u---GgB^OTvBi|Ky{Dhtbh
zxGWad|Hfv`=t{fdcOV1$4e#QIQ@J5<d=;Yf6|T6P?Xo(!AaB&flv^uD?;8|31GB7+
z0{2Tu)XUuFp+IorZW=WAzPd3>wqV~=eguAXecWel8VX(wVGVtMDO~+@x5y+2NAx{O
z#M}J1Sf-93v%+uP1dMsF^%#1xEdDv|pEbPc=MnZ<ycUjK4c;!$yIz!Vaa^e+Rlk-v
zo&G*LzA+qLlN>iuoAiYqhWCf9V++JlNQghC%3T@uC-7M8+LzB|9NbHleNP7w%G`fi
zXmWu_FoMLc2vjpbBvl#wT>>-gnN&D}3g;;alR%u8JwZ*)o`h5ajGK#ZK+Mgp)3EK7
z)DBrf2j|GmczQ=TQO{0Zc6z5vWlOG{-PJWEM`I}WD=j=tV;GUGwhUERVLSz+qPNiT
zlDydiLwiYOJ^OixLihbPrT-ehz46sbg<dMDqF3V`y4+o)Lham~q*t~Gxm>k@n?SRo
zU3Lez7l&y4N-?PhQ}Xzi-@gZF63%ec)Q2$_?;AV8BDV=(P@Rh(!ZzIErH~tM36Mj%
z#|ZP90=&IiZ>iHF8GA(@f<(a2L<LJA5GAvrv{EKc%O0~RBoBwc1~>^RabZ7G{@Ts^
z1Ww2XgRitob1O~zTgj`2;$^n+CN2Gnokn)&$I}dgA+cy87Sl2YOXP6Kbn%v(Bw)^V
zV~K+-N~Kst56qeXP$-djrAw6~O{S=UTN>gzrl1G2M^&`K;z9b!!rSGURDr-oTFbqB
zIA;Q64t4D7(f6LP+8jpz-X5L@MzRH0jgezS!G$;y3sM;6KDhVF%QBb^UXH};hGndy
z9nq!F9dC$t(y~4^KY?CBEW4qQm(3_jEZ4(F2BbQ10a?jWVp{R|O|_eFk@kxZ1if$t
z;3WVdm6CRkqwbrsIoB;*ar?7Daw@Y>NKp*a(bI?0sD0i*(TND+CZ(4kmLvWeek6V0
zJ-d^jm7RBTm0kp|fNJjSe>{8IB@Dt{I8B?sDk8Ag^$nYSU2@jlpoJ#YlHD?*B5L5R
zLKyvhYQTEEOXebyQMfnXP4&1dwu>n?8js`$zVA&ANwRxp4i}#N3NW;A{gZZ?`Sg(J
z1RJgQ7OD&Q*buqpfG@#4Jwg#+4MHgp%uUYD6L$3(M;j?yPZU@4VF5btQyHFXi-~3B
zYBmt#(hYpiEtLma3Ol-<q^Q+oR}BR<I|gE$#55?~j;j%cND}qkaoUs_f7MC^%n?(i
zLJT4=IL#}+R2&CI(H6;AY*rt49t{7kiY`sGVw!wD@$e&>1iLn%b-*;+noUUM4ahK}
zrXTYe-lDdr0CketQf3Vz*RlYCPM09(S!iW12Z!iGB-c2Lka_VM1m&H9FAlXNH9=-v
zv*T(=RiAZA(*r+m%7fTyfMaM@13$B17z7?T?Ql(x@MDAg0+K_GT>)>yQ)#=j08`v^
zcTtd5>(1PXdiOIQhjxK{{)i4@%S-VF3>2vL(qa)|08{Bo7ZD84hfaG+LVOiuk!Pj5
zGGqqbQQO|)ST=0gZq*WgRviUuAvBrUlB#C2kDeyJQO8E%e0PX>8A#p18NmgF37KG^
z*>wrka)qso4t-w@=^n}?NFU2D>9VFVNPLSQMBc9^Afh2gL7^v9XyfliIPw~#2eGr-
zEZ{r;ZTgK@-G7Q&eZSW)L0)!rcP-M0_=@%nTJ0<04EiobI{w1e5^8jg9*=Hef#Ajm
z$Y}DK^Ky=!<v0fEXBJ923&E9#>{vnY9%Hu;WZtkd4%Yh1;#W@;{b37LN7OxOyPA}g
zr~#oYW7A|Jz!zz=6jP)z5a|x&E;SJjf-&o`BPzO|POvClo62|NwEUPn5!1?&T4k@o
z@}zBImIjh5JwH%2A|9GIw6)8olu?-e?TKpLnm45K(@Xl*K~LL1O@h>X*uxcu$d=_2
zT6dqVfX=>gA5e?sK(^aNayWmbF33t6y{4FJ!U$?4vTj8}b|jpqvy@-j<35_iVWWwc
z7pVyuwufBU9f)D-;mrSFrVFvU7$w<3ikt6yUqYQGrVC{Oz25YVPE)0vKrkfHWdLy~
zS1o2mnnfDA!@2OJoy<)S?|#iC_qsJYn0`=Vp802)qMX^iJYeTgn3H`~2oqmid}gy6
zv3Y<I_AMyxf!mN_x2_(4(Szt%%)iMgAQa;0H=1vKzHBv{V8J4^Tdl&L<-97t4k$pi
z!Z4YxN>kS)v%+^HGnRT25_^GT9BMK0xn1^{Ltc6DB^#(OUt`-oS)o|`ceULCv?b_3
z(}UVwxcF7&_K#@MAwERf3<0V<@@#Di&ZD4=y#%l<nJ=pkz*C1>9&!*HeKvh#q`Ip^
zCY6U=PL;ELHbJL_mdn+!9{kwYV%SsR;CCKHktC)Y?eUQo!>4P~Ht8CMTHZ*i`mdc5
zw|Tz5_ZHOV5{q#=@9J=3wk#x#80K@by;5vU$5-xXG){#{B%=vKv84z_-+Y->^c4y|
zcd-g*-7|Mu!2?^4q&*%g(guNRZ_qc?gKPHrT)mkJ`j8$tZ?!BJshUkt5Rb3j48f`x
z`%Ieo1HSEQsQ4+m6$sA$gqfS<miCBu2vD?vM<&j2OPM!m@*U%<I-fXOm{DvOdy)r3
zn=lOOpm}ZSo2T8fdL>~TH0x^U0DF?gg(hobP883JDs*#{0`uf6_sy)}pO(E?;}}Eu
zN1DG=t#RT%LlrMWPvX3%ozn{FkXx}+YTCi^*!Ei^XC~?gqVGmI@UJT;mz$NXh9gbN
zapBA9K8;o=#CT^^z%^cBU2oR!WQGe%C*5RK%?>Es7Oso597USD&oBjP=V}D6LWvwO
zV{1`;M$<ZGRA9%QNVD0LxjZ0LS+311M(%!gLN!9+Afrnj?XkVns2%v)+xSF(GUG9+
z3?ReQW7_Q;$<RKSy#$LgX-|gTz*s+78t)8ZQmBbRIg%LgZ;hd9ryXWt^8$|U^||HZ
zs%wiq;a@0_)qo0matHmF&!)yM)c@!YSP0S+RyYPgamn#}(I%KaAS)6KW@I%hxSoDa
zDPtGLiN;Q;2MXll7D9qsB!>hwa}PJ{-ENA>Ovrbrt*%3NAZn-ikW$ts2e$%h+qDq)
z-N&04P!5>(SZ{o@av{PL)&-mTTBzDHwD^jcYE<L1YR-8&lD_6zdF!o<_wDWBae!u4
z+*vPEjaC3P!-l(3R=D=^YqP!xw45P*)>#t%)~Vb{E{6yiiQqw#>DZYOdM7S()xs!U
z7g<xOw_;}sypN9M4j5Rfuj0d%Q%*OeUd9Wq#*m#CPx5=!If<$``}!@^Nal$ml`lmP
zAf@~39eyq!*lOf3P8Mv{1p#zo2v!BDZ}x67+Y%Dyqinob+G`um>%&KQ=LQuSLLPzB
z-97wh5?3ajYr<^?Dt$64$(_}>z;MI0TzkHH@mYa+9fIY!{IJ;TKasSt8Y&+9*vj#3
z)a4NN>TZ8pav^FC1c$nUVM`jzBw~C!F|v2a+J!|ZdF{7BBI-^PwW0bj49w7?jKBnF
z2f$`c+&U6kn`&Z$W8~yJc@9ts#~eovMk;Xt1`M?ateUOnL95f*eZK<IEo|8W%P2>}
zM(Q7GvaRb=7iE`rdU|(AM+F&NT_k(G24dPvi~(DnMKAMECszw;oLZ1*yDx4UwJ;TD
zsf5d8`by7SluvsPF*@J=1S=$V6Im$`%;aHNG6D6p$7l}1J~kzBvF}q{4rqNdCTQH)
z0(t#F09~(B#PGLA+p$|(6bOsd#QZ{V%Fxz6)IO8pJN2%d9x82@Pkoo{{nOK31Mulx
z?sGo^`GAZiKr}?WH3>51H(U1O$7`(q=KY{+df`et64!3nZ2)T7;p34(ix>(zQe=k6
zvtPxg`S<4}UJOXfuYT6R0H)VIl3kF;)!dx{lH7-ZrGjiyU>;gzEPRD1#Tm@KfXMD<
z2Ci=Fa#G_jT3BPVReUg;r|Ald3aRw8{<#bV`PJxgt%YubXwLZ^p|nKJ*=#IHU$K;m
zfiUD88QQ{A3x^U<r81owxWXjf+UU~zKI}VUw_QY3#ms=Z3x2%v4lG?pCpwMsqF$kf
zdvl-b1z+Gn&RiSn(;jU9CzPs3kK<z!qhoryi->?F;4#XtPpufYF75pBBtg<8^LN7w
zqd%P~e2igBy~CvlkxW#kk~gX_<c;wZ(7$Ld9$pJ3;eyJl($F4W){!UhKLF;tq#po=
zhq^A^R|o8ZVjZ~;RkaU5%VExSO8w3sKFdSTW4}kQ<v)0UYp5%4Ni4q_M7w}x>^Gv#
zl|fZ$j>nOwz9C^z#m^(fW!3k9j<eeH0~l15v&&-WJUV!0jgA$>Bhc;OhY0qrORk^r
zYCk{}YIldqRR?yG-7#>%2`}#X^P>x@HRM8{eUBV#<-3ESNUAs)dKfT1ss20o#aMR8
zjb0~pzqG9Kz4-4b7u}n`yq<b@&*&*i9X@y=Sp!rfpGUKk-~KjHnR@(boN!a0FH!bw
zHZY~*p^u*<0^ge%&2MR%Y4~6o1DpYh%xl^Nq-suiP@~vqgQYJf36s#bOu|Wd7&^D+
zd(R(z>T6XnfqPA_8E)oXpLWH5TD|_E)d!dQLGyh=QA?WESgsRwgek<H-5)eP5wv2|
zU~paa5Y)cGV!oRrzsyK0C6SnR3)7&aV$1Vui{D$p$3P9NG?gYwU0XPi8SZ@Ay{5(#
zMK#oSL?`R13~xO_O@h1LZpbLD66U-<8ETdkuiKgtd|go!Dr@fZ7f5d^!EDwRS3bZ|
zAgMl>x1%gx3K(d6H-OBykn;8`E|_Q7R}_1GK(48e?*D^5|IL*DVoz2MX7>MMPwxL<
zPgbV?hCNqs<^N&N>+f>G1ALw(bxOxJuzvofkVEH1x9rlUG}&tcw5eo$*=D=n@p*=Z
zfk!vNdM;0Rw7=b4m_&_*k#>RZcI+Yy>>%G6zdo)^Twf;~=IS{CF99D97V80h%qmX?
zZ*|T(k1oDV3o3i@1e~*+3CK5g4;G{#_4e<l8>7!tq$8A97~wluOq(T3@_bTuaaZaC
z)pm|`I|TZ6`EG~^9-@q8t{~l*tpurRK+U`Y>rnxX67HJTn=rOLIY5j;q!X?>F~COi
z+nXVItvC%vJWxbLrK<V~T1cH4fRF5U<3@waJ6YpU9A+8jhOmtJKDbL^9MbM7DG)2`
zLf(=RIYgjsQ@|YZ%yNI3KKz(&O3^7<E)&}i_xt$C{=!LwHWZPbBVLBcA7W-U@jzaX
zptd~{q}dX<M-5)IAQ_TZhGY;g<&D1Lw&ak9;*WD9$oh=g?i=?bY6f&>(Z@YCBT|*v
z+{li{Lda8|_#1f97lr`HNwFQBlI{8LM;_wwYg~wXlQEQd%s!Ew{v4}r1ghS@RPD$E
z=T{&V&YQThF2$l~l{YniiEWA(^33Nwz?i}G_lKmxm{gKEmRtsG`As#WB1kcj#>MC1
zj^l_&8L)yQ)jbK;DowEkQ3{R%_bOL)_PM>@7l1_yk+t}n$Ii4_RLnz}ke%H8W)gb&
zgqEC{<WQiHVgU(|rS+9*GDYPCU#_l?Ym}t5IEnqxIjdAZWZ3L*EGbli$llOYI5qY;
z8^A!$uF`Y&{Rgxvh6(L+Wpht;BH=?N!^E$U4YVe)S{kERkMFs4v@UR#2IHGl`^}da
z6v9L(jF0k8sAIIy?H?=yCDz)z{<y0+QJ&kFV2U(WF0W*=$E?~eLz#NeQR;n9^&gr}
zIW~VWiecHkqBTQvy-Lx80?+e25~+DKtHa;%SpQCGdn*QO?0fuG9)3Op>r*4KR^lK%
z>>&jG>kzslKPg3+XRe`m0`H;dzIU{yj!Z4$*4Nxs@6yGIhbj1LRZrmxcIf(DSO{Xv
zBb5FrnF^<<tD7LDwH6;u$FVbTKEUlsLt;(%K9$xWZPPaR?9?_zNCYLYIq@uy8sz$K
z$hE?SK~c8EekhmyppW(88VqP}OP&-nF8e<7?`bNSwmUUb__GO!c>~Y|t{}azGJ;w;
zv?Jz3D4>;;Fi8*qj}wR^qk9#S+b_ZOwoXwyd_Rh-!|=lBLXNvTiss!yX#iyw7ViYR
zvI3zCqbk@*C7T2C&0jp~oc9TaRPdLT+JOdh@JRT&?#FyHdKu@s{fOA-qoAH;R}5@!
zwikT!X~l~V0i`%-Fqcfobr{BBfd`cqm^m1+rt8rlxIxNuqVmi${qeSEVqJKdN(p0A
ztdV}Vm*QaD&~t>Wo|8+<LD8aN1!vVkBfW5PJsjr}e6i~RIi-?ipYSgdoCvUaJW|Qw
zK~x!N;aAk_Q{)k1KZt;_Gk^JMAXC(eK76J;`|UIKO#qyp90!y04L2`~2-i6aRDAYs
z#GwFuzJiYUM3g{S_cv?ZJbKqvcT|W(R_y*~s$2J?(6O|WkWDQiiX;g}t#Ipv*PJ3x
z!G6Ujfoay553E1S$$?9aQW_F}*&FB>OHp~=3fu1LqTfiM4;_@KV_2;9cWpOeNNem<
zx?B<ic~j@)dQxP^p80*_{>~M_g3BV$q_#GtL>E_UPd4U`@mq*r`&l%&dL*^duH{14
zG4F3?SU6Y%X2^RUF;+}7xu;Z6>MX{C?<`;1n;ON7w@q@yX}89efSDi|6soN>P;om2
zv+*{Rkpc@A9cw7Vl#J2)q|j+c-;~Tty_41$xHzF+XY+2M5tF2WYS{Fduc7(ATvx%9
zgK;r`Wte&A%EUIlB!H|p33Z^8Y5s(=pAoPTW()k^p|MbA|8&aOUJq;Yz$igAs@gQ>
zOkJsS*)*6<qRnK;2(4NM*02j^O+HNU+Uo^*3&?y4KdX{Bz51%Znd!#jJ?YL6BJ%kl
z$7dpggNols{VK03az3*<lf7_>gI3b!M)?|f%;HokkZ5!~_Ea$;PRQJvxpLX6St4xS
zsV4;FzUh*#WwF_j8+hIArBTxjPU*Wjtnp_=!p8_K#?aAK=Hko=S#pVf^49HW%EL>~
zj92Z1>)*cZ0`djb%ex0K=>q;~A6eF4*>#NApRVn5zkwl^TKorB{R&_&?Y!xA)`DfB
zs8!<QB1^bQ$X3#@7Q5nAg3N+&-1+=kt6yD%HLUVTRMRv-Yg7<p<zOO;C*luLMEnCV
zolW1Fx_S5Ub|A6XXqQM{9*82fp3i6QE+1;zU@jBx^1TjgxLur#FyMD)ib@t}tk{{T
zOthp|j8ZMGqS;wKl}u1u*^i&Ej)K);ZHndY!g5$VQQ}r{PSHx(!wW;X5rxw%K&!5$
zYvZ**PoED^rid<Sk(n=01uJ)EX>_y(S>YXi=_`k+9*1_Q9h(POJh_NB1WoTmSN*(d
ze_fpq_a8Zc$hWT1{<a?UExZ~!+f7#|JndN_i2@oh^H$la*d|c;z=$B+qa6(9{R9Gq
zAD_klCJr%dOzYsMksqmm-2WB&08u?dK)g^|enR{LOc!(nX9)=RE=-|2-d{vOY3F$W
z&hQQneM{&l&PsC0Xm9Nw`bh(TmKJd~4SCY3zErZeK}6};MnR~~iEgs=NY(*rFFXx+
z_KjT{J<ZZ%I82Kk7GH>d3e@tKP*>^G1asL1LVgBT$L;<RN7K$9z{PohCi`$u*Vr4a
z=!YQQJ1TDU;&|X?%BZ1s;IJWhI;j)7y^ieW`$S4wt8)}pl!QQ^<E@!3MyF&d>S5)E
zQH^6q-=-*m4mh0VtEJVMVtr`DkYnv!b-C6%;?64PNE`1Vz|X^ST%QAQ`*8S-5vpwv
z4NXyr71#Ckgi1i$s3S8y)ugS((Y#RvnUp;Suhf~IQeW;s4x9y8TZdLq2R+I^lf2vt
zb-RL&MMgmJdx=4A7gW#GAiI8Ov$>2vglIq79MAa`NEg46@tHs}qjay9O2&?9Pk(G5
z#ej}2bs4Tt6t$cVIF*8KM;@keTBURS5FalS>)vLuoCuTn`&w2D7+Wh#GZ4n!#x0e`
zfp&YInwRmK%2Oo={#k9`2BAeY0n-GLX~1bO>-w;cN)ansxV|wC6~aHdn9@mYssTM}
z*h-ZFz>GNnrH^+Rl*Hd?`mn5Q$5GN<2ql7ak!I*uP$fvoY{2SLF{NQNKw)F-i4)nk
zF?Xh(DBxqdD<a4~1@041HQyARf}Ak(>Oo!=lShY+5-byL0KKJQ*oL8<Z`RU7<7`<q
z?UnVCZyLH52mAk4gu#gEZJEe^N1LBwlL`V>*I6AP5|%P{R1GVv(Tl)t8r3~_;3M;(
zSD%E^?x=Wb>>JmTFv(zNMjoCV7n>s^$2caKyg$)MV;G-CD#5?MS+O2rC?&{Qlxf;4
z;2a*>jhjq;;vpeAq}@Gp&Y`f?*hwx$$je#nd(GH5LM`$Lq4@-U+P7A#eIl40-KMh4
z8e?!?vVD|CiPeATNb2RcJT#Hif6>^`!T$A;s^@bOh2^sQ{&<~B*t&z6_!iAWRTzC*
ze_qS4_Tnv83<LV{lwvl+q8l`#=V5OlZY*LS_TDQ)vnBDnkERNUR&f6u+N7rd>Zx<n
z%dRS-@2h!1*<6SIXp{fJ*#<`S2C>8h<4g+U@v7f0E&{jo#BpTaVXwAyuTI0u?&^y5
z^g)_hyoC&VDW*6)ho&PlwqaHqI&b4EnmCDi$yD9n$4YxyjEr8iJ07aZMrT$Dn@-6*
zCTf&Sxo~pVxSRmVW`p68#6Me{Q~(SafC(aVTw-ruxW~RxH&X1%4QhM}VO+T+P_B^w
zlGe)c3=N1R!uW%n71acu>^V4&8DQp&3_cM{y8!c$f9_u(FnKMzoev#w@<=kh6q5j=
zVyOh?CwJG}jW=%H%jWk%_gz0|obD>fz#hKdV9<;@-i6PsFl1e2maAU0OKqfr1<P}u
ziY;->w&NuwsH!yiX6+`8I|D)8!rjOgY+bu@37Re;M!;VwFu-vSD)7zI%>sWc3dLRy
z+0yS^972sJ!a-!vB7DQF1MeR2Imu<gQ@Oeb_PR|FF_si!vBY<qSE^xv#m-fP4ZMvA
z_yhX0Z3%|3(Y+{mswe@QIZXa#JTp#8^Yzg9Yjkz$`z?5O)egkBv^GbA!f_D=az6+p
zRjkJ`9WKlwc&K$HI*pD<D6pqw3<Sk8T({=;(-;VG(on|<XnKga$Z^+=d$0O}tS`Q2
z-Btu{U9(w1!6e!Vc0DK{+4k3Ds}PahtXFYJj?8;i2!K+41?QGNX7{(bQjbaf`6y)q
zC7ORzbW9G#u6Y-~wACc}q<1^hWWDY+NqN>28}r8?NVxR0pt!kGBtJOyn$YN^;B2br
zQzZ+6nU~?S&>?JwUd{8NYpo;Cx38+q^5d-!PK?j==f?}&euA7hLDLtc-Ir5%7DN^>
z-8)CWJI^PUg>Mt^JA1tI((E5FS7DoMBMG<(?;xysAgL#-;4IW9e$sTqgUGrc4IeY=
z_Vlr3aW2?|Mh7I_?0iu{*^6y>NXZ|4q_FQ0oTyFw5WmH*th~n;2=-kD?G7(Uhd%m#
z6Q+)l6>+)x5%q6|8H3k!=d|&;Nm#>}91uUmRk|*0x1)%eAE4@mB$fZ5!+%rXzv+;j
zh53K!koAA#KmR}f#mf2L(BU=ilnY+RUE*fCpowbtRe3Mb4&rHQrCo-qGs={2bY93v
zhPq!Q#&4<m$?+astZ6o;0VnDxfl@s_q%dnXoUa>*=Tmk|lbJim*I#~ZJU;uYryaBa
z0mH9el)FQpS5LpT?@I!pP+i!;U}lp(LEW-66MOU};fOZXZ0qyYkD3)k+`et+@>r*_
zAIqPPG`MJJTi}3SAch5pn&<y4^B$HkA{!-O+q|Qem{3;j9m>ypBByeUo=(ulbzCkv
z9XoiB;U329$PnlcMeM#KL}zN}a~eo_Ok2~2d4#a+#wvnSM_<{4y5cu^;V0&n1c=XO
zUYs?kDZ|4UypCD75fq}-1{XqCoFJleQJh$cg?IeA4E;M6V{@K(_rBL?Vf|u1Qw5l+
zRyAo$z~r)=Ph6h-4JCTXNr7S6jT#BGN|Gb<Nzu1t%Y-0!f$x?6Q1XN=kJ-m}EH>|z
z?dk!+)<PN}Si&qi-9E5X*_#9<?|8=%LDh9$2^WxrnuZ@zI1Abe&pSdT(}te#Xu(!9
zF~pL%ta=UjOOI86!Cr*HH?E@Xb%3hN93mBlKxv@SLJkQt$2=URfskWL124Pbk4kp<
z$to!BE0%*cUC(OGa#_t1920lCQM63rq-cW=l}u5!v+7l}KA05h1Swsg1Pn1d!N`%}
zBWEog_~~>`c<)6(_*HhufMMiLacrXn(X1j15C`jzHG)2^;;0Fb;1sf*(vY%Kh{dEJ
zsNp^)Rj=f*)yX{)I#Y@MAVKW~BHvP6SC|yLP{JBh@n<*J%wbD|_)DoIDuju|v4$?>
zG#rQ=3zOkW3gHDyLA0Xv7$#JEBs*cP=dRPqP7RE5&}rvjF!256&JvK14AaOei3Xie
zl;;<X!tYDW?#2W%MmgfB9MA-dvQ0ouUzC;}tVI+sEs~Lg62yM$l8ypHs8j;WG=4dq
z>VXBV*F6s7Y?GCB>m`3RXty`VMAx4r(o=Cji0s2p-K^|NQ_9!WVV@0QmO93kz`oF~
zcjhTvWa!k=J0ns<CaF@<EA;K~g6V#9zusp;(JQ^~Z47JR%}2%mU6{HP`+9?d0t(AJ
z<{~(h?vfyh-hLt!Y;PU%*GN&qqR3z^z{}_cJnNTwcdE0WKCCs??Sxh`GpQ-^IFc_h
zFgM8q>!9M0!w}xg9_s8%n+C!mEenA7aB3ES&!fBAJXH%0iMTNNZs%A6`Cdtcz%wW}
zFYEnJxNWZ}-k=eIB$5V{ZKzl`@39)Qin2~nQOwC73b|mU*@@S9dR!Yr<tT)9`DDfk
zEt0c=;$GU|ug@ebgLI~V^ku0sEPzYoOo@){70RiV1zs3C^)Lpx*t(q?{`JG=r-HDV
z+8Uwi5!o0-{WilaKbuwkHvG-fi<AtMB*xlC-6Y#lJA`vuhE<~8k+cKugiswp=p}+D
z$Q!HO^>@#^6KHb&!WN0hXfD-D@0c|m4Mr;#bP&FvSkxunBZ)FHK`i2(9vdZDz&=9~
zTCCQ5j~k7A!aW2vxEVrncjVw7C0vm)3{h7M-7HTO0XPKlpxew+F>)c0b;jQqAQ|8z
zRw>ncHf6BLBkSW3BWC&K9>$F@Hb!)+y4mi0-uTt1Xp&{Z*0{`Q57ybpP8<0N;DSeP
zO8XY7->^)}Lh#oxUzzcqd=;$mzUmnGs5|mdjLxD+MVtkdL$xqlqU(?}5C>Eib55%w
zk*c!ZIfISIh%nB`9I52jYM`y|Q3RJ{chcz)e@s8V#3-LWJfjc_rmEFO01U)d-pC!x
z08RH)cGFsRZp%_JqGDb#(MnEk%ERjJjbr^{LnazDa$pY0o#aThKlyBl_#e$Ibut01
z?xbq^z2!)KrHI^{Tx7z9z8;^?m}O<F!dgMcGPEtn0;G}w5*$7fVr!YdU^0I+a|@y&
zcy_TX@P3?L5r?95)XQAvm{u0kt&^slb*vVO$!B+$5j?P#K*B8oSR>>>f7B+7+asO^
z=(7Nnd*f#L@zx$vwrtU-wIL75(?8BVEg&C?<rZtV>CZi7{)J^L?9lIyk?I@wIQBxZ
zW~ITt+#^KCr{p#YH%)<u0#I~hrlf4IJeye=+i2sy!6iwlV|V`mZ7g2}rPqp_$(WeV
z3(G-(lfI23%s4UIWv=_QiSIDHQf3Ll=f}HKT(`$gyYx^&hrI=Tak>f^S0tY96Xms9
zO$M+Ew)<qYv3>DfMqLCS42vTNal%_#EGNE6-|)r2?;`T<U7x{LJXhl5nAmVDaRzCE
zO~Psh*RyTKW!&v~-1V{Q8+qjhN>R^Z9Gzo5-UeWqNV-JK1)f$EpXsFqsfNpYL`qzH
z+T3`5+TqxyZP-+U=Rd^V=Ezw$%`P|WHaLAt+9fCIf_kclZxa&obdtxJ@knV)>$>f*
zI2G&$s|@<)BrvBdMKqX$*R?ruA7JLvx(y4QnQdW}$xjSojL2OBxTl~93pZ&R-3^E4
zfoJ(#gpyX1(-?$XwH$Wyk4gMxQg($cj<99JH%Z|6Uz|Ecj|9Oa;+k7MV>W8iP9ohd
z73<1heDt{QIRjw7Q>$w6J|`zE(1*4rpK6<ynC=w{KQQyhHYcmAjs-=0i`OdoK+s<H
z3@>fzMNAo8iLMGf4XQ~G>pGXGgQBl>J#)@F<IGi_eLkMdTZ1}D;Ot_npm)kTGPdEp
z>>Cu_RTHFW$yDOEg-nTt5oMWSUZ@=KE0rK_I?=9?bLxYywMF({lkbk?Pqeib|IV_X
z$Ukfh1S*2{yO9IwX~aOtr_@!8$kC&jVE?gn#?D((sF;>(#k!5F?XhdxH~>f1vVJh-
zyl&baGcf((xW?{0WA`FhU7kZlpmeu~m+$`;BDJ(Lq2DdXLn*c9=vJyAL@I?>DifOY
z;Wp5mymN?o%T$8&U`t`ugSnWnxEAdYLzcE9psQU$jVLUvuzTuQZbA-U8PW_O-hUIl
zN`j^2!}c%`)hZg7xD@=3+Vr|^^?VFk%;19WM$?<NM~C!q;a~F09Bcm!#sd;rFO#M5
zli2smdgMC`t9p8LNfo^n>KtC9Zt;Q*g+$M%(8Xm%bb~TdtrT7UF`VNz8|CX<;a<}x
z+3cLmlK?BnzP~WD-eP#{<%37`WY+y-o=2k^&U=tB`4UkmEeHN3wc~LE<^%sWS?{FS
zx0lVF5E=r@NsI3Mht8m7>`=>v_62^(1((e$ai+gy^Ddt^$fB$ZB*!^t&L=NkDx4vH
zr-$Em%fnbBDI7xdX|FWYm2HaNTuVyudZ(|8TOP8p`$MPc4#c$S%_*6i4mR<F7&(G5
z7w|x=IO6eUt6iIXnOmV*b5|+oz(eRz9<I}HDuRB~5>>-LzUs<SMQ?hnRVLRYn!!xO
zUp}QbTBDZvcc{$Eu@|%YD^-C7GR@*GvEeS;nM>H#F;aJscVV5E9Q{ub1>Djs{QG$s
zoB@ZlOJThQ-&BGw2?SITj^U3mpY3ku;aBx&MOpDBrcWR9JzDwwmeMyfG<Yt_QmJxy
zRYa?jA%v{qvqm3B=zv;_U(38-y;pW&m=k|Wt{}Ix)+A;Bq4$MmF9>14az=SvEj5_0
z5Mld`L77GKhkS@I<L_!6N$wxpEihZpd4q@a5ja0DNfNV{bAD@|_7+o`-Ke;hTVzKD
zVKn&uKDFZ_Ky|Wy4{_aEg?AkbXRVo&(&pt|n9P;%oKCQitY4wH{Ra|8p|daQ?gJV=
z=t#xmfvi3v1(|*aROY-gd(~ugq*xbVQ~bc|%+?}}mSjUAd=|Dc98kU++gewrxd`V8
zj)0E%i_A=X4F$#!)6ma(^LitVai>H~l+%(Nr4WX-Qe-baTtB-2@meV8d|8WCnM0xC
z;zjt<C!m)>nc%?MX({Kq3Du*JgTTcch+!J+j~n(3U`2Ic$`*Y(=<A2lt5Uk*7<Vqq
z($fa*xPbEY&Qo)@E&%+{#?<OR9sj?a{J$JO3pdCA?St6<Gu+SiUxoYo^t2uE#*q>3
zJO3uEcU(anl-`006POHlA@MqkfeP31O%#*C91us9njNoIR3~g)*m9MM4I<)j`o33w
zS6|dr=ll1VqwcbN{%arP_t5ie<9ezluAp}}V7>9WfK#ISH*G(=!h^KK9H)+p)k4?4
z-2>Kp-&XJTU#3-#uZJtYk0<8X@oSC7-&n@&<BS~6Bmj076=OAXO53OBSHFj)o18D?
zoKm;=m~CnSR`N6?mzRHH$dndwfBK|n(sg@!5Wc$R=a0DsTq(eodOSZ!t=a7cuguO!
zI^AqI2dU=0lLKDn`a*%T9?Z@>*VSPU>xk=4je*R|p1ctpIwxIS8O0YAB)f*(vChmr
z8bmxP;`V!DI{9XfZ+z`Ewpiw3lnoABobe#uuzg(5hNo|m?M_HwM69f%!9QPK9djHL
zI0f-$<((i!7eZv|y<CP>Log8X*efc=@*+%^bg%c8)p*(t`$C?e_3-T+4-~|Ry^6A2
zdvMXpoJ6u3ozH@%e&KW+*lk6#c^sHu=Dg-FzClZJ4I`{tjf<gK`vhQs5eaoSV;yDF
zg+BMwRVbqJ4}}DU0kzJ)3<Y}i&0a<b3J!BP0@+n`n($*=N6`Aj<;6usg!F`$l-6+b
z+g*6$!N;Q@gMR$o0+qA6t=N9bAE7L9zaYv&rI0mb!TKPJ+KSnJnTtvU8Ax?fc*~=>
zZN6j8PcqkDI$-KgG(JO_4f&>WloR0BGfPH41A_||N()q;ml{~TgC>f)`BuJy#YIRI
znAA@I8kPaBAI6oIh{nz#r%8iFS#eO&NjfYkS>o@F$z%82R&9tEvk`=*%^*Su;XcGv
z@XgN#u`md~kisg3!2l7X#;Z(le}d>HQj9RN1V!#Kyc&eyIeA1fwsg)xP;e~OA~iNB
z(~s1~{R>v2^=YP+2}L+)6gx)rE)dLa-%oM<wr4z&GEC)0ejB|59I97RY=N|hzf{DS
zQx?b$wh#~tN!FW+Txs2jQpxztR0+2C+8Z3GZ4<HA8H$_&RPg}pJ1nPSaRh{v5}DdQ
z8YMWsHlWSa6UcQ{)JM+&=?WVoo2sqhjAA@c7t*dmr9WyxzC16sP^Wg8Q$c14GOHC7
zie+`#u`VigDjO;8Y6~1%;q0L`rNbA_kEKsn+FWETmS~>Hjs#f)n>$}37)M!>)sVwW
z0ywLAzXX+=FN5TNt`}MnXU#Y6JOok!K0&Nh1F;t1Rh>wk#w^M-pef3X&DCXbnFmcc
z1OimNCc6`jy$m*yOCs@rfQpobgEOCdB%FGF<4{H4HQM%kD6aRrPP_0q`6)92K5TB}
z@vU*Nv1ZmE_Wgc~9aSk#xZolPT}jv#$Q*8h+Y#u|GcKPmz8RJtWA_1Nc8VHml`f{3
z^?NU5vbYtM4o`7qhc0Mj^$kO;Y4=151=dageRg`<xdawfJuU^^f^L<vfMgvazZ&_$
zPqTEIG8}9MrRf^;BUMr>8(m(-Q-&kx`o!8;2Y$hhAipwenTRB{#|9Pq=UB1T{#9(4
zAhNvM@XG42834a%?M_}3QE~y30a-k7HXn~Wp78^jU8;rPWGNG$?ldx?b(ISw&}+r%
zIfJo(n`l@Rp^ee%%95M8ml>e`+ET~okr3~2#(|IVS@LgsTIu_3d&GVr8Sz^;23T!w
z#Dfo>Msg*Vw_Rh|YP<=b5qeI_dhCjnCOUl2>IBskRB7YlRl~lf=U(Qt7%Qd;3ewQ%
zDY)*)sCQKVP9l!v+8+b*(TBNszjEw-@Gi*rwD!^Hi@N~6Fj!738e02lt%!Hzk5Psd
zW;GyV5EX|qh#QQ%6Eq32{F#PhO3Hm-`a!#2iOcQwk)LNj#?3+j@zb|y7umva=2N43
z!v)pDpI=M5Aa(i*1CrKQW+6y+{Av~jKW)ObXgq_7py-Q+0VbGua^$=jQ7=*}1Bo9r
zLM3hxerE4UlLu^_QD>tm_`qoqX~>-$wI1=#zvlR#3!r_d8#Ix71kL;ew%LrR$~2w=
znMh=jY|v-9a<4xkB*+O}UF|K6Fy}bDIX$G=7I0twuAiF)%{^ifF?h>C2pi&P-0a-U
z#Z9DCpW%39Lqo!y6l~n_@v?)Nm!-V0ScqqksiC(3*qkMyJ6NNYWt<Lky%H7%r~A?E
zVBxkJYN&}Z*A|`O>{fZnVH`oNo>Q_(<#_=|5jg(K96%NxHh`rLIge2Z?ACWSgW-}2
zMQOiFeuoplG0UL*l|Y=hZwxFG*bZ$#NX4`krOBOsFVqyl((S@PTn~!L7V9>5kgBjS
zTsB1F{55f`j2-WF(9_CpAKn`DXIh9zEX%oXh?vR2(d3D#S$)(>e26oXye=YJ4I4ML
zjF*h88Lx38)5vqUgob`q`|u&ONyf}l|5Ux@sO&N+By)w2t$>m**Z?>GZvFRg6^VqL
zRV<@8n+^7EnRf|W{Azv&l@;H<mbt?e*becp{A8XkSO>7#9W8;)7vbjiNml7NbmAp@
z55M0+Hb<pwS#31fe_AeJbw?jb<|D=eL|cqxrFVOKm0dgs+#p61q6cjDMZ0R?4NF8}
z7i~cU0}!a)ZW8}EF~w6%OQ@w+?@xG}X=^Yqo99l1&<S2KM+}kc1ZmL+isNLtM=2;w
zVl&Tbisuqu(gvIi5dAjF7k9d3f;kK)kEcvMH3|^jo(mR#*q#njLq)A;kWB2Q4^sk%
z0Y8#mFYj{ki^C|H5+bPD0u}95bS<=2nZ5>`3)hO;Iny#qyHc7W&oD=WHVU9fhKZmB
zCDS)RhkrcW;mpGbh0!v3VcnK1Pd4_ZxV4YPMamhTI#_t&mixL#QfMc3r=Rt3uP=ws
zkc3>PCigy$&hnlMMGudJh&rf#sDGafAXmOsYl%pOF&s0h)OS2XV^uOmJ4GMmj<?JA
zmG*2a#KW;$1Lc)N>5dzlSA~>IEuh!4qIm}joDO|aH>TFZ9CojaiW~wF&s+Kh(2m@f
z+XhJxTo^Spj?^cK)~$GMk!XxvmW=0Zj(S1+{9VjY@l!UZila(qgfAqP3CK4ayPB4#
zjS-p|7!P4B9!vC~j4A%8TQ655_*P6sim6Qw37ENwb@`*bu9lU!D$0ZCq2SJU#Hehu
z(-eS!{|(hk(LaI6ax(+!=i2VnA7Q`WhbTmn#UO=Po4M4^n+I|44E0yQyZdJ!2<crI
zX*-WWuNXPxoGac@-8{nDBOS$EP3w~KT9W&D2Z4+#M$;7tnGtSg<s{zCXtnEwu(3l`
zgL@M<XG*#&9PRFH0#T)JGf7s`mp8G{_#x_x21?Hs-&M$78UnO`c7D*!Yg*}d&+2$a
zAfgTYLXRP%Vg2-04~NVuFhROWz?HGSt=P$Y0KpAulVaR2SToBqcxO62E1b?M-t2fJ
z+t<tvszpWYP4)af1zr)%MqSVd^h$WhdqlEom8xWHqh%V?YSgGi=ZN+Y-+#`CBUK=7
zl7f$v@g_6usM5M?wm!wLZ1XA~$c<_!30!?A?!A)O$sqqydEoCt)rdG@GXO<HIIQzk
zu%$a1#KIi7vx9pwdDJ{PW0+(Lqh;vAT+`o0aSlJqBu3jG2t!NA5%G#-Lj|c8q?+PO
zwj{cJNRll3EZVg@x|5TBx!d)OtE-NRbsn(qVT{hCA;{BQW^iF;v?j}351j3pV1q8t
zty1OoB(A7)UNT`rFfNDcGEYx#6u$xOz4#J2tZsD2Cb2oDiWHws7rO=^@fwUG1h2G3
z)iZZT_T8QAP>rs*jW0%)bH^?PlyP}Jr$TywoVoSUAHeTb`9-9W4~z<C1YWM=ZvVmk
zyX>K`zP&rGPTYrbm%M9e(j?8RpmymYs)egm3!Ot@2!Viq%ctMYWzdbSjd?o&$=j3q
zbqRRm14NlGLpTmboD{VgMs+meU(*;Q)U`(=?V`p=HnBL3lvZkpe{*$eJgu}Sj2Su6
z7o{*yP#-I|u2->EL#TsfhIN?he79QAs87q#Z$cvsQdZgMV+J)(2z!^%rOdT$A%i(|
zMOV71f&FFj$>fKJT{LXh>qA3@V%iN(@a7fqWp_;Qs;bxJOac*_pPY@Q)>)eU*oieV
z#KU?Rj2$|ZUvhBfR^LsLaRVg{^VyCuP?0)CJN;tUo+Z~u9h&cANWL7%bHY@4q1J@?
z0HLV8^vzI5c3cfHa_L$l(Qg)GmD~6QcrCR>EnqZk=c5I9ErrqMbyu%#6-kA_91<9i
zGA#t{4zn$Zwj|Ti{7^Qv>|MhnSUtzA!8#>u!7W-XOQ|O9u)WHoW8umB?R>cCLlBZt
z+?z-9i0hYGA9BBg!^4?WR}L$29=MbCew0QTpwL5G7QKo9ty7e@X2-I<&~A%K3;*5I
zUn7x;{N6vKm5<xKS)cqMRJh@($j80M$@VT`NOh}I;|h&^*^j=iuwVj}1>=}%T=g|X
zh>F=?U2|8&QwQ<&IjeyJ#||y?a+XcxVU^b_a_&<cvFN0QxIbnGhdWpk{WjbcGNRM#
z)_LI`q^~zOcusu7yY87BVg_GQPyf4L3V|^5p}qBJQU%q422%b0Sc$?^|H;g2ElB4Z
zYK7c*@l}1RVHH2A<y0ED%1~01ipqwFH5k&vk_#dA+yBgkC7+7n(w1Bt*}Gg|t*wtK
z%GTz-(?`xe{_pFNzG_J#2rOrZpGI16%DVeghyVT*w5;RS*WIo-Px9yS`0e62^48Ag
zbJ)F^?<uHJ<Z%|G%}4OAb}vMV0Ijxh`{HUMQ0tmpwwcRU7VzW<dl%2A&RA-4r9eU`
zX4-=%TdF!dx^z5p$2J{I#uXDPr9cKEIsr+g?}t*@(0=<}Q6#Evcw>o-jjd<n&O(3x
zFZ76Zjiq2wOZnBUlYtq3VLr;Rw8-6dgxQWs{gIs*&CI5l8h;Nyhc)KSzRjz+j}f8R
z%^milM$?AabUw~G4Hr4a2QTy0;}-NEV-juv<D+)<E37A!*sFkJ70?_d!|hE^+Hgak
zMwdgbh}1VVHPu$c(V?Ti%=l`Cg8<z2OzzA<;AmiCvMtD{J7`1MLe;0#`*F{`&MZbl
zyO7mQGW6nwK>f5_rl%JWDw=39IBcrpogXvjpRTg*$&5%R@#0us3F~W`LlQn%!>R2h
z=g_RUT1EJrI7~!m8)pf_OF@gPPOWOE2+WKC;7|+w<B{$RjhX!iSL|Ps<Zzc>{=87f
zldrE8VD*PNUhM58G_Mx#-f1LHvEViU-)35Bo0Om@50~;T`r%WqDQ9b+4C4RC*jt80
z@rG~10)li&EG;Rp)Cx!mD4l|I$F6jPfPj>wv~(j~yL3tmEFIE~v~+hp<M01G@B19b
z`{n%v46`%$%za(wd7bBV&+H!}H$JaqW0*rO;&oY?TM*-Jfnc0Sm0tKP&Fm}<gu7{0
z=T7524eNE>{Rzt&cgWwRj353YUnt%pp9mT{{5^_`7oHt!JWDETz`kP|Pf@8naKiMY
zt5VZJ2}doO;Awwj`-4tXsrH~`w_&)wOOV5xHmTI$v_tyQf)drnmJ8jax=f6&aa(Wd
zdr$c5cNMYoajIvq^q_CYFDa(2=yR@1sM9*Cd?sg}PF#P2>iAF#k9~f`n_AuC>uGm%
z^C@PK6upxeBS?EhYcH(M{{MIr|9KAoya_%CztI2bM)Ln#nEG$y|J#k^hy4HPM((vJ
zma`~2Gu3e)$xc+2wTlf7_P)f>ll%2H<^^6}iqR14-AU|F2Tw8mEO8;${pX-GS@U2I
znTbk<E>fJ9zdzmf!XrXLn!0Zt+VJVtraK<AA09_WxP4C?>dV`1Z@D|Cm%Xp&xfvg>
zYVVe_vWy>lmd$@Pc+>Pm!*r45ZTAD*4+F=$sbZTju~vJlhvdTjU563w$Cde;0f*q|
znuz(T$HL`@a^nJzTRHWD$BXH^p2xli!TkXr>g9)>r1TKil%BJF6};g`%IW=iU++K5
zjK$tUQ`hsxGi_T|I+qT<mje!Cbv_??X4?{@t%O?d+a5;Z)vv^T<ku2DrUrNV#Iy;&
zx60!Bk`{Hg|FkvFN>AbDy0gdATYzSqq}8Sm?(%?Kp5LmAHd9`Ha5*1ngM5CN+1!|K
zD-P3p`Cz=a-1C!0|K`FXMa*Xq=-S`5Dby;TW~x`R%UMzF8td6T>@NFUi?<EZb)KBl
zIeXoY7*83TTKZdwixr%m-&N&7BnKkz&Xatn9VW9UOJENx>FPR{*D&R!B47JiP@(G(
zVpS;P!Y&KZ4CDI9bwGFBR^V}e(G+&%yZm%w3BuXg>cvO`EcmC^Y(xx*DIN1#^=~sh
zSY?z7U*8n^5)4CVcSB{;f}_28!8q3nlEWSkD>WMz$<Ooe$tT++JulmK9i}`QTN~*#
z4JX9m19uu*hj$j^VruC3lj;K-?$=(Y=iebtuAZ4=6)L+yxyinIE57@hBISw?H#J}O
z*bFIZ%hTJGc6)fZt7ms=eDgRtHRz@&dKmQ&l(KFWM}EgP^&8(o6Vrm@=N+1;JB+;!
zT*M0|-rt_^(1ANMtJCq=Wi-Ptf|d=ku9Oc@8u17o)ryuEr06;Y)lY2I8{2z+a%)Yq
z>37EOUQRa|?o23{*Slh;+{06h3o_cOc<w7lGCQWT?miQTjL?Wr?TNKMT=?Fe%s(E8
z8@RqEXoQ5J<fn+FQH*9*UE1clj9@<I@Koh~p;~J4oi>AX@)Ec-?{Sg;OkS+}GO~BE
z&3KdBc6Mj`Y<k34JLsmhacJP8uC9qu|3kfYQY}<Rt4&*WT=xA{o6vOZRqf(;8w(AK
z8vCoCICooxRFf@-4w)0xR&dF(f$iQ7fkFtr`mlBJwyWu5PnK-`P+iT4cDdbUX9@Qs
z$_o3&pVYl+AWbZ=A#L&@wP!{i{#G}jORLi{+F87f8N)wz#)y8nQ?$3vn-0S2s6FVE
zh_CP9{R<zlrVx+ixQBD`&$uJRA9ufa_qJVa-Q0*%Hq*+MjZ96u)X!Y5IJ(#8tsaCu
zPYGE$Kpk*?5tf{#>w>v|HesGwy?D?GI(@U`v6N``^R$`%VvCkvY4<n0LiEw4we-dO
zW_neO{)Iyu|NLNGT1&eze+H+7!Ns6@5C2qw{#r(;+Vs0K@I{I#qx;=@@x|-twi24b
z@+4WhQdPn?45<u-14~1#>G>vD?`CN<jMi0_7w0_>57`^Iddk;P6fULW4Uo~yL#NZr
zFBFs_S*<iP^x0Dt9Ia9+*NM;KO*IcS(<8ofU;f~HmLYDV_3;A?%eb**z3r-es`7EW
z?y)Q~C%ZA{W!ax)5372+19q{xSLJDIq$z{O@LhMX>uqmXxdP&1=e)<)_@j2&xB_A_
z!UQj|kg@tP%Q%smF3FA|yu?d|RNcBiOH@X7btqS=|GIUtYdL9f<GV_ef@x^=HRAYU
zv5KDaHJJ^rgwpt(Jo@8YqOc~zKAqlX)ZXWiH=a<oSt7M5M-NcalqlIMrP7-lNKu_f
zAJ*pZW82!P2b1TgXWUb(vov(?%BNtFB8-X_8Xt07oU}(R1&mxCj)@e1531q2;C=h(
z|50I?*L&b<D$eUXrpk~HLA4dlAvErk7rWw}CO5(7GHtz-kiI{(7|<glK#`PwCyH@X
zJ2Bw+=@zQ_OstEIt-+A1?|F&Mjze~PmC0Pm(FDk$jcKzy^VIY+R$zMhUVYRd-%^I$
z;%ncnHiJ%+pEPg7M>01Lr8lV_%OPB?w-WIO<KNL0?0E3L6UsWbd14J_dPz3+i%dE$
zIxX3W9Bh7j{P}8J{H|)gS!(L*Cdnq}*7?;1U0ZF-iR^hNl692T_fA!>%6`r{D(d|E
zd17y79oi1FFRxcaw&0<a7}Ddcv5He*^f$?BpSeYp2<qJ>a*5YnSZ|pwPT90Y+U)wO
z=Dp(XTB!<KYv7Ts>AAvzv3J|wMrjw;kw^1sqY{%jXMNw-n{v98oR{&N$&nvt#0#9-
z6k3LEr{L$?jCks~Gmo5MM@R2$;LMtI&3Ko__ozuch5OYj48jupDsLYjqT7;*M_RrZ
zwBB<EZyjXH(oh+=a~V7cH2%5xB+DOopnNfTzZ%VX`22+Hb0SZfyoXX##1MTu4R*-T
zrg0m&OS@pv&z&X8b&|jHBwJP-M*BTWc77#W55IG>AMWH2H-?s+ejwY#b3Crkg;RvL
zeDA_bEBFgVhD=Fx*>b0Q&AOlNxLfn{BKJf~RF_V#FGuE{+nNf0s-<=nJr>t8J5Zhp
z_NZzoJ5IZcd&4DsGBZ?4-^fv2i~G3$#@^<jVhKHojlhLbEQ+j`WmiTQBSPN|$1KG(
zHMkLOKe*kX%}}+*vuIgeZ}PaLEf8EYmfCK7a})fn+tVAZt&E(-y1BBoayeI6PuuAY
zhvUaDy^ix(IR)lv4m2%gON>`Jk=1cS=011T(et~G4$DfXGc<(vORNL3v>VU720Xb=
z+bouJN49S6FAfjJVmj|Gi|V}ToT;yYS6npm`A$&(F-oYul);?L!EV$Y>3pE+2>V2E
z!fBelHtxR|7ddWYld5JDC47V6k@x0*Je<!GSo_IXIilSo|NR;^E7*Gd;`RH*Y0cZt
zx%vtSbArfGtIsXn3n~{!nj}HO+fVf1lHT;e#Me1-UW2Wix5*H4jz_Q!x0`8}5=Yc-
zvgSp$${2m&Z9M6FFDS&l`3yc=r+gac7@+q#999jtzF@zyve|K6Y8AleMOHkFCzaUd
ztm-T&YHn87XGZ!s9K$p9sO+SnSrQ!g7CNB~cE^+qfTjJuvf8HK+wrkdbA*+!-t0y&
z>g*)hqbuut<nm1c!s)QM^u_ISD{bist7(jjkdx~L$;WJd|2wTb{irJU%&5wjm#HV-
zOX3&jwQk3MFJ05p?u(0;9k$5D<Mh82BVVnhJ_{KC;9CY)m>K@Mxs@30^d{<YSxh^T
z{=$PgEvpkh&{OauK3|$|GTFCxLpP7hd=zF7awEAc|M~ibpy;2^C)_mMo2%sb_hcP{
zTT=<Hh9`9DZlYm_-1qMN#_G1N_Eno~E9Ui9mxt%fB)n-yjI5MRoF$gN1i?p58YGD7
zU3{^do_ae6gqPA0F<IW!LiFQ`u59y3Gkn=)?s~p+%97GK$Becs)qL*qx+pTYpe+E~
zhvJe?XWsVN{mJ1MVy@`NkhM&=2fVS~O2>tTgZ`}910QDn+|XBaj`rEDX<j(ijJOyn
zWO0!h{;n`L+O|{8@7|(IT!;9@i7jhQyO*;F#O=k%T1Jab0YtI+A;9-&#*489nVFe{
zEZVKn*%f{R>v{3YXREg2{c`Px(!!cYbrQb>Cmqjui!V#=e2uOt3-0UkWvO;^0dG2x
znReW11`2v_&<q5YmNmty%GUa646Dhg2o4*i;e2b&uC=ltC78`zy#DQst-IcX&%@j?
z|3=Im#KCxkdpxi>fT}XXs&RS4Ng5foI8_jtfcEKvO8`}ik@F#=uzTBmOfQ%dC%D|o
z5dZdZuzwA9yBl=YI@5bEf;bYXkhsaHnPTjjXfY3(xDVH-m5*?3)ibuu>u$UCU$8Mw
zC*Ku!6{*uJ`cC$E@J!E3A8tGW*P@udJKQKboxE8Ej?qNddvCfnng8DbuE`s-G_I|^
zEC1Wyxotji7Gb|{!Yr;@dRU`p77ypa^EgXQOeIyDwyFKDb+s71?lXuM9x*g~fka1$
znrC?|^b&lIbJs(((u^JE)tc6COn(Hc>u%`l36Vq)oS-%O7LxjGnXR9RSnbzgR!th)
z=I47f%IM+gPg<W>`Mtc%o)C|@b=O;Q7gsBdv-7<Yw(HU2nB$Y%G!CtHo0mOy6}ENo
zI{saKo{h7e2OP>NbljmZ;pFcjENyooM9oX3x~0z5m7jezL*r_eU}D>GMmM0`O3#*N
zT1Pi{6J>Rp&sH}!agNgYmd-q>uH6#Op`kU8mn9O5xN{$0T^0%-2Zrs?r-f4@JjA#G
zM{OjH?w+FF)mo^VRm;Ed07kFvNtnMZZ=+`chP|iv>Y{=0CE3+XE!&=L!ky+n95}uG
z;!UD%pU}Y|_Y#;P3nyTPY{P8pXj7ey2FL{_N)VpLR(L%o%nI|Bi8W<Yy~2+oRzd%B
z;)JO;GDFM?xypn&0>C@@bScKqSd~QPO|9%s^PR(DtH;TD#uUMK!@B7{B8WFj$wMr!
zgbZmox5O6Pqd(0iYusf}(K9c<x7H({?>4CBV%9KIjMONv(R&ceTb5U>wirrk9>;D2
zPe}t&h0Qs?{SHsj?n~u&t=wnDjoPiC8^s=D>wBfs7QKma_)&(|l_7<TDlq)8vf<;q
zE1j({S9%%(npKyV2(5KY;1HhG{h{AZ^4akB!|Cte>**;6UQK@B)?0{+SEY2(TQlfO
z3g-EZ!Gg%Ua;IsWpj=~Re;0;%n72A2OuCH8r7p}-rfr)0UE|`HpsS92i;SZcFrkId
zO8ADc+-^R$W+!$nw2g(eje|ASkaH#m)s%K{jZHjjht7wsXA^t>!hf-CgR&-o@SL2=
z>YLrY0{G$JnMR$}A`I?Ar-0ac$k4jWFe-(L$KB~OvOT1T1$#wv=pV~c#qo`;F1xO(
z@FMDw_W8`kyz=GYu>ATtOKBC(%g7o^MskE!t=t_YL(S6!VtpyQg2<mW-#7ltmC>2V
z^I#c0Q@k8rg~=(d8X^Z+OE%<T!nqt_Xj#iX8!2dbsX~=+%{VZnBbi3`8HkrlG|@T;
z)q<R&jg6hTZd$$NAcla9g5E)GidnJNmMM0rj1^EBrba>F0tI#+=hZSm@XLXL3A0K6
zh5b%=Q+v_oU)K0;nWYCP^Nm*peN6opU3^O(U9mZN0|g`2YR7UnjP6z|g3ggzIJIMQ
zMqoERn-MD<Fl^}6dlwxCTc2ZKE*A;beNsC6b(Cn}cj`v}*G92lgfI2_2qvr<5i%6~
zV1r8@cy#gOTSO}mDgcP6u))oTBt5Ssey25$Tc7X8_`F2K|Kd1!Q!qVPn*3j?8X-w?
zBzO@X*lSDXdZ|fXg_Bj92rX`$&(3rT_y3dfmyET-At{xg;N_5>baZC$$sUcd_1}LJ
z)RWC0A(U!mr`vBAf2(SJX!Tqrh!4D)LGq|nK3)@PnkkIfwJ{P$sOvZ=4Ls;E+UE1O
zY&OGg_G_xRv8u_dY4!-J*-xv2Qc8r}_XXo-<iK}jXFqrj=M=#EUZBeY(H7*IM?A3>
zGgr4p`3T<_cAE=X;j*F)CS$>%XXLsw4shF=_)BWTwe;3(&PajuH~Su2<0d?2gBx?f
z7*T4^tX)p@?_RiQVOL`zYBpW*T*^<Ik)};^X2$YC%Z)^8CmDIR)Dn!1`dE{*#Eg?*
zM4~VLcKtZ<zV0b=WX+X(_s?IRFyNWEstnq*x``<uT5COHlD#&<v)ex1<xh<;ErVr5
zhp?9c7)XG~CZkyv@`nhWg=>zKzJBv5bR?rj?>i$954+tzWt8H%m7U~_BY^B1vu5Zj
zuk-x9npqII>5Y4O%xRi~r$i~gUw)1ZHn5I#VoG=+9q6vMVv!N%z*Ng^+Fh11Q5o1B
zZtnOG#xBwPt@S%8HPkt+{sZMpv;=lGm>mz7Zscn%_EQYaa{T|3XH9fR4!e|Ed#P4d
zcK7eWcR-e1R0Z+>(%AvH<G-{h(yYv4d)NmsH|5R$&NAw_e^|2%JVdAG5V<I)sBRH!
zG>uensQn$5sdu1n#V^C9h6ww9#V=)v#m-hIH>X7HYQ;Dpqt|I;rZh2gcpj|vKWUaQ
z#ntuyq#5_WG%NYP(hT{()6BQ<a%Rf*NQ4uC020e?DTkkG0mLTXNN$lhsfSl8bIQX&
zd?ZW?le4+jxvYTpBv&S4@iufp;ZiN4RiJ>aT3AZ<Gl)6dtX^?8xm(AA*$K2V>mqeV
zzY}9}c9HdPiCv+zwL?>|b@;ShV?i*hP22C@`)JeS!3ZU>+wQ(7uKa<jn_B#$<-rC0
zhhJxpOfg;PRf>fLq6OU@?natd^TIT=!VUb{1QMJn=)<87{PZyuVCMmwAoU8P<rpwa
z%tE3Wh+)XoBg<!7?Dc9@o^zMPI8m;b)PiaXa-dX~i_~J>h>qBo_5P{dI#zn?HBfY1
z<>Un;Ycd$JU*RAbEgaI7V4DW()s7m1m3@Py4r-_(NsaPe<VOTjfFEtLfE{YipBSrF
zqhjzHnaFoRarcue#H2ZpKS0&%Mk%B`unkJVU*yp>b8?I+HJ2Wzo%UAx_IdH&c|NW5
zjhE;~WY@};Z-tTa=zJ>PO6>;CL1g3KGGH=Oo1XKtnIQW;eGOq)FN<<u1Bok}Z}i$u
zAGweQ^p5?Fxk^%|n2`o0^{a$|EN(06E|?N%TaL0M5)qYi`)o?|MRH?oWb6k8LbeX$
zgY2(NSnvK$Y1$|wRqs$l({v)T>B|3R1EaM|qw<1kwX5PWei5w%sk`HT(69~c18I9!
z2DjLcJg=7B@nAbZEYcdxJC8Ik-@=R(wp{36AU|;SThi*Byw7r5DoAMKU$BsnRmar3
zi-(5)btj#k_}hW!eUlO&HxXTgSJP13P11e*x_MmR%a>h?->h((R{6Q*u^J{=dE}N>
z`-^cDI8}4pC3fojr}nLtDk5b{SpG;bWhz<Hr;?Htrl8zVf0;bn!K1DY4ZKfC3Y1;T
zBR|(Ot9IcXP7}N0PE}d-X7`4u+)ME}^?_vR{zW4`tfo^CZ{ofM0n^tX!eli#-$9b=
zG=GXcUI+6Og$ZqnLibZL3<x@VpdQ8q;cvAFI-@d@(VfU(MIl&jhSjL%aE}1P=i!i|
z=rvfF6(krGIZa}t)uoE0*@@DLAob>_tBk<0wGn06eVbU$=uJ(qdJP@aDIHNH3Wp3D
zF@M&FLNM~Ttr1XZTx+KKM@rysNATWpRe}O2<>@2`wWnkgZ-?w7xkPC9CX=av^IGoH
zc>3u?bR0aNdhx9rq~|4_SFS@OX=AZm_mm8#$_(BK)`>7waKgFPtNc;B;aAz9YGj~9
zns;XVi$}THQ;k0y!gmokY*fh*T-MV588}2Lq^W_w0u&IaYiEO3iRyN967AvB@9jnt
zVp_LX`SV@0qbE8WhBD~qh4o6Z+|%r0h=+b0gVxVI61k_Ozu)x4g8~L;d6<fHQ34ip
zvvb*YFBtOWQ;4GQk?CmLo{ZheSYXZ!bKn9wTf9`ubR!a0I^$8pD_`+cY+t96^29p@
z$1#(^2xa@wJ{uyl)OQwhU(x5eb#ery8DT=T7hFcx3xqSny&TK6tAj4#t-um8<^(DF
zY;;}DCss?krv?fm_WJkw_|{PQ=frw3nhY_U8T`xV2h(`nrN!^0*^1dEa4Pklj4hnb
zyS8YylzisrFJfa>mSKITh5c4}>yEbGGSz6jDU~cfFqW06=EXebTSR5bYy0nfEqU(J
z5Y{keWx3{nJ+CAQm@Qf5MFTU`PMNN<?oXtSKA{YpGm&A6i1_f-!3tEd9W{If(W>ij
zSJ*jqh@b%PN2)t}U9IxrCjX`<gOSZlqi<w;JViQUulYR#uGr$%z^F<cr8o*}++Uae
zBngE#aq$MUs@SgjT$gaqzdLzjz)dH(2GyLS`fm)7cF*IID$OBN-y_v8SY9WbceSv2
zU6^hD1@XXUVfp$d5Oa<+Qv5NnNn-iJ{AG|XO6S2ng9QLi$*}nfS{V+c@Yu|p(~EDs
zDFn*!C%{p?l%4T8kfU?e18o4N)VLkg`b!b&IZ1qBA%+pQ?CQ7hOc6cCN!PTHImNYj
zQ3UUzNRAl&i3B2MPFVIAPb?;APpIXiTe~Wf(YieJ-OLDWwhb9<A7z-2mO9GC$&Grl
zY{Ryl<J9ZdoZJvLXz<z(i#QyT?Be#$H0p&3YmSOt+@LvJi9$uu1Tsa@tt=@q>LbIP
zYY0ZJ2tpm#(#jnsIgko4I^f^I$R#leHQWT51;%fX-p5XIQsBBWoOW)BgCWnHI8>+B
zG?1zULU|a(@BU7e4hU0TKM&RejT$jMP)I7mLc1mNDJ3|YrC?pcYe$I@KOE<9DygkK
zS3(vYNBFDbc7(VNIu?zTrxt6kF3*YZiu3v~Azcb9vY&=yx5hkOJ2$DERpS&=BVr`r
zAV0oCxJQ|Ln?0+MJYJZBygHFy#3X9Wdb<8_Y4#5P|HPa;y992f!j+Za7P3fM{Va8k
zwDw0q91~G?(2V;NYuv$_ci~Kbr5j7--zmSeV$UHm^BvxL)KGW5Hb6;0d%n5Nv*^1Y
z2S@oFjrD!)+6(<h6SdFQ#Qi0<p9j<6yx1)d_(^0`Fan@H)?Z&|Yqkza{d7+&#wSrn
zJ~%)nU^LCYUqX-YOC5IpafS+$tql_ytl|Rixidf5#Ilp(HZ`<+W_uR`Vf#8a??I$j
z@0q5>H54_OZw*%O8vRi{?K=eBJFCs?nqOrIMjCqeXbwT)d@G<<5Rj}_fAUZ#mMY%N
z$NfDQ(;zY5Xz?VH)TYS}ves%J)pQq$>*he3bZ)~UIgqODL};Kn=>d=ye%+-!CUVZJ
z%CrF1GW|N~SsG30z9q_6HoBM#DJX7%t-}=tz@n62GQt1=BzndnKPG01toT`>J7WTb
zl;PH_lD2JN^Xu`A(B>PKF0W2QztK^wpRJpO8qND;WAvD%VQJciwQXrXsw%p-pJJke
zC$*d~*$4ERQy#osm=j^q6)Bl7KCPh-$I3+{*f;03L*aW5RBOX#$o{$S=oC;$Fe(?_
zcPUlf+S@zxVPV#~Tqg7cqaUV8^Ddq1F~VYDx7Ov4vCNkdx861ulB-_|P`e0AFhJdp
z*sb#04Gy<H;q@bYQ7m{h?Zw@sL%Zzni+{T>|2pF(ec4Z|OkxR94`<D4TBtMj3_lq8
zRKl{&*OB`dg<p>yWn<rkiZN^?u%SqcpYx?WjWQ<d?-QE$sk#4XEsU64ql9He%0Kt7
zABe>_6J)z^pI$}qtII`nWjMKV%&Y%}36fGKYq+q_>lcri=vpv1wfiG{aH~M@eh9>p
ztNmFulZbh5kAoc^cevpQt|#xV{J!=okg}pprPcoEt_78YQT7C$?<KT~vUeD6%yA2B
z+B+1op}KEHRSoP!>!&GU|G~U57NP7F9j(A7l)KZ5BKe#)xt)a!R#=|XUatxFqp)%7
z{tyRL6!yvGOvJ!uYTOT@08Euyoe(U2zzo6)*znV>vt^GS5tqS@6mHBZ(v6_7IZIOt
z?bZ=x)81u8hA|lC&Ozb1y+gE4J_k`X>Qk&DA-SR15g}gRfih=sI*7LtBr?FDJ<%TV
zF3I!YTer@KLYV4v6q{(_0uI)#n<GQ6;e(J1_C6vW!94WuaXJwhtvQXf{s6cA-tZ6|
zrpH9*nADB~DDGSu1qy(Tj&fbrPvVHWGLhIkY(k}3frm-zuO__*ATYk9*<Z+0{C5^9
zDO-0ufu_=lNZ_4SU*|fzFT;u4yl&eqN*RM!Z<P&zddS}La<i<5HX=KEct%o@1Nq*V
zUSzW!iYxtTNI@^8ayYpm`&miD3~oG$ws3ZEy8S%CGvA5R4-2|YG!3sdqMX`{2P})y
zN2L5{C4w+>J&gPK*$nGVSMbm;zYP*?_=80Advh8MJ0P{54w4x@EbDyRxol``)?xJ0
zPEB|t6RJyD@|x09R;jY%XbKe?s0y8HuVqNGFr73j6s+HC2*(l<{xSon*2pMk;qfy;
z%GflOyX6^VpQ9bhU2}UI-Q6FIK^<n{Z;bGhD}yfw-?hK8St4r_D1@R9>TEkMt5$<}
zrz)<}p+nF?X#8Rsk?d_m#{T6GvZmbAl)IV`gq1lQLls4=+x7AK+F<}LnyelEjq>gH
zX97*PhomMHeDjDxbDJmVa<rcg+O*#)_HwRayPoU(jor1!a--%Muq<m58vOcfQ0MgE
zxdl*_z|08vo-GscknA7DPO%3-ifX&d>i__&J>U6)dxWj>GTmiW@oxzV@08lG%oGPQ
zA_31DcSpd4XN=RRk`zMXbTMxOutH2eBkjryGML>5v2xgG+N|6gORWP|6V}yWH-K|_
zOtilMAjmV0vh!Uhg8nq^c0U!s!nAD3O^Tfe5$6mEG|^T*8``ewxit*j2g|@ifVo-e
z?UPvmbPH|3O9B8j@E{1_z_Uv~@2ETw#cl+`IW$<N5iPiJIv|K&vSI@(H!rM{ki+9l
zNsIbxg!+54Y3i?K+0ukJPqR&0`KvbxC#YYwX4&wChy+P@`E}?HwH`=yTL1bJ%wmfG
zhG=Qzp8Q6N9#@iZfUi{W>-*&nvEb%sa9$4kKK~%{Fj)F}`xw^ij@^qlkfyxMv{n51
z-L8*=qPITcb+a+TS?nlmF<~EMTsl$%h&>%T`<GuGJV)fpKZW?PcSwI~3A3<1TMmAE
zuQX(JB0G)&{S*HyY}K8rwV1wD{C8<i&G@Hi7VMfBHmQWfWjkCBG0pW<+XTK0*3W!G
zFS-WZm8>FD#3-CC!wlj|!xEpnMln2FS~)R3S-84AF7dcj<~~~`fjJ&G(TSGoMvw|L
z$dJJtNms`FDc{=BB@Z!S!-^cTUNQh1i6bv-_KenV9%8A<zC6?Q_G2<B3h4MJayks5
zUfAC;C4A>s4H<~Q2O{a#rbU7sB+*f?v~CREm<Z2L<!aD*r;ukImJ{?o#a&_0*A$d_
z&eJeK)AJP|AqBzYf8s9D=*NC&H|S>S_-6sT$q~n)WR1}Jt!~-v=V<w1e}|H9puGO$
zpV2%fmb={qaawS{4tM+V&Jq+!k@v~tz%+DkoNl~A0hV;pJ<l96{5{*+Xn>)W<$?{m
zsaDCGhWjNLwNL{9v*+PSBY_7}Xpq$V{XrA?hkksQ+M<1!dnk95r5|6oCuJ`Y+G_R^
z8XDKL+pFDfGO}?{2w)n=<Ymv;cNkJt#u&l~FyAX_@?jQ?L7m98rEBr4s2>aB_0whn
zDBxBwwU-&@)Skm2@i#y`uHWwOtZe`xD58}FT%3a7?N?cVtMA6%j6S&dE{bw5s}ho^
zG(i$qmANr0xeu-_E;MKkNj;s)E&i*WD@#UI<#VD&SK(v<d<2{x+z1PSz1Hlf{0Js1
zEXc6+2f5SHuuJQ+z51zyi5~?Cd^Fk8x@@Rc4~^SRUw=%mm$6{*X9Lv{XO}ff(Mmd1
z0GafdqO*3w*HqOpsX!7a)k+~hhk7t)PM$WfFS>L}-nUS9zk83<5zXg^fbCyns^>?)
zHn@HMLb5#~q0{(t3_UcZcD-ww2SUgLWA+V>2cTQ5v^5Y+jONxF7`Fw!CDK>?Jm+b+
zxMUrUnOv#XNS1F7F~9EjMn}yD^fy1m=jrp0y@F)aP=$yLv;7ynS$?kTtEsoEN)L8J
zufKT}fzt?*E2~C8o&4M=)nCkP(j-1&C0Cj+???f~rG7NMF^VKb;WVBm%C6k1>zrfu
z$T)2Jx3=1j4-HSv`O(jMCO}vlc@yNkGC{J7Z~%$lpKP+iGyoDYq<rs`<6jxIXRL&p
zKL_S)qAMx!{x`$w&1pgOGp7eC+|x_|Cv<=LW`k7yb<W&~icu`$(?x{yF2i&vR<uj$
zxJTHc0apZiWB}IM2Bs74@l^tVo5XV04#EO+bX?|i)O^(U#@{o(_G9W;jMj0mE#TCA
z{7vF8gaz9Be5>=-8E0m@v*Cpe=&SdQk0b*Gqw;V%4G^Qa8u&G=ebz|f&ao`2o7P-9
z!Ptu&a8HWSo+;hJ@j!?8`4_UwWuq)}oI^d)T3=ZvD^v;Ga<#uii&g7&HBpN^x50$*
zQn9S`HAf|Zv_lZZveL;)DmCpm|5UlLr-39!-G6c$wt{!9Lp`MbS~I9VmrMOJ`n8nI
zk8-#XVnEd0%<_q!s7O9f<=?*g*;}F@^$_;hh-s!Wh9~^r@~}c>9PfqPAK{@)pZl)c
z>lo4odp&r7H_KT-g8L!@`%(Z^2s4|1UHKM3c{74n(qynL@&$V9?=|KCHM#vb#{7r=
zca;oWYherXbx7AXmWDxf^7YI=B^BcY+57&a71inCDX0La%)(FpNotn6r@BMfKpe=;
zF@#r+mn&9o^K#{?1nB1J4FnDQ&|AGzuz3#XC)}uiXSAT&YKIP<B<P#Zq>xsOACjj6
z-&cZbM!o#+v};qer>+qVqaJ-fogDQS@s)N-0f(kyJbE+x4S#qg^s+7mV3zD!?ShFX
z3eQ#)alfk8Vf{>!18Sb5wNLSMO&R@jhsg*Q)jB+)su%gTf!PU80r&vkQQhvF05*>0
z8hT|SaDc!ET;WO(xZGA3j?f_jU$PI$_TDw;8fqE*bh)7y8#F~@MTgV#RWuEyN)o0{
zhpRb(B%a^yxen16@fl%n6Kc>_Rvn!<q6X?murz3}Pw!nH?<T@c+yu@(F%ARHQ_~HO
z2`ONH^-r1K15O2n%}Hk&P}6YM-vqc3fT0@+T=4e*%a#q~*Xba$93Yn(XwoD0Qu)1O
zk+0WXy)u(U)kpL{wC%f^RUMWMwJrO4?lOGf4L#Nf=T8+UTS%BFO(T2M@=0YMBMBf=
z4hm%y`Z#36@A*k`*1AMRO)5uk6k0<3iLdSMLf2uNt6xv~#G56AuknYJ2J?Ci&54&z
zj=bB4VfL>oF_Nj~qY^tJe`<maS7W5&cE9FnqG#oWGyOioFLtV!rTByn=Y4^tBU|$f
z&S}LsHQ|F|J&+UXDkWK^&FPG#e$n6KkM*sO`xk3yHK;m9@1Oy2DtNV0ECCwA9Pu$@
zssu^u==~nB(Q5y*U`R+9Ua4rnksZ;9u!FJ<<?fqTrGoC~!oCWILngZX2EGBiy%&Fg
zeg%XIFr&}>fnt<MSKkDHqLpY=z|`ptr%}W=K;pHCr$s3%Ww!1<6LRg+jnF-Cv6)%d
z`WJ+@;tL?1h<6b~Zqx6Tp+!m`b!!0MQJP_nhy~CgZ^e8A^Gt@PCT6Ag?2FuzhQIuq
zF69>Jd%%h7_$%;~5^#)ijU98{hp=b1FHe3vp9X-eUG^p5tQabQD_G|HQpT&Qlj6WT
zliX*Vz%@&%j}(~$M=mg`B-E=$4Mt`<)xQQVn)hcaz;n_sQ-1?=)>u@mXgKGD^cHaw
z{K9#ub`9J7=W>}nh1#$HVS(5$^a8Ft?A2l?Ia;vW0udPaOBLC%+h`8Kz^XB2jvn}X
zI#st&n;5e21-;^h8HWtWV8KK&N{Rs!-~I*@3leBAy2{51xNC|dTwY*+rmt0dfP7(y
zPb8^&a4GQF#>T=gT703{QSjXF>wnCpF1p<9-7sI?*Xs3pWPh1RF*SxT;2&{4!mqgy
z#`Sv-oS6KTd01GS!wL|&(~;OifVhI!C)a;4B}g#?#Knoq_RzS}w3K5+(i{ewh>tB`
z$ipm^{!!rWL!i$mUyfy@jA#9yf{(Y8`hgr(qOOz6F;Z{Nq0~r-{lP@};We(jh-9^2
z?K3W1S3d7{VNU@Q8ct9}loqy*tn9i82HkNHo=9w+9pKeSk9hYm5lBM;gz&b-RKum$
zBXDr-1VQ8JM(H1sAcnoK-mHMxoy%Z5-S=MI(arf)5LSs_Mr*Y>E+C<B#%K8RQGZAi
zYdrY?61jg8ea@_>ZMw(NQvtwLb&D2%Qm4#Fm!2@oRi@_+C*W8o34}wGKJEWS0p`Zj
zrt?}+j|K+sa+x=l1!On!8mI|u%X?<;Kb$Yn*Wuxr{t_~=G#B6if&gh`8Uu`juWpe{
z<6kV$I$p#8oa=*cgV#tfGi&6m6uvBTHkkBs_dS7Hznl(p2S9n0P063Tu|dMM;z+=)
zR+VX6i`0qGrLbc(07{zSi>i`Ep`fI#@_ZtZ-iG56c1bg8v++IB5PH6|(7zfkm|vMy
zM)&wbUXa+Gx|f%_ihfnCKoKq8B>ePRJ6v68{}O(cXHbn({!|KgUj*qI7n<#@&9Oxs
zUOCqKx$q|tU%JAco4XtONY$>Gb4tNBKJrfeE&gZAAfu5(-r#or+u2RezVylgtB~iR
zC}6al9f~V2DU!_KEeoJm+Xj5gk3d&At7i*%N$1Ex4g)`P(NAF|pWM0k+^renm7iId
zLp#6SWjiZs<6uaA9xIh&GAdC5a$qv%5M)P)3vfPg-tfuieZ%0)vHmUNgiP(RKHIJ@
z1G=>Go^m8OdM`8?A~z$b4t5Z;(we1gV#@edYVSe|^ct*$*Ds5t#`jYTNYS%cf}eK(
zK%bE5wTf)2`Ti|$j$a<!FDgyy%|fxXa<&ENO+#7Itj5G{E)1+RT2!~hQ)AO{$##yt
zGM|st=IC&_cp+O)OdTvk^_eq5p5ADzm!W`%bFG)nW_C91lzE-UEW64$D1|ZT>_84Y
zUJ~h-;S*+!?c#wVavwiIy$O^aC|^bGY62xxztdTUr*;7|oun5g_+@jD?DW0qn2<@L
zyO#q$T;nMq$Nt!Ts0IZPhKGxj`e|Jl7D$=m=2QJ({nib9$Tf{1-J!(%>DHpk_|$1I
ziX_;<C>E$^^{)MT0F%km%=fGqs9ekz#2LQ*sFr%HfPyL*I#z$K+Cr_%w%*to@s<^!
z@0~SuOz}ihSB#H*>vm1{%SMdbWH6YpuAQWI5`fN>3@{himJyoc9>?I|EDA%BmXu$2
z?(L)fj0b;TlEL!1qZzAFwQja)0Y9qd4{C}&H}FhGOSK1hf(iM<eQEcXTj8S2u2+Mg
zD2ER^yr#+{45%k+oKqhEdC#qs+S;hWTC^4T!0UAYul@P34q!?aO<m4ij(A+Ry8Qsn
zMV6A73V(Qc^(WKVoM7@)Zy}`VK9!@{=6vEt)SJob8Kla(Qub^x_Au7dZD5CrQgvK5
z)pK;Ul*C)=5$;hgGh+YOKp9N|TYiF2FK`kXvzfiTw9C06opPIkIq@`>g)Z+&X;4;y
ziw}VY2@!X{fi$YZi5wB3o}*o0CXz>qI?wWz5piP{*4q!XR%0tPI==RJU+S7nzQX1>
zd^gy}K9(Ix^fO_YBC9w*_)Cp$TXb&*$)kvP4|Ed&#>zl}59+rU=|&xsKl$09f-TNE
zX)aS+i?0bb@oVNkZF@MV{ESh<38b(|w_S|~#4Ig-lg~FDKvcg;p@9Gt*J(-{0c>V5
z-%a8NLj_0T#h^qkB*q8^=YVA#|I81v3a&(MH(b@OfOX7qdTWkUfdeQ;-ba}Q@FCc}
zi7f_`z31m<h8$7TAWP!t!P~x3Y$8i^1ILcDp!j|U_*sT49`MQCXG_A2TI~@VvL(sg
z043gPUP0SU`1@Xe0S2KlZJGJi#X<Y4Z$=bK^|V%(T~&-TeSRG#SgTqV2!LB9SwP?&
z@aA>{a__zPyd{|g;5qX1QU>TmXiiFB3u|%1gv%BV4}@{lO2R9kg15v#LG{QqUycO)
zZeJd327rDZXC+%vF*@HBnF76>@~72sUG*|j91K~lDOn*(^hj^|{8mifMD!!(3XmlE
zFEigkD6S!^#{eJ{GkjRxaIDaA6XSsLTvfdw{~HAFGuE4D=NohACIfk=k@4&=_Rh@c
zUCIG))~uL=k^&q|YP)m<lv~~5?Joe6ypYb?C=sYBN&R^n(#z|8{^-nwms4AMc7!ZA
z_VC7q><DrmG-u$`;5NbbviH4i<i-i~94DdCo5}oz`L2Hxhh%~Bnn_vQaz!&jG}U<O
zi&n0_KwudQ8h<2{JSJx=5TK_6oYRwpr>PT@pMJ2!@!@56Yja-IEVzIwEv=2d|180u
zkcwq_QS;{}N*n=6d2RGzy}7_ulfLL%-pJD~k)Oy9mM(l`KQ2^(b_WezWmGSoj_eZ*
zI6&Y#MIl36joBW_oY(w?=ZSuR0XS0;5Dk!sTjc3Mj{`d}t^C(svt7`=no{rke$%;<
zcA(k&id>@_Bw~#7I-f;mA@F#R->&hJ%QRKkB+;uWS&(`jXXe_#Qu$x`EqnHhTBn`k
zlIklHcQH?JD#-CU4&eXI<7m#u-~IR_npGF+BUqg2ij%zIlU4Du)*OA0W7;T$qS=pP
zfYvIE13B!h?V3!EKeL_WaA>;3XJX+68J_!1){-gI=^7G|oze{qdDa{Fen|*MQ;*OG
z1|p3)VfZOe;0aB`Z0Ns2{fwCh3UR9=r<&e-y05lx@qU8C9l!c7P&Jahbh>5(IxH|G
z{t|#HI;T(50N7()+mq!4rd12s@Py}d&)0^{b;!s0$C_=aq5tssFWd~D+G|?#-)O<F
zX)9rk06t!L%xr5h5Cu-9HRtc-zd=US-bi`^H_Vosw0r{rw~zte*k{ZN-)#B!5o1Mj
z0LJhA7^^^%R4+LWfFiRdyF7FB7bnoH9w2u6NA}zy<+HM52mY>SUSqq(_kQ%D*Q>M$
z65imW_4?~mTC`sg3Ct3}tr?y12^THzC3d^{xnr}80kiPxZlix87~>WIMo)SF2MfPp
zD}MIfc{-W<ihVuH^TQNTeqR+v`I~o<2(<;G`>H>Uc|MD;Zi2BVNMZ|UYkJwGX@85@
zw3hy^q*BmU_fM5jn+PD&uf59k0OX;kjfe797^Uh5xl~U=1~{2_YT31@*6&ZvMNa&9
ztlNyr(w0PZL_U;j5GOdp_GjfSp8^#A$h_?kKpr-lRZK|D*|w>3t*zgByqCn7IOTJg
zn#Kv|f#D_ds<`!yYF$u1<*1&zW8U3(-PxK1^vU4PDm#K}uZ%5#WdtYq2t)E$NsU>4
z{`?G8ctQ*j+Iqmuo?Cp|x1U`ZZZTemAPAXR*S?CrcMJO9p}_=Y&87QP&8W4j*+q$J
z;df9W;+(j~Kosl=a(Xvnd3j{S{1KPAt3_-r6ax$hulNxghl(C8?2s3Z#5dg)Faq_y
zIO6~q`P+myG+gU@q<@;GC?tmkT(0Y`i_&?qRjt~F#x4eu4=+k9#$HXS>`7!^_mjd*
zu0`u%2lLz3W<UsBA$oH~Z`9?U`meQq8T`JV3N{e%4G}UEp=BAl4a@L!TXJm!XYf=Q
z2+UcHifBjBAF6LFh9i^{nx6-MVJ;%q_f6xj2Aaz$4m~%H2D>*5Fr-2&5E0B677&c7
zFr2Qv*GFbvJ0xj-<Xv>@yKklB8xuoHy+Pnc0h-t^0vj2(*x*Jw6WMv#>xhIev)_!H
z%x!-Drwq4h_8K<$y{UOBDrl2At$fCSv$%21usc?51c>Oz{Bj$emajI>N%!hhmqIFR
z5=1ng7-u$%tM&=!O;;ElzL3V{An0{t|MW8!imU6M&11IlDUBCs_f#k(uw)&$k-<!u
zF@uiV2b7sl`$BjFBHLBp`cARY2;%9=1=62}J75`YiJUyeO<5i-=_{*hs_F(+)vod6
z(L6EYO0&R)6ITN7BC(c2z8aWlDCQP<<0&)_%(@5vJjEmUG)?jFxC~)H?FDnB4KCcI
z02c)LcLlrmDiC3z*?bbILO%tcMkHKRW*pVkp44F?V?37E#AcZn1~jF9Mby&$y{nV1
zYThM<RD97t<%?v=@<jcLX7#cmSalpnd@s!FAZ1Zw#MO!J;M}I_DC)P%t~>MpFg*H;
zA))Y0#r>JzJNmCwW8`83V^HB_UzkSVjR=`6pKt?Z)G!A5SVFzelR43wsro*<oa_84
zrC9HjS-ufVgjGPVtgbeZCeq(eVkag8f3U<#;bo)GxSs(_7Xw$q7&iH{OTWF9XZ(hm
zLuA-VSR?QP1{W|fg}>aS*u^yb>d78cStkW8LeOr`GSKWtR$Z!vIF8Yq6lT>`w>Sa3
z^qjUKFhm!?Y*uW2iBQ0AnT=y!2?ucI?I_Q`R$SGVyjrg|erU~N&t})^@zY?Q53fzB
z@BsZLO8uH2=<&U8gIX^q1NI1kPySK2UpB4zEja-2_ff_CBu<smk)Gn?wgf&w8C59Y
zYO7r`orr|Sb{H;z@BDxKIR5QehG4T*1PrXL-bCQ@2|JXX1wCdGp=E9yoEO1rGx*dz
z6Vpdfpjbr9IwkriQQ}>c0K-+7&`pu(-=ZD*>6C8J4$f(w0n!6s<g7p7pvE0+QYgMg
z?fw}cXLUChpgCp83ugnCHf&rKl&4A&?qQ6@$e~Eu4b}ba^Qf=zowyxNr0@@ND@4bw
zp5nDm0R;9UHH!*xJ7)0KU;i7V+y&7IDUV=!hr5P0+%LP;ZCOBjYbi0wDl30-sQHAf
z*{{PsF}lJmE_*!dylzPzF#N>F?BCY`7$4<+ZkYSe5#sNp2Fj15>sFoqq)EJt4?0!$
z!oRpWfln&z0ROLw!ng%0(`*(;I#4BQ@B{D7@C;B<TSGlI#7rvEW^WybJaGVRobh5{
zrGUo&>&pWViwiZ;5kMPHm(Tvw#t7hP;;@JqD4fh!Y8mdqh0qqTZKcQHFV5S>WJ6jX
z%bo&#Z0oa~`i04^A4Tzeq}kFsY!8-8jT?=Wu|~wrsg!^=E@G$w0opj&-5yt;??X8o
z(A{SLr;VWzc3oM>b|tF1|5+wM84u`aMm@-DT)BVR*cHhMEV8-rb0u+T7q;dxp+S&T
z)J4fPWuVv4)<lrqzn5I#c<!c0riMph0e<jI-^XTr3Ba>KYM<>#k%R4GfQmb;J8MSP
zyphZ%0G(B~nKl*rp_y~c>sa;>xdhk0Iws+%Al9?3&dt|ueLZ1ThNx7cJ<vZ^=PhUg
z*dYG5%1>G{_cDelLWNL6pK<$fs5XiX=El5}v&+bSg@G{)S7`bE>Q87(a{iY<|Dese
zz0L7(b$Vp7V~S&$|Ju?JH1S(zwThXVU45Tm`WKokVGG=ReWJvzjk${FFXHX1``T=&
z0+dM;$B5i&Gcy9`3ts+%ULmJ+-?@X05TNEDV;c;Q95>v0>K|ZrZY$m?s@%BM01LOf
zFhAlm^r&An_$+$acZ257`<c7n`%i-!6nBH_E@tD7;{YA_tsl~TyvAjON57>L)4%kw
z#mzu^P$z(Hr^YD0>=3|hlY8h~VgGR4^1Us==G8B>2Vo1QmDY?{&R^}R6`y!<1vXG(
z92Vrzc7+bi>EY}1lO|PBpSGHQ*)|@_m5CE@$oK)ty$_BuOT9NpB<HD!-B%lOO+wuC
zPlY9n(c{!ZGc$jGl%;+)d>2H2>f>HXgPLAeU<zPu#jzbIvy7DIqm4^Q%X{~6KpLju
zw7}*L5^@1_ZHVbDHpT<io*>c&I_?{^l`cBy%IjpO7o(mnN8931PJi@jSCvXHQ@Y?A
zCzV4DX1H1Vc=7e7mYSO9qt7w#mC*V9@Eg<KyD*-F#m&hverN>zm?trnmf$&0MSu3D
zQ-jy3+g1L;T19+N<xt|Am!*kk8X_>(EMpE)_=p6wz5OJu`DpIaE<Z4VB{0Md+?H1H
zOBF|Frk_Z(C#aOt8k0HQ$>gR2ZBZMG0?p>^ckh|4uH|3!sPq=g{@?lJPBXU`Z(ey7
zX7;S&FYHcyT(I*Zuv?CkN;Qa)C?R&{3*oroKuEUXwKCt3yvvirV9WC%bChy`H<q|-
z+xO{8?^!v^#p0;RX|6@CM^FuyFwgM6E0q^}M{JezE}}@S!Kz_xDn=@HFQ-WJV-{-!
z-{g^poOejtROs$F(YKswW>GLZ;1qoGT%l3!1XT-cLg4}|rtrd93Y${BVBxRaCZf_^
zIx5__4FpUGD>f8pwT;bQb`0BMH6aREW=|y+{`&MYl~Yi5po_=F7S}RbeAJ$U)ncJM
zgVv5o`%uaN3@5@cIMaWl;gT5=NO`1gc6w(UFo!=#0P`BVi7?osA!_O`+Q|~opeD^!
z?~7WWTczNMA6fd=XY^2@@#N&o3(7g^*=dh>KmUa54c&$Lr@#%>cYJMGhZk3mPKYtJ
zewo5G65IfF?i+p?<^H5YUH`)Ih{haKa`x`q-lViL8?9+lz^{3u?b)0G9V>D#G#2*>
zn=DpIWJ;x0Vxm)%fFA}f+_{Wbq-@vaDaFfhiP9##=l#rz;Te0KT&kC9r4FMt`(NuV
zO!xZgY39<+``!H&THVhwt`o6Pnod3<TE%(YP|$2@+`Y)d$EPnbUk%s%5dH1@FJlsS
zkGo$#8ZM`KOHP?z?)^*{8C^*cebWfVZJc}Y#xCv1%_5xV%W?blx@$+A2D5RXKmQe#
z*qYlwAdt)490UBkOKUNH6kwdbPG!r+OVnvX!BD)$hk_muiw?@%e5Cb>K-7@G5!E=V
zR+X53%V0DbL(#_td+~Zim-`V6H$|bL+b5&m=BFUE@Nm1<(4lVNeCd0CawUfx++OOE
zRth;>RXbZ^S{k3vX}8}0DoEE_bR}_fXu2Q!8_!4D6RN3C;jTax68g+mz3npf6-J^s
zi+Nf}Pg5vR3SsG63<+^x8rcD*NkZXhaG~roT==YLn4N!Xn4n3|PxLUGm-oS)O`F2y
zqM^Y3>1v;O^UJZG!L;xYQP0@=_x+`q?Jw0XbS{@6`>T>?UlmY@`jyvcRZzftXNzU8
z?e%)jXI%znuJ<quL#~_D{3R^7)1Cdc(P>WFX$lANOSWo?ZrwcjX5B^>iWl1$1I5U3
z)ZR`?L~X|MM>D(sO{x{h%VY^<`u!tN6Y>{ROxIawXo-4q&%#<|Od{22G^d!vUc>m#
zO@kA`#il2HXcZ<mjstx=nKLv%T93i__2cwY1j99xP~HwYe=<^M9AqaCBbKhTN!1$l
z%AqE%{Us6~Ol0mb2rLg)l2BEt=ABZniEw>t+&*U=njNJfc;T!rjn5*s9b_dW(!sxo
z<`5iFjqp*m_9_?w)B&lN)fk_62pAw2oKm+6r{r>@{un3o%NDm6+t@O|>bw2b3K;w8
z7nMQ`Tn^v6-m*W2<peh^C2WSLJ3V#R((l)DjZrbC{%q~Sg%ZQ1K_#Z`xr}a<POSjD
zQ(_2aSfyvZ6a-01$GiAWPG|$@XPc@7J5bs#!~E4wIeI3z%x8E#cb`HRUXZ)x6FgHj
zJyg!yh$49)(1v0wn0W)l=kmbzL}S0GY47x=MaP(@7vKyJu-ut2UN+JPw|7L8WZg6=
z^d?uzsCW(L|I`SYY_^)LvFpC3sTLvilDQLxkyea}eU=@{jM2@yD6UXD-8_!t3@~)<
z*Xa`-j_?@#>0)f!+mZ#(s1h=s6985Y-DT|bY1x)>zrvXEn_t?~*v_YmUf%-mn?bbO
zdNk~<gq3s>Z6^r@eTGNh*Oh6D@&q?0#Z6hYy);G(Hi!fj&Y&IFQ<-v9U%;HRD#a`$
z(ahoA$KwgJoE+0#0x%P0mZdR1(NdirOUXcLPGr@nNx~OVW%uGXzl1P>{Z|ADDp1^%
ztVOA*HCnf)duODuUG`$(RPu^(a$*ANAmf7~;)-!?V!H-t&ki>?h^K<AJai%9a^4oh
z_hrBP4E^Q9Xv-qkCxjs3)8Qw|*0*5p0L+<fn+_kwg{$=pNW9}2)or3RdOBp?`{ah#
z&Z9hw*c{&N(3%=`?DiT+j$bceb5Ry4$9_$6Gi!!QqglV}T2|a!L14t^c9|)CC5*i@
zbO$Q>8LmM^S-?jmxEuMDm1p+%?A&C5B{A~4%mb)p=_+D0IB|)e4u@S%72G$F%jw@_
zN9AW6^_v!ud(L28QuB%wWsc_n67RR|tf5Vz+aVT{ytrkhX84;h?L>pbsf^`<B|^Vc
zG2+?)65ZMcMq#rdvz8_o$#rW=)d|^H&lqSa<!8S7J2zbi=JGy+52y;L@^J)E+ewCz
z0k8lDI5k#Vqb%=<v^P`OGtQb;G!+{`mPk!LX6|Bx4`|l^_C5SVDU;o@0)n-4V8cY|
zM|5O2G2po<XjAHP5w)aU&1DUOi~?>y%hru#q-OrLxnO1x2w=0hWtI-cPyDmHZtVc)
z!1487#b*Vkua0DRyoFNOpkXbV3so+&#R8)^0S1O8+qlj(C&qkAOi6x|gKu(Ab&>nU
z<eNMn)6qj1Q@Q{vW;nG6Sd~*0NLmSWapBa-)at!I?LJivMB}&lJ)3;>rq@vOf3ft{
zaZz>O*Iz_ZK-!@tC8ax*4r!3?p}QNT8w8|Nx?$)>a*&qp4wXh)P~S7p@BMG!Gk50B
zx##Y^_TFo)i`;1W<rSEHlGp_)(Bg}iZGP(vJ430?>EdAmV-4p8H%S(4x(L<x0@S0K
zLZd@M8Xc~V+{?r9DU~$UkntSEyf;2l_A3C-%IHoU!&zVqyrxM$1IA>ncM>>&OgbvR
zQ#ohi5a^O>{3MLPw1qe*c7q&DxwPQeug;>t+YR!>P(XKXvorSzi^&hMSmSk#MhMII
zzSg%i<?i)~9{QZp8mr2BUkb(eGuBr%64iCXbdi*5l*ci3Tv~@gl~QoIb;<<#N{xLE
zN`sh?HHY@CGD}X+-xM3ryPbm$>Z-VbbPP*<vx|r21Uq$MGmh+E?p_mSSd$Dy*q1QC
zy>>WxXU-YXt5I89s{^Vru1nE9F_V()?fo#J+c#7}9hcYmxa9BrE>g{uUOmECPTRh3
zmHO7k`u%@~N1pzdS4li51>rhOPi;<hFOAHm9CiNaqcStg6XNM5B8aN_yqoifi`WG$
zjN;M^HZ&5?U}lfg{%!+KykVdN$jelYK`$?5&8ZdphHdkaw8q-iNTaimc+NeUfwl>E
z75z;-UM8P89$|N>Cp~@n2%VAyPe02?31QlXAdd)Eq@%C*LCr(2B`GMtoJ<DH$=)oD
z+pZk?W-Q>%5?<eln7|bpA3_kED;3em0x?Ort}E6vK$C)Aa54-o**JYqXp3qC9lfj8
zPv3hR$ckRXAoH<!KfT7~m<ypWHp3T(lZ@6h@<wyA6r4b?CK*A^z~B@KF8QJ9MVnzg
z2=f8`UF7>L!{ez!S5MX6llWmTZCY)bN)y7BKRo(B>!Afaqd&C%@zlF?+UQcCkrBH2
zR2dAlRFJbOX_j;DU<<i(m2^@9Aa#(l>k@K6m3H6pJsof8p{PqF<$zA~OaOXe!75$4
z2H>n|F6g8$ADJqO#rp}<RAsUa)&}(%Fof-Zt+yirf0=@e`kk|T&%Jj`EZG>}0JMRJ
zbst@?5x@Rp4!d~y-oM`BKfCrdj;h9~?{FZ;@YZKw6kG6crzimCv$yg+GN5NFdE56#
z6YM1+)vvzTgfHB}oR(dW<vM7{o{j3G<pk;WoJ4t{zeAEdb77K0l-DZ^?-=mHdXRaI
z-5?TE@-ST*POY_Z(qsisdOjtg42L+=J0h@{PV8l{F^H|{aJ@4a3+m}SVmBjJv5zVY
zjK?LF>^%j7{wqpN|5+6VBU`hVyL$f&%h?JQ4ixk*&s*HtSUsAy-xSUMeshLFt0dr0
ztQB5f<YPXeZv%RFXHL_vqG^=)tX<y63TC}o5YZ}<{o-Cz`oLpRERV)k9YZbaHi}(#
zO?;Ns^nNhEG@fq#^N=R}VUzzaKj%VuFSQNZLlq5~EL&9?JK95@2;-S8DT4FDp82Lt
zs}8wY?Dz~))STP`?cS9Ef(W4%Q4<P7GFZZ|XF9SZDAhHjfg1rHi<13U*BO6g1R)k)
z$ysIG&%Wq}<px6LjT$JA!={GJz;KnYX8iI>gcgqAQ90-cUVkUW>{ot+5>A%BaAG43
zDtWAkY&Ew-?Nkuo&GgaAc72+8m%Cqoi@eQinIJ5hkDI77(ygO*w*Rip_gqBKK!7)F
zM(MBSWR}=z|2*!fr5FFc=2@fi?T7q8O(L(hC8g0@JR|T8U!yzUEj&zv*=6P*oU{FQ
z2e>lU1J|6dVWzng?T4Yfx(jdD$G0WC)dBDdA?Pr~wdXgs<?z)NF<fENfy)&pn?LjE
z8k0M%#MYDhbSl&#E;w8xwf&#sl)Ngq{yW{527XKP?|(I$VmOSmrH^()8l3`HwP#nt
zrFBNSA4azW{Xw53pL^)Zx2v3_SLp2_!wLgEYajDauvj-$1;(ean?U5b%KABIsxVdW
z1lHkmEZLHxjDbvE#IM1r0QsfsF3lUf+j>*D!i2Geix2bP;KbX#&L3%38qU3wuA_6d
zPB~|69Z169InM?Yjb$yU$7c@-yrHI<r;RZ1(dLz#_530ZIW@=+Lf?0V?h|H@uWodD
z6HkzZlgnJ%`1jGOHQQ14UG($Zx<KD2?1qDT;Lmx9H?R9|3=N*|E^FynXvbHy89!+B
zJ6i8SnkI;`d<-=le<O;0d8zv|Pv<ji`B@3Mrf+Zrh4U0squ*gW>uFavjuPrdqmS*M
zJo1^r5PXHy&qaWeVrUc4VR3=oUKUL)&T4J?9!oGpk~jctSN^?fhMT8?#Ur5hvqh4<
z9C7+MpL~>+@uzlc_GuEsi$Rmh!R%Ic$2cjTQ~+j`F64cGxHW%9nf3Z?6W5r+8CAHz
z%f`Yas)ikHwoM^7u++dm3aUIBP~|O>998JTg<{xED8vb22|DPnSd_oKoiAlu^Tw=*
zi2L+-b+FfJAP`;OxK?Ac&4^a{@Wd2c*x#vQP~|^8FEcoy*ur`A%>-<>*VSNsf>Yb?
zU8*<AO^*FBiq!{Q=iDJNN3ASoct*G1)vG}Mj%7Z0IEKthS5nUn?2)yUc2J^AiYn^R
zlHC4jAIh$ub<%I1tt~p4Yp*+@!J;>CaU0b!4)#H!5N}iIJiE#)-Kn}6k@RUG3pr(R
z!4UqFtW%A#T+v!JNdjIQj8`vT^Ml@cZeVC7hI}^sgI*@b-p1-^-efm>vl$kb)eX(m
z!|WHxQPjqIf^b40my?0x(SY#!fQAe_=Gs)@gMZ!uwoZj#(__oxVzj7glJz@5Ezf62
z@YfijS(#2#O1MuRjSelo)KNXxQJMR$m|3$+UviQ>^@Db~b)&Up{u}*khZ|QTs%W^*
zh#(Hj$Eo;hB$oD&M9`-><%)pxY<w4X8tH|AmP#Z=zHl!3A+}!2e2dr$t?Itp4Pzd)
zB#|;ia>2z^wKn~Fmy&7@x1kC8&nK&@o(xJaa<-4YbuAduOY&%O@fiJZzy-(c8jXfu
z<&a|oHEhdo4==Zs)6PfKu+Kh~GAVJnulW?2OQ9c5Q=!#iMm$})egQOR$kf`4ncp;(
z{@{AXmyG|u{k~`28>1?o>1LuhuUgbER5S^nP<d8O8$0P|F8OUf$djNK_C`CCTPusB
zfL3Yhj|HeMOo*9E4u#$TMTKkNuX4*jBrK8&ex?4Eb&b$!ZkVGDYD`{t)F@u*3y-A-
zPZ8I1Wzmn)-L10)%WZzBn{C=1)Uk9UpTplX?a95?z0%S97)atTdCQa~r-7{}HE))%
zm&_Y^eL9V8$WkdL{X#xVJhsB@8=isd0F>9gv)q<`y{v*piI>sz{WBr#{58@{>kdQp
zmYCC7%_Ii$LD=sEp_+Q|SW^O}X1hwQY5y!ne0oXR!oN9j|0;6JDUEJJ4s5wMd8Wo1
z8T%kQ7-~+Gx0P{WsrO9BmCIFGCRi2*Fl;-S8wET&be~Os*<?fPfGJZv)HGu%o|E|^
z88Fi}`V3Df*v+*4U>IO~`!aJuhksShQ+O4VMW$o-SIxF=nh9wCe@Xt?VjiNftK+t;
zq@zNBA5lF8JK>?Y4evt|{<8Jg^^3HP??I6rI{D@fFU7oP+U<Pp?A9&TYT8#n{8Wy;
zfR;+K)6Ov4|Fs)ER&s8d=(#HCjNr}fac;h70MW)q+rTG9x<BB%2-IZzbtQ<FpUHq;
zb0K0hH_vZy<He#GzI)?Xaim{E=I2>$erLAp^R9Ga*lb;A^9dA||G;e0ubj_d?99LD
zS*Q3Ei`Dp*VxXbBDIREahFD4_dIT~6bIq!?vSyJNYkai_%`hwNk8iw+xix<|vS{6`
z{oxYzse>hZt*Lp6yf8wk-aBNh{h8GQnES*cQh9ck-deAKit!6~njOQs>njrzHlq)o
zuXJv2whgFDdD6NoS>cKIM9)k-8Lno8kt7eis(Dy<2~?g$*i(odFXySf?d|Qi{UP#3
zV+)Jq6*Vff$)`>gi~$yyIUt=55w!-dQF3bZN*^W|B6=^1YGX&%J5ews=@k!r(<^F2
zNhP)|si+R}de%>5b(r!{bUQbks$;ZGO|X}k9gDvDVT5u*m9igv_0Jv?tnbpj<~=2e
z<YUC2mFxAY88qzG&h?NhcGLgn5vThZuCh}Bao{no7Oa#}ag^6nI>oS+)4(zlNiB>&
zh}rC^ELkQJt@<jehHn!h{;bTb+NX1Op<HU_IIDQxJf9&(YUK0KEW}xF_qBy4O|+-d
zf^DJ7C#ga+@5nhg>eq#~*!sy{IunYYl(0OQMfIx($Wb{>$VwrSZ11^Zwdf1GwD3!d
zF@IXX=Cg8k=H+aJTsFS$IRT90h|5=GM#43KIJgk=y#+y8I4&#%)Bx^<uymy;-uMyS
zzc;pmI<1cU>W8r$bAvmAH9ANGJ``&`W1#F{A`^ia0wkwYeZYd4wM>MbtpF`CX!*ku
z#xep*!ZlX48-11HC=)RehmUn*WJDuC*p`P4`0A|lchZ0zT)UJp4g9cx4lvM$0O&*a
zS$i%kUKbfYcgzsoIN&w?p3*|repJ=}Ez-ng5K~96PM{8qOO)SXFI6F`ry0A5y()12
zY4Q<?bzqVqBBM(3y&>N|-@oTW-sAGoGp1|lC68lwFrxVu=^tDJCKwxZ+aY{_ZRiwo
z8wE~V2WLSSF><kcSdu)rz;_`%iKl%NsGnEP{MAsONDF5HG(u7-o9f0fA-=e%n1K2B
zr!2>_PCl;BR)lK-5bQUP18Y2UZykF7p7^*ciLBBOdG7j)=L|yPEYIcd0s8FtxGA6o
zSZUC*Puj+yDw4&o7f%K2MC19TyBe^)Wp@4cm~P*k@!F0?;VS!4r%5WYKf=gP+buoS
zL|di*w9b}{v<cr#o@^w^XN7(1uOjx9YONMzt5M-U?xsfJ%`BEjXX>&okaZi++O)m?
z*}6tlf*?m=6ohND-C6hCpR{hofO01be@-*G^kwl~B}#s)og<bmk6@j=p%SVBzWgU#
zJ%5Lxd^k*ztO1lppE|Oz=FF2zsr5M1B8zfOb$f**(*OhAzjsAFy&wS>OGlO-rMisf
zz?^{b)4EG%nmFof+>XQ@FASCow<S=qdW$O#f^k;KWf>t@z1Y_Yf?!-FO@0sr%F!|g
zf!pQT>~tN6-z{@bDUsR})d|817|sLn06(itg-==z$Pmz*YQSZ-HS;S21oFU^pKriA
zUB6%XG8u5%7+KQH+Jk}X!A-&Mx~LsaUCVwzmN}#oZMjeem@`{&&e=&;IBp2c6i|M0
z1KZ;$f&eGBoIP7(f7~hKL;ivt)rI}4<V0}oyf8b!Z3@-9fO-8gO)Fc}3dmQJJP5$G
zVvN;?0=+&4xu-p#5f^k2XHV?=pDf)AIm{lowYDKmDptG%R2t#2W_7?;qD9Lb1S333
zU>@CT!nd4*9Q_i4EhPF0B58Q?O_`SkJ7XjFnBpY)7MVTMIO=;Yr%t>Vkr}5}gn^|4
zm|Cu*EshYdnBZ1a)x>eda<*lUc<ung&=PTN=_{=#Tk{|Eh7?OtUKeW;n+}<$4I-Ks
z#<~e#0e2_YD)KuZk1e7z787wmupRZG;139mKWDto|FZcyd1~hd4<h#DDlqz4Z#y44
z0k>mr(3CkDFvirr_;7&==`@rc7a=&Pe@O}`!uWM;>aIOTLLs|0>~PoWpF>ohgQDNZ
zl`D7KBwn4mAFuRg6Hl>iYRG4Wd@BUokf)WkovlW@mGy7UQ{L2KCUnj&=KsG9uYN`>
z0FF*^5)aaU+c2u9eY=&^*J!pQ>pzYTHK2<P<)=9Xe$?d=Ww$j{LTBbeY{M`IseE|3
zA}NB;lhTnzh}ecSq<DOBGQvd-LCvzPSRRA^y+USyvq}wWab-}8OY$62KkRS>^x_`q
zaZL5mA*zK$axmvSX}2R+s@BC5{<FX1@ElP^E<@WDX}$^scH3vvcsILg2}xou^g<4|
zo#zoHspzp0g1kGM|3^%xRcc)4rpFBv)X>k{^s*6QP2rE)=5x6HtQn)b!vzp@?6;xF
zlW-{B*0jV80P>gq@Lv2s7>(!e(8YxbKr8%tCh`$TkC~s1$@H}ko!TmMhWApzW?#Km
zD?#YOO8yWDfu1eF*S=i9Z~U@NXf+W7+6P^A2o~6i-kT7(op*7yXe;d<8$5<zG$j2w
zWZbf8UK)hXzZz!xK)ZUCpY#R7d6kdMq6)aL-r0nN0Ru<MZ7q=j+)$1j6wX!<?2Pz*
znh+Ff3Tq4y0fPlhaD!?!n#`0;v3Q$Tho-2Xd@+l0z`+iVe5n)nQ7~!{(t(4+g%1j;
zsfUl$J7`#jR#!rZn0{pc!<C2yOy1{j4kMQolCD_C>|E%QcaHn5h3Ao@KhG}W9?;9!
z^v66Cq(SiQe&H2!NdDlr-pnGbH{%{w!Jc1GW%ntLMq_JGZ~;|v)<ANH`@kC66V*?$
zS)Si$s)#-5(h(ej^gtT}B_!KlazxMd%;yUe4l*|bt4elxQ>*0OD!sHh6pyX8z4CcD
zSkRHy_?0z2FZH=Jv8Rr*X=x=&Bt9vFPeF~Yi@}Naq^yEgNw~?3<>Q;xU076eBom&U
z!rlLx=N$%DezTi7co5bn_};ZAxTU-F52=4L#BEHME%i72jJT;mn;=6UBo!97UzW7s
z_T=3*3kETO;oJ-lOtQ0J&}K!N@^LBl;1qnvk;(YX#~50XOba%xM0NczQwxjMZeGyW
zSfFst1Yux&b(fl%fm2Hhlhqe=9Zg>^YRiJGcW66+qt09~AD`;H{#b2On>mrNWhhs{
z)*myU!47U{3yYxzPY}$$eLyu(0vgOw+pqY57pgOH2X5BJl>ynWX}O=cXN4k$oG7wR
zx|aAh0V6(i**=j5(5<e9_zkQLz^R@#-od*-0>ZL-`w7siAd@?*#(<-)Y@CHDU*d3^
z>-MuPCaPxsZ+=NF-|x<D-ux%woZf5mH{{&lCeZ#q^A$mY#1}@x7l)-|i{;D36=XR#
zCAnGfFH8kCUTynKO?w6=s;h+cnT)=^0^j31dARBV2xC74$(p=@Hr^NK^cfI34{ho_
zBATRiL$?wO_d@5zzRj^*_tZW@2QNT4I@DGNRR3=tGMh>I;2zw|6zgA>k5GLsFgcFU
z84<<%Jck?xPa0lbArDU*+3z#X#)=$=`UPfEXinL<oSj@-x18tA1BMDuaw%2Q4`w(}
z_Ix;KZW3FE-9|i_n@cLDX!=?iPBG1G7g#l2M#ikI5fmdf(-cam)4WvVbC9;^d*rRO
z`(I-^%Nr10i32wB+Br3+&AaH_crnU<(#lI%Z#U!llZJ9yd@daER@zbctwJDQ7-Su`
z2;Y&0auCBNRq0I}NTyWvx4Gu<f{~8Ui$Ss+5ZhrY<oW*l7CFCDNmmqHpfOgz>ygt6
z1l~$p&+0@2gcMR|@&w&Di-it`(z&2NqNZG*m_Vbr;fPaW8z*N)w>r^Uq|<AD1C2S#
zDvA?Y6W9T0?IbMs?|BBz#v44g_|mfm6J3bc<Ob<L6@~qZHX2|!_z-(qA^y&Ry#ZsG
zot;~#qxYN2mnBIU+zVV?2$Aw&(c5t#1Zv*CrU$!8se4D0xgNl53bHs>IG0_0W2`=)
z$lQ7I))`LnwnbY`m`?p8M%cALl}fb0)lI9Ts7qU<i-?21%KZKoh$y01HZzAZQok7`
zV)_78+Ofiw=$@!oe!D?mL~ORJ&7<W>ND4kas-2N(2T?S&`*)-={pdkR_44K^$jc>R
z8o+!du6YZ`jt0-1+%KCIP%co%o6I&SO9pJT|DCC~VFnXH-={J@CGz5pG)E6{_}$(e
zGmz+V%*Ue~Nxi%0k}H}E==fX-#(pv-q|VJVzJ|?WtI|eVWk+tnNNuZ(q0JG%y;Vb(
zd_n*)-~YDzfG`Yb#wg=Ptd{E`g2qEKmc|D?^)i6wE>y>;*Kh3ZBE?)R;QoevnUJA_
zZ<RQY@PN(9Le)m~8T9#^8t)Ow6aRN;{Gb_Y<3Z7!;*f1LN=)v$w?b0M3x_3yjP;>7
zFTYTE^=Q}}(NAnS8=+R>{68VltG{d7@&Ugc&aj)yFldxsTNh|z@WgO07-$tq^3PS%
z-AM@+%b~C(Vs6X0jj%<|s~*LCo*&F(2<IvI{8W=|kB8iqJki?MaJHdghQK|s5YiO2
z62)Iboz6ouJ=alcK~z#^O{2!ywafIJTNt@E$ODxY=m4*SnoC$Em8NCoWN}ccX^E?+
z?3)Z4s;g82wJx6{N%qKoh-wAN#h*<;2H0w_8XrZ_W`Ko)E6{p2g6ILo9%H#h0(@11
z*zV<&!zz*E9XsI-E#o^p*h~o1S_T0Jgz2;!(2mpmKCDOT0GwZK6pBc6@jaVh(-tsz
zcDANork;eo{-ygU$kH)OXHxQJwHXdl7sL&df9UBrmlF#ebqzv!{|~4b<5H2w$sK=U
zK(L0-#42<TI`98hdO(c$m!4upaIelW)IUMpY}sBwm>ei~`t<;Q?ziyL#Q-j((Cw7l
zJ+My!{h8>Ug`#9zjr~MXT=ZF~DSASK2&zHS6V%R3>T(c;g}&`vYlF**)?cBIkcXGf
zf`~mgAN(r2JZN9(>bp@c@BP5k^@%W2=)AoXFq_CiS^z*Ewdz842pnDwABKPhf@NI;
zP~V*DywATM*eAGlc>1y`+xeDQ5|)Xa{u?Opx9lE6F0u$=_<^O)ySlv!I^+Y`E=Zvh
zTTH^>{`1&x2<6JJk3Mxbc=4F(sEP?`BHjoFNVFW|!-oPeGr)Y41dwolRXYRT(|lnw
z0rgK$B76mQ*j%x(`|6us7&A>aNvb&aiMEcPR(x-0d${S~$p(|`_Md*t@*246-t!V4
zchaQGO5A@$wDW6%bjfr7Ib4hB+}cssMXv_u&q8|^DuTdIIryy-1q4t294*N)J0Wd?
z;N-V6d!fVzLtC@X<M9_UrThP}^OR`hpMdd7{MIF9vdsXI8sfcbfFnCYC`H@DcO%P(
zYL0Xir}bn(?HAG!<9d*Xrb(|5Gn0t}cKvELl`I?2ar`OoRX(`RB9B?Cz;w#ep&bG(
z{~eSo_AV}>21F>9)msh_9EtH<FTaGngi;XI*TsHt0X;n}_z=}Kvs?vxf1Cw-g%nt$
z=v*e=Afctl?$xaRrJq$icIKRxC5<&7$r{1bsSw5SN6lq-s$HFzfg&)Ix%lCOq#!+?
z=#s2*ffo&S^xp)WdPpO_rJq422&1L04)#iB<D*vJjR$VQ;ZGKbxjgO$WHnCf>5mvN
zZxKF49O*>(z=<fgN5PsP4aDG>w!U3laFa9;<o>sHul8}khCKWw|Cr?ww;z1t!KeWn
z=6IN0NdO~A7U*OJ${hgV850Lns$g=*dOI9K)&;6x6Fz;PoN^GQ2A0PL#)trdTHH~2
zv1p;VQBVfGL90F(DBku2in;O849V6^3y@n`(ax^$qF*jen3EBJ;^UaX6g(;M398j@
zEvQ#=dsz9y*(vu+E{}h{sEaV*^d1>W>TF=fvTW>Y13TA$BH|lxdD41hGwMNT(>FWX
z=c|K=8s+@}_B_mTso~z=m=oFfMQWd3mg@4l?!6#T8~`$<p9aFU24~L<>`N>P>t2fk
z0FNlFue`nkSP|E3cEk(rCMEKy;;W(7kS)2D*jjQs-;2mxmtk=<*?BjkwP-(zY{f3<
z_1bSyz=%ep?^i&QbDPZC#eV>HG@Hz-8ghj(-$JU1gXq3j-`8pZJDPPBuw|3}t7Vz4
ztOMJ(8%ddsX4_*h=jf8@sG7DarSsCX13zl;(AO~;s-fEA%M;-0xjRVZ!F3eLYUFG|
zEnA2ML;<BQNvE6ZqoyHADAl57rscE;$P0KKV6fL4<iVrfEA-n`{}Yv6XPKG3@FxBZ
zJs8~oal9xQ2LQ-|vnPcF);L}kN$Uy(DXoj-IEg67P4|vn{-b{6fc$UAU&4U%=Ez-z
z-(i1GQIrYDa=zVVjbSbQ)c;q?(TAwHZtylXO}`50vSDpg*MZM0=83S<f8yDqUCru9
zd1XJ=65c*c%DB)P>9Z0<lrJ1H(ivomS|Q@2;Us5reC3f%*viJ?`P;FVQ$&FP*4R$j
z`dqB$&p5GTyW7X@_0}LF*~ajp%AkUyy9he4iDX0djl8#k(#&JW&mEj{fn|KGHE_9*
z9$s%@njvgI`OIf(Je7@l>`tp->d9*|G;#()3>I4B!MIs<+nFE?*eI>Of|Pzi25JGV
z+px}{O5M$X6%j=(w*e%U!e2vX$L{<&{rHN#CW39B0e<Oo)zVkrK@o$S2o7f>1XUs%
zZePi*6F%f`o7nsnHc9mPdCmO3HwFf*C+&N;++TdythMCSdU8Ro?M4zhMpFMn5$GFk
zp4?CUe5h}je@QElb0fiumaY6h%)f2BMWJ<v_w1*`-8~b08;yRB|ME0v%m=N+wPN08
zc@c}IFI15c|052`lEV)VBep0(FZQvTaBKGUkUUn9P$hVmm5f_dO&rW~wsvlSEnXL8
zsDz`SCqIiYINDJe<Np%Ln)UeweO)OUZ_R(q(v}?+s15_R9*-`&t&!4mTf}?gArEdW
z+w)>|pO^+7HlD&&b=y?az$81sO5Nk_9B4E2d|(55?lOn{a|&NX(j4t`jRiT(;$wwl
zecZ*>gpi^DinNtGWS~n%&|VKQ&~1XAgaEl=4~{{q0+^06pmr97A!6VCUtSPFvKU_>
zANhByK!%XBc%98|&F}nBmNpydnm7~);jHYPtr{Q(=m~5unJ9sJcK7!Vu6Yj9qMBRY
zMp@~vFA2^ozyV^NWXgsB?1B!U;|0a+ps&Cl32e5tuz((9$1<nidJ?C|p1+4onauxs
zaE(*)KbN&bj>wDqzMcV!*9Z1UXG(Sx*ruxtP?SXA@650uQY3@1L=QN5gE86h0Fi%b
zq6MO|##BkP=$-}%;$q|xCPOfx9bEU{e4X^glDG<KFPVY$3_P>-xpD+9l<xRtsX8yt
zwNeXzV!~fA3>S@+6?jbsPN{4s&K%^>cwtkQDgdu|^hK&EI0&a@OF)Fx@e$!1Ub{4f
z&-AvOYg0L4L4<zcxB=u?c9v{F8_VfF?Yz>1`fU)6xH1}~fL5{7?Q-DWg+_buBrwl-
zpV`~yr5cq#vG6`R?Va(Eu+4g#=?->4nN9xGTri>N&{E-@(S$T_hm&NdLWlC)n=G+!
zg=P50bg$N2Rk1y4nY5L*Z7}6#(A2T!`xV{$;`<^Q#)*(vMun9g(ecG&3*s4PAqF<c
z&&3O4f4wL03PDdMl%S1>*fzJ)A%acf(wR8;OsdLu0FBWl<dE?*2sZup&so^6AH<l1
zgP6J^NjRGFr|NNm_+``FjRN3u9l@95+nJM~$Mfd+D_w;07YuJK1gyTRxiEu|Tn2tI
z;mKx1uNy(dx^^}pDngJ7R$C&^eLZ@=iD6WUA?iPUmq<i2l)=-^4$)}!{!*W3Lv<za
zOQi5}Mnf4XbUCgiHbMs}bcD*AF&99y&Pkms;LuQJJkT|4HAMo7lM90Ur&h<6wdao7
zug$&5MfFSGup5fb5W;3V=Lqv$F+oY$C%6PMnfu=XD!)2#1MJUSo#*NFnh0DTk#qzV
zWYg!oDP7ZlHeCf}c$aprW7xZ;YUTa{^t@>9aX0(`L4GlGYMbO&kYR$DKc`7~9nzvd
zuy~kT40fwX@$<{dGRkP&wbS=bWH%{8GiVe52eMjWJGBD}Ja^zzd`Va~g$fF8G^aq$
zvVJxAkt+&6@>8lPMr(cP#&CFQAEPt}KjYWaegv^;m;7(JFKCy9Exk1>kIKK>vH+62
z*f-=_pbx%+sVjuPq4VK&eqJn9t+5oQPSWUwLwQi@BzHJ}#*+vI#^<yPP8^&Ole_$T
z;QSy;Kc65763E@}r`7PN^KQ&ohq9FKO!?Q`CXNez9b-G4>fSZF++qR7O3^1kNe|cI
ze6+6Bb<@`?bJPitd<KNs$#{BE^Zg%xWbqEq|5(QE26lc*%r;ySnDu&h(>ZQqe%-v?
zrQW=Pj0y-lKWZ(EH`r&7lugzlM(Uu2OZXnNWt-4}onL%#CZF{*VRx}7FFoTSosu+9
zh&?3H6V(!+FFk<1u8@w(0s3k(>D5zUlBT~-!#&a?pJ3xzC=uz_1ujThzG(SDxh;X)
zDw-hqli1~o^{-}Nkqj$u0eaH$)qJ#I3Lw5?n0kFby{ewgYQ#0>!2nDOWOPt0E(Fb^
zfmQciGLZz)u59XN>OYl*=Znt1C<NAbE=;>@_Ivwb1FGw0vO4g<3qmE?B4oo8n3TY=
zYs3-I=OHNZJwRz`^fl)jnxH8$-|)f#;o$b}%{B%1s$c83XBM8t<3g5yxRiR8H!Cdh
z@({p={}jFM4LCiNUf!;7P;)3t+ybdu8GAG9f4w$Y?P&8(i~FP!*JZx$9s&-Qg7uaF
z4q%Oxz095gpiXWN3nwB_8~Zm$wqAiqc068f2(;jF&m!B};{;8P{r-3-Qh1OJgt~*F
z2rMg#8Zfrb1|=sni-Y=XyWRs|1eeZtU*GpCvy!1Hp!8PCY+mbItu6@u$Yfouh^^C}
zdWC{T>~@!M1o|PYaO3VeUGURojs07gP0HBC&p6rOCY-TnP%uZi%<A`BO?k*iBRH#C
zHO3DKwPEjd{rvN3{#Lkfp5R@1-saBV)uUPaUC~a!Mgf#0DQhIH7_lrT;y-B%tz@46
z(n1Om{xrm<C3i)4mJ@!>zBl^Ew(6>%<g+4hX@P8l=lS}%k&|>?g`pCL0+&1i*my*t
zssrNuGHh9ei*vxrhPQwY*xr0=3Hjz{UK+0_C0+N`Pzf`TA%zET8XXp25(kfBH948a
zl%!yyf+dUtwIn8LHSMP-3rV=|JbBP2CUp;OzE3<sVsXgnPYVg^t895&5j5)+wtU6u
zpa_u5&~wE|(A|mSHS>G7wD%TByW5yNg2)6aeHbny+%nRi>DcrMCZdOQr;v<g4MGkJ
z<_x;6`vL1I+~mIq9~Mg&GI?W0bQ)SJjdppXpAn1=@%<IyBMwre*mmn1a7$kG^__m(
zwx9;Rq65<19XucpdzLo?oZPJ3#Bt7UzhQoA-=J0GLOW*FR~UF|-)q7%f(`0i&);d&
z)YV9UW|1laQSW93WWFlCGi)4<3>=3Vp<03sjX+Dulfx^~@k~^#`R1hpIUw3`*EMsA
z267t5g^%ar5hlDnS2ANpaRm*it0rWg{J5Si>F*sRm*u!O?N<En%d&`oSTpix0&EWg
z5P$G(IRN@G%jBjqVkY!L6u_dVk?Yi(g6-JpB~Ut2)ZK3q7vyK2c#z<d9FCj;sy2US
zCc-gwT~X|vQT-FGlmT+$DrD5o5kqFhQMBqo|JcB;kziP3x5!_&o1Q8vfvg)ER@rRj
zr<k>EyLrn>9wIMpw~fA7KAbL#v<9e9+mK4c)~o+-rmOKsXH(!h>^=fh(;YO|n{Zi%
zs3wH%@~Y;mLjMzGeL{$`bl5=PYTs@lbu}XBOeO?OIU1V5|5bs}5!5kGn}-)XIY0?r
zK}dcUF*=gYhi52~AbR#-bc7f)6~ENGxc*KoVfCl0{()&XCe|joneg?dfI8DhP{shM
zeDTR3EyDgzX)_p%4)5hPLHEaJ^UtsREI>|6l?_ES&|hpX$!m^wPNeESEUj1W&MyqR
z(0b`>XY2<UJNw8Zj6xRG9e^vAWp$s5C(ye$UGMOZ0x2^9QM3GrL|7G;a6U9}n$eQ2
z_x#Q^W%}d0|8upeqgn+$>qdhhJ~Vu2S`{lwX)i!ic+UyI)CC&<mk=29akF`lZxDO*
zG(^E-i|sGZ+>K$DXvEK4U)1-1tCQ^-1Df7%bp({R6^Q%0{si3y5tF`%mV!nYr~1PA
zE!+Hlw*w)X;9~vYvuu|ZJ8w*N1z8BIPam%hsF~7^7wJII%?%7+!bA8^o}PzB5V4q8
zR5wSkqhjs0ATLU)ou0&cPJYO*8#{rM1k}C+b}4~&;lqH&#**g~jku>wj2Xd@BY?yS
z6X3@<j3iK(n#OHwN4SvOI?t@?d1AK`qF{9kjhz73RF<CQNjpS>9SlMS_S9H4xf%HR
zx!=*2#>%h$F!}}bNotF!LQcZ9aJagA2maH`ie4KKdu<t&S?->aKK|`!sH9HfxqpeO
z&3n%1sOxH=Ugk*hsE-<hD7X}yQ=cFbTI>9e8Ek}tPl*P2FFmUT3C+N5z@dAni^g60
zKW(>|9Fo>EF{R;0%35nPBLYNi2I@GPrvH(Jsz60b<F!B?_j7HE=X3c-o|E&0TGOX1
zB(cB=M;SQb453vQxgjr12O%B(C<@{wk`h|rTJ>RbH#d)$^``+I+rurnwq|r@@GLKT
zw@L<G8cSAgnyfmEM3)At3?F6=GdQWsgS2QgAtx-YO_wGeH=h4wz*3C-nj@`9ICi3w
znAo&Da&PR(s%R=67-57>Nd0)NjYlFWnj&9m&%7^_90d{^({xkq*e5?=M`G@mut36Z
zmKsw|?Huu#KWqA6qX2(JF0o#=?!aR;^YQmd3RfT>A5RmM^lvpRNa|FY*O8<{+5
ze*b$&gS&~ATXsZQCV*Rp&%<;lu6HY60ZL9lxHScpGPcEJ;IT!#1;?i@Hm4@Tz_hXv
zH?i$pO9G?^3-q`GUE<?+TzIhc2?-reQSyPcdSb0nS;#M=(hWByD(zNNtag>2Esob-
zWWa~8UBcdIg-@1v3YQbc;J)rLV6oygEPm>MmYL4^Of`G8`Nf4J)C#cl*J9GH=KQg#
z&w7Zh(?h4cHZWLxZim7jM%SW9>@F8~PljsU5q)BZxuF8<F4j|4c(2~0+}Yh6@i^Qw
zA$|!@=NNwDO{wcy{0ETzO}5PnZKguZ1YQ%HGVVs@oN{{#q;cU#aN#UT{BSZ!2g<fz
zVi~X;=ZiZ-383T7Hj&6Db6f>RLY4v>$2rbEej`MzPQPjmv#fY~7qUN=`7mSY4kN36
ziWf+`9~Cw&@$p|hcJW&cqQ0aw_0)ImngOi)&49jx-bo7}3{G=OUT^uC;0DC)D*+SV
zPSHD=dC@di(fwI>=jYn{7&brME!kiLdFN)S@J?-~`tvaG`b~7A-H+3`xKx=zKEtuI
zzdU9@iquAo!|Bk~AqHjKCR(K$CIYg(wZfsM3)fBTlecP3FLVXRuOyxe@8r2zzM6M4
zSPS=C&%)}mx}3Wa^(eMMV{5bBmUW#d+@1M-y?OxVT`965fe2d-Ke{i7c~_Yux`FOr
zCQNxXTwDKC;DwMymT*+F!yC>L^H!b?XlBvt^p0)358wjQv*#h>MADxbljxABWfr}T
zZQ=*R)M9A%c$2E+->QMx*fxd!z>h{joT{Kn4tfCHK+5jL)=~M(xiBjecby`s_vjH9
zR+aHCrO}GL^g`$--e>$L_7c#qc<$Xa)X*XEmFu>F-%ZB7ChxgVr6@|T#tDH>!L5eF
zu+i|le$`8|<KH@GwjYA>EMc+Y=Vo(zb!oltmez}){aB|bg3Q%-G9trv-2c3OqO;@;
zypE-09n`8V>R0VR{#dx@D|RZNBPmU=-SxA6^MC|q<|mNyiS5HC)oK+CUtI|)6F=*8
z19ndaJu>j;l(6w(KdeQAD|~J(b7#nD=|uj6ochvw)+_X6zAD5dJau89Q=V^Im)9hm
z`YiiPl7;O>8gf{wfLY)U?7^UpvJdk6rTP>xkD+XFa!r+P4}9EJy0w#C37SUN>T3m5
zF!7d6Cxz)&R>1x2+s!V|`UQgt{VlgqcsA8^uTC_+@P?9*W2&O+MFFgtNi;c_6Nb9>
zUL@|*dXc9-Hl<X4i!k3=#<yI@bVH=O2kQW##WfTBT2=Iie=D6Wd1H>nzc;d|razY9
z88^1JcC&|iLyF^`lF~u+&><IzW9InuuY&0YI39hwwC=SwJiVx~RhoV!PAXiBS#%mS
z+ozAstz67m#B(lLJe!B`y84qt>TV3$(RgIezNUXL%dcK)RoPH8z4H5IeSzO*DGy$!
z6k-q7Uh-zid>9vt+Vv`(ug^&p$jLakn1wVy_ctDIFKJPO&WW8H*o$5i`l|PYC!Y$a
z=<c>k7WsvWk6z&09cMPL#KK>ON2cAE>U||j@rzUMDktfl)q3EOt2mcqam_CTWuLc<
z;aR^ZVHzAQ`{DK9)fx|fQr02*9ET2zI&M9v{Vu$&?gBhaf=A{=_aE!;pV$@OD&jFi
z=lbs<f3AX+0+VJSCEh6Mcx+YS7L-DTxHjLa2t?h|GAgP0&P}3RIJQJIc;Tp&B7H7)
zv)H6i<mZ(Ab+QDgJTmhtsH#t!$vuVQ)W=_ZWSFskXU(cW@!~}dmhEe_4kQOvQ+HxL
zkHkzA%99W)M_u9a%-I<YZ@!x5V3l~I_`h*v%S)m3Eu^$lk&g`%RwPze6Kk@AQ!??a
zgMEaW4#C4eQXF0%i51DtEy(<}-}@>>Zr?Xg=xwijf7&Rsw=Uu2us0<YXTR46(Q`1h
z+Db(mBOnNG#WQ$z-#AT6>jOhs5MTSbXwpIy6a8g%sV4s4@={WgAl8$kKh<V1%p=$c
zX~}>kmJyUYp{gD{X?iJ)g2`Hu;&DghcF<O-Vrb;AhX${NWMZFX$|4G*5SIEFgEW7Z
z88HQFtN+ExbPI>fM{#ClT+OdGw)R($VjRb0-gzORA3nml@*QO^VkK2Hy+5|kTW80A
za`K%gn!OjuL2nPn1k1NfVJx{mFbyGNyhYpAA5F`EFTIuVi0sXmY!6v_>uBkA7%PP=
z5*ewcc4V06Z8#%Sf<?%Ho?tO)dCm-Pk^fm&6<<LLT@4|RPVq7kX2hm=W^bsy{$&y3
zVU_$D>T4z_rvWLx%Lk8j#Xip=Z<3yxqKq=2Nx|DjUP<r>8;#<DT&I;)q*+wR>XClp
zO!5_eFcQpwmuoZI9n?19t(qqOiX<eFlzJzDN%ODnJFe9-zvj~A{2e1+5pfvaeag98
zq7Qp{MfQ6q>QPpSmn|KpH7{Enw`Kl%7+#ahLgg=c+t&i>viKo3f9PMonEGbL6fe5}
zBPi1C-Fn<9?0eOPnhln|iTY84Tm%{K`u*0mZhI<RfG-<%7ErZf7w3>Ny!B$yJk>gG
zMYzW?ux37iNNRnA0SZ|ahY(!8hh#b>s>ain^gkoI@#hu8x~Xzz`HtWDrG<cxAk+qW
zM*8kZ<vbwl@SFV!pAKn0k?1TrlRISft4}9&@7z#k{|s{~lIWzUaY6%;%qa^RF=Vq@
zkk}SBh}DUF_U~5slD717;Q?LZVJ{aBU-xCMp!rFb<*9<o`m*~%geb2k?;$sr`7UOu
zw{5H-@kjehZTRw9_V2`#<*$j+9FWjT>K_n_h6%%U%ge;6l^=P*_$2b7Y}G3g2@}-M
zALoPWSJ7*tZ<eEyl7w{nE7|t2M1GklnH3x}=-E*R@|l?zW49#weiR6mUPN1<qcS_@
z6SLd(W8gElz%+dMC*$E&Af8k13_Ed4kd>){c0v0<-Y?9Fg;eVk4moOk4`~+RW2UKL
zZ?<CUuXmCX&Md{8a?}T29bS-PV#^@wn%4=rq2G54o*56|#WZ~{%A^dP%hQ`xusLRW
zUe$ZV%oJZw<;nDKMa~0K975N|1pa>Y#H}SI1^%+x)lN@-`^3x<tIn>&Gt{fUEtX#H
zkUd;<f25#>K>eFWUI@1Kt71-Tj}xcP{8?Oc&-TpHFdo#3iLv0fEG3*bsk=8GEJLJx
zQKe#ZOjQ5aYfb5;ig{$JOclzK4`F2@f_p;LDB`FkGeRUNqWh*f-Ln+~Gi$n2$_miq
zs^0AM$^iY15f9hB7Z3Zmj^L|V!o_WM%@y^T;$2SfTC0d>zG)au>IHyPn9Pxsfh8Tn
z%(=D8{31p7w`mM!H9DY%_<NAOY9jurkf;=R-p}`4MhegzL4W!Vq8VH(9NI+UnI#R%
z&|iPRW9_R;(+@dr7GeriD~G2qRM*P}H5RP;Go2;Wt(d_2R^@i5z>^2oRoLcDbA;Uc
z^c0{rzqVvblV@}=Mf_gPn;yTeeV_6xNz~qTFNbm$m4xN(VFa-myFu};f*e%E=f|WE
zI1=lNozR3sG0#nk_7_zFad{io$Xw;hfu(2TxMx=#NoW)@uL_!<aeji$wr^$XFTA!r
zXt{ctr)2A}$(^ko`es*_@5Usiy5%eMYMk$jMfQSM5tZs&GVz8)G!5(KaLR0G%YDss
zSI;K{GL$AfI)VV$Q|)(K^*;N-iydy>WM6n;Z=UftaH2sCno)9tH7Kjq>>t<RP7aS#
zg9-L)eZ)Gl!B#Mw#u_zy@(s9)1J*bANBccfLWAqyZVfmfatf6UFo^UFFtD}`z{yo5
zp}xfhoIYEDgfjz=+q3tWuj}Q5;~1<Yp*AmepM;F*?(0aIj^ihdvwlhGIjk=$=4n8W
zVbNzHHB0#V`n~p=8n*#hh@3aX_`0KPRM?`!(TU$jhNX4)Xi6A|5+l|XKFo<u;Jnl2
zD93n0FlAWsvgI_{D7r=MlK|08%HylMdbU76Hg(K*nOv+Y??1ia8NBf^AGe<t*hqWF
z&nA`m-qvS(S<iTwK9G_1ew1<4sU8&*hg^a5Q-Aqg9_FZl)7Vxs?z~uW&iKJmDpR~#
zMd(5F3d4R#GCTv7PCDzn+GFJ7EJ%^>nLWp}92B65uB<KO*yZhkM9Og0xh*3Np${fE
z6o(Y)Sl01s51)*mGm@mFI#Jttl=N%%kUM*VLx*f6<yWdxLdx%%=~HS;7;L~zC+HZN
zlETDS{c;pNJYK0nnYx8y5|a9Js#tJptp&H%c9674>&3_Uel5N;Ua8D|HWp?I-h6d&
z`BzPYk&OxU@Vw;m1GTfrYilVvaTPijPHAuCT$N#aQ5mLFRj2GOKTSXGKwINq7YsL>
zN!njq6J4pS0eToDVe)ow(Jc=zzlS&2l%E`5p4qV4c+{iMzE?j6Nv@2%(Uc;U`=v0J
zx<LB1(w`jSt$&+8n-^4H{pf!>{UbzDJ`pRfM|jfx7tA`&)|k9JC@PZ(00?4o9;w}m
zbPM6Q_)j~MriNpg3Xy6)>5s0A$g`%2F-A<EHF~FKzq&(8_$rWElA1NEVpfWt&Ux<f
znc2ju%jwU;QQsj)QDRMR{TeZjtdH*^#H}fcGeTJs%Da^rU@Xb@jq|1Y>iRK4ZFWI8
zPMvFZCvGB=g)~G_U~sVTXhNF-l~r7JT$k`Yrv<)c(?G7E=}EGsqk_uHveZJFXfp`h
z8kFX{7^SMV&v^K>?cKBvNY?cqqK<Ui5`$TFo>b6(gOJzHYp;L5h#JcB&Wp!fjtZeq
zA(f1uAoI#QoSboBU68xN;`u5?SRFh^Y<Ifh4a2EFcPC3~#uI%;#D$Nd&)O}U-6t^y
z!>zw?NB+pm)&2b9C9Qm0`1JJtXdmf2Xd`_T+>*`^N<g}53au<Fz<72DBhiri(`zRq
z%ZGfdlYc@)5kHM9z;2yXLdBEzD8`qimRVAqX&u)+RGoa%YiaKC2lHJ(%%e2n*ticL
zMH&s=a;Vc`inTqQNY#&SGRf_-J-BTpbi}WHr5APVlG@fMeAJk5P<e}tv;LuubcGjw
zz?tqUS&^;&RT5g^19K1wHE^0{W(}8bixxfSO~z*+W1V>GN!il+i$_V8$~oyL{PX3b
zkm6^9f*GAs_8*ydPMb67F>1C2Bot*;1Zq(GB0{n^T`c^r!XQ`Me!OjLoOXxgmanm`
zPL-^M9C0YfyBq{KF`o3#sU+`d7CTUvKrwP%PvoKUegb}Ni$%=F)Q7Kcr8#?g6#c{1
zScZT4%E=!UclQ6F%xEJox>j=}_JmLj%DXxbVrUq|!x^P(zW}qX`(5*RDh9lT*V3Po
zICW?e;Ec^t)VceG??0COpcmDsTl3WU<tU&lknVfmG4NykN$d~GVZF*lcSy0wN9yOs
zXJK|xT*4ol&Se4^Uwds+^4gxoE=nj4?MW3b>H639GwyqPx;CHcL+5f8lhZ3MHIF$C
zhpaZg3dM}rVX3=(6ZI=KdU493AT4$!G+RVBCjq(`Mo?p`lCa-K!_?Yd1@x0jiI?fR
z?vZM4x;8ZT9Z&t@)neZFWRz)_hQrI-2b{On+u5^Pad~fNSOh%ByK6b+q=%nUMrDUu
zsDnFHoMxJ2BdtJ}q`2-<ZjLg(&xil{t1MJLNQpqS4omG-2SAIy^VXDhxk98+0<Q?Q
zHPI=fh5>Z~7uEz6s~*!0(-c>9T?38pnME&0S^Zm()QX7n+df5-N=2%@A5PaDpYHM6
z`6#@|<EHJqGs)1{aFkh5rS{=DC)YWuz9z3Ud68sGJIkf}X-&Y}zpQIt;bg3w^Tf{f
zr%OL-PaA#SI>sMQ^~c3^^$}~vQx6{`pHbRLZ8=~q4*s=Y`zk{oXF8AXc`D}9B~|b2
zP4K0;{tX##10I^2^~vsJ0EwkR*^)q4{I=eY{BTl|iUGFTgh__eF&IuZtD2C##1;_`
zw3Rg5`3;IvMp6hv6Oy!ZQe;Eqjh_40(&;q^TUeRFoN#+x_X4vRX=uto1XpY5SneEZ
zu#~%y01^7%at|Heo0JEYV87^ELg-Hp#e<{7fPFW1{w`3V9Evd5<iWkCtp3+lf56ZA
z$JI|2YQLEfU4Mae;GZso*vuXMl-@Psl^iV>a(-|tw<9gnxxagc^Fh^_$)<%I3Q1D-
zf0;e(c%NqWF%in3>GD17|C*?VV4bHCF?gk_>yS#fWr*Ib=##!H`ubPD?m#G|f94Lx
z`(54ZGbp`gjT)XS$y0=jAIg;(TpTsOP3dR!5Vje6ak`~s$Q1HUIlPcriD(VA=ohPZ
zLxGSp6G0It^Os8@I)&l1F!Fdcwq?@IdR^TVZAkHr<fIqux$a~*p$t=L?Zw-|Y6!qu
z(=IzBq>5yR#$~p1R<<7cZ{(pB8m)s2E0<%aMMlD#^b*x<C1<=c-R5ICuiMB^nc|Le
zHGccpB!2?Q*TGz%!r75`q9Xij8X8(DBc}?g&g<&!6}rUT^5HhpcOA&+b_HW>$yYdW
zasDbDKh{m=e(r#0#<RXu^A1<D-M0PTQ1+2pUH?oB+M26ukMpvCHlzd<t>5%hYJQjI
z)D4^Y@m~0Zk5XuTym}6XfB8}zyj&F6@B@ENBkMR{@ro+JH`f1i)t7&>xvO)xhDPd#
z`9f;ZRJKR-4>Rua_7?16mm#2{;X@e?bKImv6%JQpZ;|NvAdA<A%|ZRuTJD6E!z#Dm
z;_jOgS-#&Un6&^IU5?7+TV&6AbyBIlC6jKzMTfdKOH+m#x7=4h{D+K?c&2H|ykJA`
zp?d%sb$%zs^C27I1#f=S;DV;_{>Nmm{b7HaYe;I_;prm$ul@K}IK}w%HL)M+JI(>F
zA(?H5$A$hsVAl^)q9$EKxn35F8NE?jk;OtuSACN0CrbG|CZg(PMI#}cr6_MvLysDz
zo~rnPlmxr}MTqq>rcGRTFD5I1k6qUcXOyk#6mTN}ysu4%-jqaNyp1IC&Atc-{Rk`W
zQHMB%_;ygxmv9=Dt(EXN06cBPN4U76-~n;kqi|KJPMy)|h&<>pK8E?1)i~`A9lp;-
zZYMlZS%xbkK)TCKPBDf8fV*#=*@Ps|K<YGIvkm1vAZG-xu};AMo5*K;*h=c>jLWlD
z6~YC^&MN|vl(eZ-IZFRWQ}i-x0{(Ro(*yGAIm@{<fZM<AyL=bHSHDX9p4|_Q=E=@I
zLI&sNNgNyYlc8{rFpv|D{VQaNGwzT?KDUG;Yl2#c2mCgSLBA6ORDR-M^{RwZ`6W}t
z8u%3UDkF8TZtw#R>q;JmU?q#$i|;a8W%c+cB?yv#zcc+t`LnAt?GVNjJP81Mm)k_4
z8TgQ#w?Cy%Whw^*f|aG-(ieYrPbV#^)B6#Qw&>M3&@$@wQ{QL1Zdl;Mo>>|Ay#vol
zazK%>Czp6oTg(IZTdWe1*aSt6RU-6%+Ulh5JR8G}>hdIiSPR9%gjU9B5^n|x&Z@iT
z23>wKiDWOVbXhUfp?#3}WKY;nY^Jn5yp!z^IL22dOZ?$g2ywD`F>cGbZLApIl>INg
zB0<s59jG+cwDNpUOk?ZdOVG<P<T~D$mwXL&v>|-S<~z78ceb&jv{U}ZuqE{Fq~n#S
z&+)I81;W^Ra0;hP)GYOy{_o#y;NQ9egD-nL-%XD+R1bhzJx=zwdz@_8I!-o6A5Qjp
zF;2E~)P7=?Qu8Zf`zrVOmjJvy=o+Ptc+i7a-c}0Q<==44A72j{Wt+XAK&@EHYGf&$
zF->%R^$Yhscv_y{#;9Exu_M0dt%w+-+r$n>fk?|L^cYR{O?Z0t!Xq1~2DM~r_g8*V
z`h2W^Ma(on;fnbVt)41Pv!H{7BECMnV$`uha8-QDMc$>W>DdsiJB82xIQJI@zGH5R
z$_$kR-o<=Kr`CkL|Kk0@&QP+(y_T~w@I0WFUb&~_M=b~Q2RU(ia@iD#EzF`6I=0tG
z%W_P|f)&{bR{l96XYS_v+hOU+5)0xfBggdytw;4`guLm88r0vbuXWUO!L-(@=U@q{
z86f|0_Rha$4l3Z*cE^C1CtDcx<ow?m{kAA3TM@URoJTNigLC<sC-IRcZnI~XWlTT6
zMVf7#o-nbv-p4A>Qh1*F!Ed>6Z&zELNYN7XtI0^u!{#oWP{K!1)ISDxBo6Uvhgw%+
zheUh1b=;yRvPymM^}Nme!<UDzwP|ymCLMWY*7p@B+2}v%NXolm-K*7#Gw)?$k{79{
z{$kE2w>{2O@GknWYn?K>4+;@+!GG^2KNPv_%^bfn*4|v3zbb(<?$03m`Y~<Hn%Yxr
zI>n;y*&=y?#dVbmet|nCxk2iufMInpv2VzMEspAw>Tmwp=raHx*p#dS`|H9|KQv!v
zp)u4$4Fbs3#qINR1Ul}{)NMh}FJC12tM-k8i(Pnqfbgdm>*}?|c2QYp_`$bT3<S2v
zOl8-5Ua&!izWP2!UhhQHUSiF)oFR1z9m_r$!S!gWg+QBws;g>>slAPsvYT3&KzO9R
zeIKzhTRnUyo-J=wAlpx$DoK=#h|3j4gRwb`tdYA#uzEF&l0%Pdm3xMoKp!TpsnJ#M
zvzI+g>f-q<pLa$SnXG{eRZ^;XD0_qIBQYIcha<F+ufd8|N@m6cqoOsD+rU+qm=3k&
zuPK*ZkuR~3IqkDsliwGAO1Y@^evwxa(su+YVeEO>-wVA%`+tnRQ*<a%(=8e&JGO1x
zwr$(C?d;gLZQI(hZQD+6zJHwmynGM$ysfUT(Y<<gb&b`tFilc-i@<)HVW-M7JfEgd
zwBJgsFBU@@<(5p)o3+2J7&Vm#rSyhmT(1};B}D5ceO5oJ8!&a$-3=&yDt8DBVmQ|O
zCe}I&7)3w4v~ezWMJ(x4K+;xKD{Kc|F+2}VNJ*B(J0<7Ht2^f3a+2R@=eRASZ=AL_
zN70ld99PsIMyS_<j90k$--6No1;4V$#<EYRs5<hPz28xOCr*5nC-4(8Qsy*)3i0Hs
z9xR)t2uV0JRaVqA$YM|UY7MwMgq=k%Ry7T=$@d8tGjMK5@IoG?L<=t%GzHt}VYW+Y
z;74Yq^<n7&n;dDC@YVKM75m+tB%zIXNcO<37>sl5-sF=`ZL^1g6+WNcRQL2))z%*h
z(qFZn1}@9%hzBW1w(&|Tp}9HVdlb@Wzdc!M+nRb6f!NJU8Ay1cYnt2IxT*i$gTN+l
zR?cBrP@liBAAd_4twBz~#_v3?!c`M+_Oljo=BXL~R7Ec)4uU4>o-k5tyYrIlQUBY4
z*82BF8>A9cQQ1_b^?@3toHI(2%QF%Fym1LfGz7biYT7!|N(~+<jf7`ijPSR>$w>Nc
zt9WBT^=8n5x|pNztLJ!oZ-h~H&Gl1@7ZfaFQz9vh3Q){I`OBDHsKm<0RK`I0+px!g
zT`(H~vrrSQ7q_L*V@cN_NfI@(aO=3d0n8}$Ms(yX-bd3`i6Tl-5g!J0ZQ?-@ua;0&
zJP0zkF|NXBl6u?xhwEW{oo(tfIio#sysFyE^3L+mtZQrtV;fDrx_{(2obRwSYT?y%
zpQy#mNKmvxAF{ZG-soYNRm~-Lgh<rNz{^z9xB#j}-ZGMn)oru0g3GM+UW4e234stx
zqfS`as12j-pub{hX!Vy2Ui6LHd29^B8qfva9_)g4A!A5%)28TGY~3{`j43MM;!HnR
z+sSF(#e7zeVY9LtwP<NQ@QKV!B!Zb42X;MLRy51<ZE=%e{Oh@8S!359|Ngq5ZL6yU
zw_m}W;YU*}Y|xINiFUR?0l`?7?6ZD`ani2?hc!yLF2&Y+MZO~3mwKPf2+<6`+Gw|H
z7hz2H5DNSK@R5o(>r;5%V2wTk55q^)m07D;Mo=d%=q^-n)2|p>ZsVNd8*o+u5(i6J
z-ph3?J3|-Q!B*M3rS#7kSZ$Ha!eOa{O6gf;ElD_#0j5EOjI6PP3?F5@ww&kDll6|s
zNi#BMcIMIHRKzb0OR-fXjT#@Cr5cnGpyl#mQ;}hIc2OJUp%i{A=!<EPmeeN@20D-I
zd!-VI7WFmUq~jmTzVaAye#_zQX|3N}?P6Ip1gEpx1>Iw1WZ`3ZmPU1nQALTDWc6k`
zXnuFXc=qJ}*297%D8fs?X84>XS&9}>k`mH=L54k9R+LeZ*mThWd^3ueL~y1kcQA6V
zNfP<>YXla%N5?P`vtFDf`_gv?6sWboKb%Lmx5n!V)Aq=sTW}F0D+wIpu_Rk_KK(4h
zy&+?^CcRv;_ud$FqhnvR5+&W*I_DPFt`Je5ch2?nc#7jB63ZJi%X#(8oKh8{dcF!q
z6!q37iyP-nv*;V-cKvo@6d%h*i#ZoPzpS45#D(Nhmv$%WL-_KB-GL*cn|N-=P(*Ea
zU^oJ13MwM$4!dXAxBn=tewZILJfEjcn_8~ZhcD`L8Hu3m>Vj33R_ou46Gt1SPBNQg
z&2yts)6;@zYBU35=14ZMmwg0d!>glT)7#>sIaYxx87^0|?zb$W$2y}oGhTC}an{Cy
zD4Ny>ru%4iR>)YNZ3)|0-_4A~M4$B|t6ImHw1a%~LktUVSRJhnd{J$U4`Eu%-WdSp
zMNjy2t0%2K6ThF{9-j1iy`Io|V}2qk8VOjxw^D0W5j>_MoI+M$&oJgtcX;%^j_)Sf
z=){3JXLz`KMsj0$vLd}cVf{j7a=-6B*ytGSvw69F0X&DFvri6M?R4v6k!W95G51=h
z#FoK+-ZIg&d9}XY-Csz*jhXjtyu4L-b$CyfYQ1;7_cx9gG`f)95OZVZ^gF+=wsuBx
zZac{A#in!X*|vQUHgJ1S>}4BpV}B}k;(AwI3_f$!veM4zYC5{z`6w-)a$JWI9%P%X
zmRIz;`{QM@Y+`e7(G<#spgvQ8kDq<ElqYjvhllk(YJ`0gb~g99r+a7keiYtsCNet0
zeI{lIg6#c1$u0FRzF%u3z4b^!zp&EmnXk6LZl{;Kvo1I8UJJfmH!5=fbcM}Yu1GHS
zhU0lV;CC?O^;ESfVXndYox-&OMlQ$m9SqKGc)h)s%8WLa7JN9h*HU~XD?$NXI6k3j
z_oz*&hT?Et<uepqzZBdy>wa)z2c7CpyL>3Ezn+=_J)b9@>WT8a5Vagx`d!Dq@?Xv|
zVGrg;_TYn^@vY5#v30s??4QE=!J>E<fZIu13!!WmUd{L(>#t5f$OC_LN6j?Y#Ts2c
z&(Gl1s=)`ftw!+u?91EoIPRmFR*8s|f#IC&{SsVr_U^76g!7#mBB@CSz{7A@HXXQ_
zssom^@zKqVg;1HZ`xy1MFmD)j{hgAyT94hDOR<-7iU#|btj*l{Y<cU9{;Xr#`hExX
zdUcp0gLxCNTC3K2uUWmlnLWZ+uE}<*#=AV833VB0>9*6{6TVo*{ZaQlw8QVbM57(!
z{Z6LgoEdw@%?)S%-k<sVXwCV}5q$T(_Z@(zx0S&o!zQKG`*F4^yM3aws<h$dY_j0?
z^ZHVubg|v*H#qOB@XWXN=Q?Ve&dqgkaEALL_WsOjK?eQh<A#beQ-n`k>$z^ctk$6#
zEs2n?J0PNe@5U?rVCt4lA)<y`YChR3SKH%2U?cZO=o2<2UG2KeHqAE;df}t+?6({>
z{!_ts-X#U9cTY~fdem&rYOg;=W)*NJg%2;+Xsl-IBVxx6Xe$cNn?Nrzppr@C^S<7~
zbP0OfjTmRng)(}h+~Wo=Sj;}R&2u#y?V2d)Lj`8~3~6rX&^F>CJ)OW?E2I}CS!lNC
zaHGBn{o@P{!?r7K&$@*oWfUAaVee?}A+_P>#!hssx5%dM>e02M1a3Ha#32(4;wdvS
zNN+FQU`kt(OLk|R%*a0i2~TWz&z>l#7sj6ILF1|;J?vkR*^{VQ4Tf=Vjd=aDCiB@|
z-AFGTFZJ_x^K-RLAXGuaOQSxIQ;W?ETj8dr_kj`Fa|ZeL+0F$E(w|G;xjg`{(ZgaE
zS}Gtr)=|}k)~D8Cu*5Rf0Xg(%hemUs&&1C#y&7}SUSB8Lco7kl9}#QpX%>mxmFL$J
zy-%D>y#4pjyBCxn6fPhNCFlZj1FRK$IcV9-oY^Qtt{Gp+2Xe;7yYPavfA!YY;GfFh
zZCBSn<Yo5<yx>bCJsde2iR?KG^2H-5EV$cThgaSfI`E!zwJ#i$B`aXXBS9^Mxhi;-
zQ}&OMbt#5Ky`E3kGroyGbgWtTJCJ*t$(8TzB~9YsT$?Gq_Ij6wSBulKuMpgf9o9sz
zr|vFnYPaszvrr|=E=>q^U#};gLt1W8Xuc%esqyq}v=I|0;~|WQ>8ftB(mk<5@Xy!B
zv4$J99sJ2SfqRZVsNk40%=V%55t{+^d$}cg&RQ4C;HaX&a@EnmZ_&*9cOEYSm)AWX
z_75p7Ti_90?!;yGJ<nczV>4k#r*C?_j8Ts6mK9Z(J*cT}28CynWhj#kHRj%*lF%UW
z^*{WjSX@p@^G(_I!4I2;4+o6<f44IHl<E9;d1}ATwe75`g5oWnQEi_klK1}-g@(<9
zCh>v(U8R6g_!Rw4iR><7>(`anFm=m-nlXG!+lb~cu)5Lrv%37?VTP((JDmwHhTQW6
z@AGIr-c$gsyhQ8DhA)iSJC~39{zNrJ@M5C<{yCqL>XRKx!1j5cNS(^%>-6#AsokP@
zo5|$!Q~KfFHsvrx-ge@W)<$3PZe9ARZKsbKtnG)-%R3{x(ftzoJn;jNMKRbju{Cyb
zb~G`tf&3lW8CpWJFyPbU|7XC&#?H$4f2Az{N6O7jCv9SD=4_77!ul&xq!YEUb~bUu
zrxUd{a5fP(F|so@;pK(=Ki726>gHLCA!c**wLK*|uU}uCIPzKDM%zd*FsfIt##Gmj
z4Rd(GhH_~YU0}kT+OY!}{|BwBQI&BXid!;78YIAI%Sh<AT_16S{+;^W?bG&tm3w)8
zwScnG-TnUdH;nIkp05?I^QS8ScJrD&w`<wk?cta*=8%hs>?nChgMxJW@%ni0yw=OZ
zM?Z0K)3P=9wsn8mz7uT)+~Vo5du>N&@KRKQ?zy%uOm)D$^#@&)6M!Ps_1cjf@3z&y
zf8<(k)YYtL@OvI}$o>aM&`egm{RbSwL(Vrb-mmwqm&)XoxGIEYYqKBHS~Ea?uXbl!
z+sCh%RDH*6weY8%QBlTVY2Zy_Y-=z%1v}$$-qG(#$d63Jii`tV8K9q?1p|Za9(8Va
z0G;-QPX{kJJ1H=`y^|N9Mb<()B*}BXZno|has%6VQnF4NIPpQ#FK;xSFuuZrhy%wS
zPhumrEnrz^sQ3Fvrj{7&l$Y2Wi6abx<GG40j8$sCt;l|XoP$i;H^48E0S6eaKYjk?
zH%e>2K1@r~ttQ>8q2MXS>8;5*>!QSc+A$Al&)|Ui2iep<#UK1d;~{g^dKUt>5k^D4
zJN8yTgI<nZZ0j;mY2MV4vFYtj^K_g+eR|ZYNRwCsrYX(*ono3GO-@StHn(2t#IR{(
zFU;c;@Ef$_0YUnlE_tI@EF87u@Rez2f?qS(^o&5ED0jearh9MA&NJ@O*&Htpa^iK|
zvh!z;Je@73A<s+@v+aB5TcRvVp&fWyx5bI@c00~PH~#qp@$T`1<qXcAJxC{z>3i!*
zz$p*NlqeMD^ha);FNsRB*G}RGZ)L|pCge7tuz6(4>OIO4sebMO74)3g7#)2<PP4<>
z=`0_K*F)+w>YG+LCdqc6HB4O+0m@Rqm|N)h%pRTO0q#u{dzYAO#GTNV<^W9ojGsWk
zvZ`I~OdDuE=*{tJ736>$w<*OGD4|nhX`xd(!*Uj9V5$=Oa1EtfradClqVA`$intH;
zj*MJxRegpD1goJE{#6bEB9RqK{G<qb8~BwpP=U2DJ=c&b4XikL@L18lS1%$IC_h+^
zH5{lb79g73MJ}nsP|@0e0%(6vv+r&Y$aFYJi7c6vyH(<JWKSnws`aXcFeqH#h$r7K
zAEy|h4niu}P3T2s;5R1?#2?!lArZ$Q(;vzOK0M{+>>agr8mlzkz#i4iCzZWLg(QJ}
zDG4ykw8w~3zE984?81HOuVAFNm8uCRQumXL<5QQ(z)%pkC{OGKu`u?rF+#+AAms~_
z0_w3IzpV1ulsa)o6ue^xNTe<UU}wse?ha;Aa1e0&#lvgFsQ6KsA@-1xOVM8m=6j|w
z9tvj=JE9KZ9F5j|rZz0(L^cEgGhsK^Sro7|q}4AwulY}!Z`65@_MhbN-3?Fhcxc{c
z>dw&hq2*QtGMlAP)DhfyG*&Y!*bbK700$st{8=;xx^0mLkWMj7o<tMTfq=bqVsc?l
zX(30>Pj8s1K&fn-$5K+X9g)(Z4uP1ko)M~oay<gu<$jOPz}_emh0p*I17SGJQ~bb9
z%Ss1HNM(;+DB<b;ai9%3c6p;=%5v!M4Hsc`hox3VcsW{l?Lp(xb2{8WV9HMh*atyp
z5tRI3JH`%8Pk7HFCR_z=89yj*ha3em(EFt2(?H(ATv^isW(rGo*!)FlZ)6fj+W1q&
zm8{cwh~a;PicMgy)Y2g(6k;4jC+U5%P(iM>mmO_d?|`r=S>r8hvrn#Croe<I0bz8e
ztoZz-DoJoKf3uIi!mzRXfCQhh47JepajqB(e6?TC#Nmx2A^3!g_D*{z^u+cNaii*z
zp(zyQETVH>xU&X!A+RWhN(ggLCUJ68$>@`WA>f_YP6)MER7U}Tv&r0WoB<NAIY2EC
zOw#~q`}r5Awhg)E^Hf0wA-uUjmxr(SBq7vO8>OH|fcpDZUbTYKAuO@H9R7?j>S+qr
zf>n8I2PY5S^vy#XgE##N)kO1>AfRtzZ+$ei*4G=P90}0{{e-2)H<PDqQu|HcF=ufH
zL9Shnf>D2KL#o5eR0xQL<&vJRcM<#_iR^}DpXR)7H47#I*%gIuHHMY?@UI^D6q|*N
z7s~8G6l_vRiF>oDb>GBjwtsh{dE(pxZP{V4PPIYvGP`g{Lcj$%%asa0-p&$OmEk7E
zs?(o$gVOXx_n?5%DFM#+;v*0X#X-wESHNE2Nyk_#<>|o5D`0AQ4R0F`zz<AS2HF~i
zu$_RTG$`(l>D53M(j&||%qGs3$_OBc=iv&uzMfE(C@vE<(03xP5QS~=ImNf}k7AN%
zM_e~e3tLo->bS7u6xb?5?L{GuPgd&jkld<*RkX^OtWXBv4xQ-eG69r9*;S>wFj9N^
zq^G)2e=c=B{j1BQJ@jYE?%hv^5M3#2uQnhd&{|%DEChE_p5J9!rTWi|sHw#lcv!Xr
zjFNK;R$z-+IYg}b)f(U47VG{xL<BLu;-~`=W7H^kM3I8xKr)ZrJ#vgJ8`9b8sJAMB
z5mccj4n*pmdHw3vtHJgHM{)%uRd9^y=a@DbsC7F*Q6FP`FDqt%P-TOK6LteuqN8~o
z-E!3tcR=`eFZ3Y<V6=-mDucnAFwh$mD!ehiYB4$%5R_&azoxR58LD3<1=$4wiHU&z
zrRT4Ym;!KXXU-a^BPny#Chmv!#zJ<Fb$65TBH;=zV5umjRHr%9?z`#3EIekAQ2aFz
zlJ$vQH+>YcjlEp3Y}Vg5Yk50E{!GM#x;Y=8Vx-MV>M`oPjgGSXJNNN}v0qh^N==6@
z!@{1SUxk?v*XusZvuGe+aMdlUHz^EVqvw&;s9%$-s?R2GQX6%}Snw2bjXMO0q%!Pc
z{+GbeS_Ddox5AL3i8~!0O<#myDSuQqJ;Ih08OLk>q+-L&FA>G<MR}n$QT)3MY&B)X
zY65nMzr8T$@t;wu95-J}VAqC?YF^QoqC6xiEcuINX#HgCO#Wl04sT#w=43{vzqTD@
zrqfS3nyqi-H8tl;{A&QXqrZj|gSE3Hz>Ae&2P6Mh=KxppOr`a1_K(Qo*Opx#<&acu
zj9T?!G|Ysa-}6e9wXkTeI6{4ZRR<j~HkKG<wA&$Qf-Hqwq1R1~WLMJkG=7)v_XcdX
zSeSKxvBG0wIbEF?j&E3hkH1gQpN-hij5;}m*@F5z9=aA8W*UyHphdtfe{YQR+Klk0
zMj#sJQC;4H+;lD+n60abU~^P(LWqkqfbEav#lzAJ7yOs5R=rUt@Wx#P1IcFA1OX4V
z7J8Akx^LZ1mH+tBJkYG`4qnhcKS5y3qJ0zrIXS~gKMjl!y`{8cnGiqZ;mm-r#1Xx2
z%>b97KOojLnay!113Ev!bO5a?VO-3-y`}av%*kFnFiv26p)V@#e4eOa`_U9KG+9vX
ztNbQGlmKG;(D-6nmQWSw^U0iebM2b}OuxLS(R1uPqFxsPJYa8{uIBV$5r7wNd3Mma
z$ubcNKLVGD0D49rXI2ne9dlNMB%mvVWWcvanYJ-c9vm6q$w3?-5Gk~bXJ^xYPP#!z
z#(bxjX`2dQJA+w@!CWYgjpw{cY!n|!D?8TxKEvgV@DbsK0Z<ugtyjDF`f30cMBzzF
zqdY}_%-JLM7n4H!SRk#@8M8gbZp@k#$G6$O;u`wq#yEhV)G*mYU|_G{A+$-_fiEc`
z3jDF*FIkuQSD@k#;Uq;JA6z12_~WvSCIK?PMVu@x;Uisr4&ujye0jdO84cJw0OTP0
z)uhB1RCQV3kF>TWIjRz(z`j7MK)xeJy<a})(P?gkBq*_)u&(T}o+(F=?V<+CAL2G5
z(UNu8osh7(d5*$Py6O#&r!;<J{$R=6qsPRRr8L==rk{<ce#bBq)MvjQ(>j8GS_&ti
z$Qt5PLpI)~lPT0h8Es#1_wwH`7JLb&IPW_N&v*};)zwK>K_Y<Ix%C!g3X5+H-BCIj
zC)*Q^5L&kP$l;}nmo98;&qS{M;nwL={=nI@bz0(JkWK_xZ?q50$gBg2^<3YJvU>E{
znOIC5>6R$wPf=%^;BmTr5h|Q;tXi352R^4+QgnJH;wSQb{?o@4{@u{y?zjUJ+<0I}
z13;#r0oCP57P!+-bDx=`tI2QY)=V7%v>7UEMFtxkBEY=Ov7c-!VfGBWf>VD10^5ms
z2?^ppEvWw>rW&3cwlJ<#Wp0J$g&>Oj`K3o8Il6dJWBYkuIT=$OW=5~kk&s`#Oz$EV
zC?=YMs6e#aTEw81k)L1w<Ri$3Ra)xWzyX;JM~w&d(g=U3Ar@4mCRKn8(+CykZlhq8
zyBy+^gvaXDwu(^U6V_ld75G+ClF{c)R-zH;EK2H!#!hoL&b<L*#!sOf)$v#OCm*p}
z(?8HF+&3{HJE16#N0H7(LdMY>FM&C$O>DPJV6txrcQvIh0ym^KQN1`F=t%Wk+<kL!
z=|3(?>9A?8L*9{w1l?6QRfRX+z=<MUkf4&Dz$4z3#w4Yxquwc>Q%PLY#fS-e%^T>f
zufZ}|2#`RUrVR+G)!NBIaf~^w^Mf-BCNmzglcBYn%}jp6H}nARL8)mjzDs5*9q+4I
zlmu1FpK*2qFAt#^sa=oH<=Mrk#GHNYa#;oGgN4zea9G}7R~o@651(KlXhbd`Jex<z
z)Kf4^r&a_3h19PrdGO*__NL7aE^0G;qJsH!5!{Y>Xh{FH=O$OpJ5t!c4oFq#(7VV)
zh}oDLud_rc@w;b~CaP#M&!CO=c*4X>Zcm6JQIuTCyDFC98xH6**`nns_bZ}RM&yRT
z$;#<hUiycj5F%cW)aWZjuTedG7WfBY$oWu~ky*!~Q&_O(M#)O)ABDX>JD$1#yqzbi
zUBIPQWPg`#5A87{=&Wi*w#8rv3_<`;ms1nDyp(9#g2_-5qctTrsBX8^+luCUXgczJ
z63$d3Eu<xYtUiM+#$MrJH*QzGWK+z>0Q@+uT-`vBPOWyea-GM_9a=|O%T0^5J%(sv
z8!FjC8M5vKY4o~0tYfO#S?cAYsrzfY^$z!eb^xOo$OR7AtiF}kN)spus-$D2!9u*)
z%bVSvk9_>%jw~?^%FN@;$7*JH!o26jnosU}%6g}JNeOwQi*N<q6P!PUzl0}dU9{mL
z6WD}AM8Py*>~rACvtnq1vAzXWH;I<M;6?7%J4E>Ef;bYZ+FIm%i4K%3Z&)R?)_4_9
z%i0X#SmZk%)EItjKwoop#Vi1=>^cnx4y;UOfuMvrm_o-MzrxK)AB*mE@^HJadgRC#
z9jrd8t8&MeUSOG1OhDlWjd}(5^(xyGq&Tu(m1GDhF}5lQ#1C1&#%Q1_`EK=DlIvCS
z#Q89;;|3Ih^IRYlpAFxxhB)GoWO|K)avNrTYYR?mCplC|cJ)6ZMi+yYvJRB{@CX8G
zTA}gok(hsGJBS6!_PsG1{!7W<;R=Yex*nSq5Kp)yrqCA#<!VNMX`Lcyg_c%;IH`P-
z<Bc#E+|=-l%1y70=1jd;6LYsdmN*JVr8H9Jp~`Q(umBld>IWPlrvGfy#0ypK1sf33
zIS1YM8wfU|GpuGfVpj6QXl#<bL#zA1t3G$)FDI)^x0H-Rqj@O=;i-eS_-l`uj|RQA
z<)iGU5E}Bp`ejdRhEo!&H_DI=d?x9?Hdq^u;00)AQwZW<%t7IM;s*wvE8%c8zbcbQ
za~-96wW6rkO$TiowIr#pQfFU5<)>g4M<piq4yirc4({j7sTXrE*F6G)oV_VUkdy{Q
zNSgm(%iO?!5wE@=4_`niyqxo8V;lFIbPiZZUo_|<o8fXj`~y0TD3c|bd0E1oG@rgu
z&XYBA_)3>?j6I?MVJT(vbbi1A3fG|gUO&IGp{y+S`D0|-DoYK}9lIb5JYrh^dgPa6
zSQ_q`ArVfJ=P4{|u%vxWcVg<`U6%J?xMTIF4tROW6g5o;z@l&*%hN<up@aG$@Z}$z
z@?jo&(u9kYi<fNS70QD!F5180O18+offCc}6t<1B$5t?n=39aeR(Znawz)s-{WB*$
zrzKWUPMTYt3^{Z6>D?mjELX)Z36e!Q@1)6c0rK2#`l1<MZYyPr&bT4PjK>R{mqLc_
z-UoB{x$AD&xV>?C<1&Ao-GJ!w{i3zba4yn;M9*uxR!;x1*{A1MQ6DXDRp2~C2Jw#z
zx<qqG)>B20+}DdTr-^{YL_J}3B5W}R8q$kXa1yk)FBWkH@f}m9(2e6csf;r|hsKh#
z-=HfYC(}9nL?vR;o@@IgvR@4I>=k_Uz-Mn6jK4b(8yI&?0)TI}OMe~&)K^%Z1fJ`B
z*iX2TFzrrNvRMdR3}fECB8@+lX@!f{_6HIe{^(JG<`8{I`4PD(-#Mf+Fh~O4n_no=
z@IWTNMn~^AbTf|)n6Yb*ev#QJkwZ7oA)nyF><n)`Rds|zXHMH#loQC7V2yAS>VyC)
zrFMo2#2as6($N`Hea_XU0Owi^WL*~SvQM!Ne&$DB3Bwm?8O|}TcNBW1M(F;_a_w8b
z09RqGED6U6;kM#I=Th|l+x)#qM)2B=Kr|HzIaXKmDY6*q7)R-gTA0^cm-s!Vl^_n{
zh5(`TsG~zS`wjmxWIF<WaQT>1<W()TF>a%>GVIX|5}Kx&^=e0HniHg=Vh$m8r0^Cx
z7;zZhxzF4&gSF#s+1}Q@G5vx5cTd+gT;NREeQ))HRC&ip%=k%>!cztARdoBeBzSoS
z|1?Zx{hPVonp`9WZ)U(h@hX*Vd)arA3_{Vk4J-MN7O3iP-@sRBL+uP|WUnIey0GHv
zuOl0nT+t(-2Jgr)D3&j~1#f8`XA`Bpy}@Z4;TXv&BVzZxc{J1RA^9POrNqpqF6N8k
z!tOg1D(v&k54}3JtV+}?I*}t-RynGmD~@{0kLCGfAX7cL+sYK)!WHiEE3S0sgg0>W
zW!i3dxzVo6uovLJ-i_K`C?^09f=RS^Yxs<}X_IJdW$(&}9>Qtt-67z<&_HAF5QJjD
z{g9WY!;$2Z_sT#nX*5I?DsdwBx8&}5fnOsii6HmOUv9a*@a>x)QG<8vl5ZNwSGUpo
zliUZp6G1%sJvq(jgtnycNa%@!xw~?a?y#@fNlrOd=%2zF2zpX7xVfUpfI_t)b~mF%
z&4;OVHwP2oJL{AfbW9($kMLO!=^8229&pZjV6u=#>CL(uPrP%_$Ue^3!WIqsOT?u<
zE85%cVQBcC0@*z|9MAAVu+mh-_}JtgD+1lSP^^a`)rER2$MuD9r$IXWr6rwPiC7pL
zqWGD)3<`;h8q+dYTr*9f8|F~u^9$+^ELPqyNV2rOn7&9_Fb{N=2PD1{c3A$^8s(sv
zp*HwWfP*bwu!zR#`=Rp4zPrjneg@}pz|SUHxtxOmR>Y29!TLjQE3gJbu~p_)e^5_v
zETY;cl$h|~rFycfgW@zCvcJ*AFx7Y9E;2A30A|>szfk>3Kf5SkUFPlFVcM7)E`psI
zC&inx#1~^7cyD4Y=7`NtyjZ3tpu3JLThD4eNHygC&GS#hcQV2=F4sY=KE1lnb1tTd
z*<CXz`FQmNgvG8`JHcrW{#oe=<<e-PU5t*Gx(3FH^~tL(uMg)u=NJ~xz=qv{<^_ag
zLA>Oza{F5Pdc_?OH1fX8E<7alrhQPsHkX<Ftfqe7W}4gtgjk87graGdfU5O<Tk#^~
zzQO6KdHh=912o?|c>8aZ{6FCL|G?$!O#c@oXQlrykeu~DX!`#zNS>@JWw$8`-2?T7
zz!~Fz?Hqd8RYMb4z=JH)c$zN)7o+#*ym_U5VUgd@TVhIF>Z%42gx8(_?k@gQ{MzWI
z>L&ZM`6spWx^qX-?_;t3yT$AB^R?dYht}=uMN9Rz{AD`@-n+N^tfKgaNRv|eJ=pk<
zV(*rVUk0~z*6^;<n4gW`wSR(${?iqT-=_etAZ$DGgRMR^L!6isfuZkP+P`mnUtFCd
zdBSun52UuVsW5{e3Qg34uD}rae|}F9mwPLKUx$eOC=9!0xb%a>a`Lm6q8|MIv!718
zh^1}n05SW2GsUqFBnC*Z2s2(L?sbQZkw70JB~`e7w6;k~5@x$hT<jvWS>dX!`O~|_
z|G+1%q$p0QZCtyEEr)y~y<CjaIv$O}^3$T$?YIE)L=I55Uu;*oEoEO+m<bueF(gK-
zXTHzC&pL`}Rl++?7+)V|8;-&L8#OrK(7~jN;Ac(Jp7RQi&wOJy4Wvb@O^V%&AaGbW
z0*3@i9i0egFV^RkFwh1QHl5)>n^9pitgx}zEz<I5O|5shEPO@$0@}8YfReHx2f1hy
z!c>ZcMYig=%xu3m9eGSV?NCqi8~<c5&1pOs(p9n13w$$OUUpebyR0LChTiz(HD0C>
zr_rJ%8j>6XIz=CmI<lBnE&*x7C;lTXZfEru)?&h>i92`Fca^^-jh@+ZInHgO8Mb`D
z{AmlnN`#!dnw<s^=4OA9ZKeg?hO`m}DI;8h7n85dw)J!Tj$fCiAx(<Bv%p?-&FcOX
zc86w1T|6bD9UK2XjdAEfT5H(_Rd2<8`)%f~*k9z2LtpvAdPGAZVxhA^1J}POS8w5O
zjT$CzcDN7sM|9V~T9D$iyvgqyNUYrAiX68ysp=-O8L2pMY9}>aN{D}z&^_9&t|fJ_
z)fmNLUp~bJldQ*YxGXyi!4910@sY3TXK4k>fvkGTd4Oc8G}+7ahao+QO*o(l(R|Ur
zct&C?J6@pXV<x5SFW=#_LGheg0jxKzY-uYdEnDX9V-jkYhb2F^1s<7vfSkNW_=;=V
zz6_F*I87TDA=6D~95$Q};|3t&j%LhP!S#r54q*29k;DrDX~n`naihL7UlR6Kd(jjh
z?q0qe%@&jX;;%bo6~CA2@7>rT@W2@*g;oXQFzNaZVx*OExqn^Xj7u9SRi=|P9z@%3
zNGZb!+?eL|$&HdvXY{$a+sv}TYN18jJE|xeX;ysDs>^wyj_NiUM>j)mEz^U{eBR7(
z6*dCEs>5UWDb$Ppk3-ni7ohbVsp0m5sdS&48g(M_9JNZ_@I{yS^?JU;hjqH^zioh<
zlk;|7P%!niw;DbMNx3%XhG}%Q(#{>%4Zh>wIzI4Z)BpDUmz((?_wgS$!$8l({J*g{
z*8h&hvHm~B;+C|v>~LBUz~0FZ>-l!Op33T$4{u?aK|TM>qlRn{TjFIr_P1DbL=aP@
zWUYQZW$&4|jghE{wJ7F|2`w+)vSLqrJMFz0kdd|{J?ee`JWLjj>UC_rO(~=au6{Wa
zyXN9)6qn`=PJ0rOHFt6|N*YE|z@Ulg5p2E9!HF@(G*A9``L=(2QN)mK!44VItZto6
z7Ba_gYOtKKS3D=ud%AggJU)HHjglEEPI&$KP<y0lng_7_d3+U?9nk3Rot~2AZ0(JH
zM1)T0%Cn;Y2wv;u;8mF3MZMG&LyVia_zEt>zbhK}j$N$k2RD$CvnZw|n#l8)dAG4f
znb2(S)!$)bjve;v|7OWNp_GXg^7Dv_gdub%c(GfU=qHB~W_$(Dz4&YQ%pHB6MM32>
zvf?unE-W{8{$;iZ+s32)tx+(?3!*XtekItvv02lt`jmGvIsNVPo=Vi$nCf0q>c_+R
z^|bOq79T&KaSqT^YIh-k(e4)-nIKN6CawAWOvs=Z?y=`CYx+YAhs4YmJ<0yp71GUq
zN^K%G6z?3y$IC|0zA0!%Hnu0Ncpc=B!oHrChImCNeFQaPNYpVsg*Q+B0D^pTIfQsY
zZe9`qtr$_S36WR0$3Hj8t>+e$k4Wzjq-~w&=9TK*b=e)6WfRt%!y|E;&L3f0ZP8(n
zam+ueO=fY7BO!?UvDLbb?Lwf*;p4=bn-@-x>gL^d4U^&RE;Ww|7ZATUUZ&J+EXnS<
z>O|#TbZyd&q1A7uC%2hu>{0D%|KY_*v~geYyT@RsHR?O_;0Vrw>dE1N0xIdD0d0EH
z+a1*;_2GsvJ}*2Gl^pqvJ<r;UgL}3JOy-#79@!d$Q)!e)rQ`gS_JX+<cq^s-#H5RZ
zQ(bDM_}}z$(x{WHO69SUeT1PCqL(cGy{TK^ikMZGVmuX{!h-}Ttcwj1!)iDtrSiao
z>)IyakD88yj-~_wkcA#w8*}#tDr~#W0;8T<3W@Z5<`0yCFL$2bsCiDqv(&QhR1e+q
z$KRa>Cx=tKxBWI8$&h)}a67U*Z{y7fL841<)MWDloVGme{VvhOT^tykDHcd8W=M0E
zDtULC+brsVK<h?Emrdq0{S;R$WCj<~8Pr8C6@^|+a$VzH|E~fAp~k=z^2yLAOgbq<
zO63~@EmjPBh1vz&t_I2(^I)Y(cUwPC>_uW6WQoQU>NlAT5h|o=284r-%C8bDn;@8B
zhy)Tj@DSITyt8gy(=KtX7gY~l1-aP#x|`em!fGeR>>-NipqJkQ6g_8#r01n8q9Gz;
zjJ23^3AQVWPF<@dV3Cx2f4X`9yca&2#0Lzzb34N4ert2Mf;v*kX2eyL)OYKiBX5V#
zj3{agqrH9qd{qlNC8og01~DEwx|pem)MRGqG&5uBP6?t+4ZwYfKRS^7@RKd(l%QMk
zXxmv#KED0p69{FRQ%p6yTK~Ybwp$}X+c`)9mFDHZwC5rVNnonho8p8U1NWIh*@QyL
z?97&Ra0}Pb!cYFHx=l2dilzt<zFW&dW=7XquM&{;XV%6jka-xGz7~hLm^8wBGh-MN
zHo1<IV<xlc475;!DCs4hH0ASaLf2-iy3P}FC(-9y3drMwvp#xEHERy5vE$36x0VkV
z06<W07kMN#U2!j3H^1|TF07{SaNcL85y30nx;UkDnVb^10%vzv%pZ+0SPH{D@K*n0
zFI()Zgfv#5Q*3Uf`X<{uy27T`DeA93QAv;0DIx6k6!Ki~bZBs|p0zB9r$Qqjpt|xN
zS|dBEW%=yTU3|eJEpGD^))QAH>W;tZ>lJIuO+tR>vh7#D@*O#a`PfS?4?P%Ar2;R!
zJMUN*MQ}SZ1az(j$BP#T8Q0W1dwGwWa}eIT!aMJn%kj1sI(o@v*g(P>gbzyGZ}q${
z>)D1Tw#&O(V?{eVyKH=gQaJZNlt3-V>Lg62llNSjf8|fb1Cl;fm{owemMN;k>UhYR
zhZB$$js4sKsc?p=C=%*q(xTa<iEG5y5uIAc04`o@uM*})9-IqghRjCbMS`1<@bx;t
zHOQ<KCt%x^IjrdJmGuxd0&jw&&F6cpKtSr%oD~KC+k9N^1)(Vik$vL>L0D$rM{xV$
ztrR>R?R}`>s76n#SK#arEI6o28>3)B8bgmxNEWsgERlEpX~~<&hg4lBP#DzxqXu%g
z4Mq0H1)gQB?&nYzCG@ZA0b=kHj$NL^u-$y0Kw8Xnh6=m-y^^;PPFW!v#+&-#DC2M+
zNLHh1&we9!h|ZRhC2Az-W&_>SZbauhy$!~RgDPu=y8z~+8}fL(&tXP}fhDSxAry|$
zN1K+^Gk&>U?7MG_0{wiW$7WSM(w@4J9!=m%EdH`6&#s1hV<0b-?I=}Gzy|QwWUW0H
zE7A53#v)!ccF~T!pYYz+AVxAmkz$!C&9)A#qJ{0z(t8g7&V6FjCXL*Q1!MjZ*n4q#
z2-~K^3DPorQ8@N?GNI!ow(LIT?1%*0WY#>5sg1Mg3ff*Hqh662h<}!X29?dMfaRKI
zK$_D4*DOo|zHqZG>@%dGqOD^?r35qF`SWrSG6!;46$K{sVg^NT0Y3RsvotP@x_pp2
zGY!j_%&irfnY*bzGXCBbHTU`?E;3ObwI`{$A1ov1NZUA3@avY0W{ZSJrGUS8k08-c
za^-lH^=wP4E73i;>KuwCifwILQ{(OsE+SRcLY#J#KX?(Vt)klzjbjP~K+@Xn!Rw!@
zyC2H!<#w6OxoULl`@|K;Q|%WQa=c(Ar8-%~PY0=Qdr`ME@KyEGc_)+zp**8eE?Y*U
zi>*vl;n{H6R#GaN9m{@69Hmix{Up(|3TmM_7<cz%5ikGza7Xrs7CSLrn+g^g42GlY
z>Se>4iq;S$NRmM1DVyKs>Sapyx@zUgzK$o{2wrAhb5(N==xNY`z2JN?G(QY^ys3o9
z=R1haE3#fIaC@VKV63b*NU{5aRY)BkFI+-LcUlu)b}gyp)VL)yLVoLr7fU(D5OBu=
zi9)ilD!8%O*q0Hvg*UtK?=OE#FxK#nk-4vSuv4_UWyAtAx9;Jzq?r=N{ACMJ;A-&`
z7OM!e%QX~9X?1gF-`Rmn3v^iDd));^*|u%W-j>NZ@Y=hvzT3vZSj4ohkxJ(h4JAIq
zQWyt7&1v)M$tVmZ$~yk4Q|2}0dgUdtn@B6!LaY%b;nf#gp1ki9x-3nz95)v|eypGh
zWO}w)K|vTt1#i!78_aG*Hce0m@Pouk@$_?4B^pq@VOSBtr!$0iErZto3vi0V!DW+e
z^`OVNRs_=qaenMCD@3XQ<zA#tGGx4EK!MMjJ1nohJSS|5Uxs^VfjKR7StmAoENA0@
zPcOoL8Rrn)8=riEiralJi<o7mm|SffUj-P3HjB{vXahF7B{k!nB1Wy8`s1q)WlXw%
z=E-MHtGJJ(<3ryA{XAn7n=L=-!UOP_>3*gnM;C{lTl(t*SAVhv|7^3WM?_j2TinLL
z^_L?_Mc28w^eue*H&Kz1naowJ_+Q&WA(!1Nn=OGJe*YCiTlcReEOaaL41%0nk1_OI
zdA3sK{AQr0#EH&ZP7?k@6Ix_i8z!R204PE1SlrTF0==3NR^yVC27J)P9333ZPKN+N
zTxOXfdO`d6`lh1RZ7deQZ{-c{ofut_e<$A%+S4q3IM;f7BrJUBJ@1bK42gsg#h^Xn
zjLHUi`OUHxJY>qzal|*DT`%kWunO|94uJ;B>5wG4$-$In)~jKTpYNK7hu{mNZ2sX;
zT#U)1{x}6!*)ey@pHNbsy~`e4UOz}LAvu(UZqo<BLS+O_6ib9)k|OtxAx8>O*bv&t
z99=&<u-!ze_f%D_e~vDoO5Bne?=K+9YST~?R-X95q$%6H47~Ntz=EAk@F1xK*sZ&<
z8{aKn(3<scUDw>qU{Y894|{S&lq5_jX`7-+{Uh&UGwx!^F8(ZwqA|-@3F6(=7-}`p
zZr!N|TBWx>yEnSlJ>Xo6(I0}M=f8&LHz}adn<ACcNPYT?9nBf@p{)K0(9Xa-MPusW
zM@oNt6a_LT6_!0PgX!Y<Kj;Y;CzV_{0)W?k$xu1s|H;AH$^SdOz75qeyv4)bZezpV
z1N5k#HAn>J`XWT>At-I!(;2-Z=!`5oY?W*+UoN8r!roe9G(q=}Ej%0;gLJ4~S~Saj
zRevJD3bPGCA}-Ei>rW~2_OD?+-w(c<WR>(H&V(U6k7wUB)$vck4bfc5(bbqO1d*~&
z0Z!>EE0yK-f4q+5&YZ1=l9<A5u`HS(zDU@C^Rkggokvz2I<Cia%Z!QpTjw##P0*ic
zUD?$n#Ad79nA)F0;?H<uRFwI2lqis4LIisd{-82tc8tNHx}h>(G$=aWKzgOI>`|tx
zoN?)I1I2M$OCACI;FH-ApSoPzWM!{XlGl_ImpFp4-ifZJ5F(cnz(oMBuEzl)p(JnV
zj^t;YSF~oOmDU>1Za(>kLI5e$wKx7`Olbj%f0n0gfl9&V^brCd7c}ZG@p4)lIL^L@
zK<0+FaaSzav=cKh4h@^vKcXtfcvA&S&^)WbY_UpEXYIA3tSE{u)7;{*0y`P;#jLa{
zg%7@NQnM+A<QKWoRVd@ZzRQ0WJi_tL^SmC%F!m*$ik_x-%+*XQ4bIFr;{S3^U^)y~
z*}H)QUJdPx(ac@&1-tu)+T_*3;i=%G7ICffja#h_SbwRNIJ*@n1FP7ilKeHf6#n89
z&wHnosD&LPj@=pT<$+hp@v8yzC=Q={`hFvGO+{F%d-ShT+t9GANOie7f3@o~sJ!zQ
zw{23s$*(!{HxiqSmel6nG201P+>-ixy&6h9`7{6oDbzFZX)Qa+hi0W3iZpP-zJxOV
z96mXY6_>3^FIVbjrQDQEyXGe8HiL0VlM_Lg86_QgKrW>{?g4jRCTmD{9~vBSnjA0B
zj)pO0q8Z`dQ49Fi`z{LvB_`mmmc>6`P~CJ@;6H_Sx!NsWn#8#lM04nK-nqO}dG>Hi
zFLL{w*LY-*(DHiF>54!Oeg<_vt@ZhV&0>dC!w1A4vcXQGxerSj33il<Jg=U<cAbqH
z1&OBWwmK`XQ#Q>-?6~BdO*!#uu6M%HdKQ#DdvRDgv(9eiI=`y`WGShG);FcZ{7Z6I
zE{XYIo{k58^nq4_H0b<eBQm~1`GR@jCYD3(M?6#wi<2(4uDTqbs`71CRO-$JXK4q1
zgy^H&v_m`o57Ehg%Qvy5kZs$U4ds;oCVvt8?^_l*jX&GdncIM;)*NNdlE%Fxb3RIu
z^v1vecqPxmqobno*|=Kuvjq|wRbVkl+QcT?flq*-hNXIO&s2*m!BnHtx|Dq7{zqGd
z47LPLwa!YDff$J@i%q~j5dmnkNE41UR#XL*l1anLT?E%tqVJK<sL5{ePfce2-pB{*
z4dg2&tmj6C?nHSBSpL$Ze&30iQ$$mZ_#UMlyiII?m#6SHs+fLbPMK+OBH{Zg;nOmG
zw6h%mW!BS=)Rq(irJZPRXuu2aH}|vwEE5B&<nS0y+GAen(OpsC*%Z;2=^vnYklAW8
zsgHQUSj#@zbB~UoQ4n2B6JvC+nYy=AVDC*ous9&i$C4(M)?lPL&kO*b*sfF7BKaOL
zvK8#q7OIF#Gw{U%<Rhc4WAT;7oU?HV*j2Xmu)YFdriCW*!mmE8t{Xd0ZIYU|`YCf!
zGD{dv9Ek21`Xz_j*tU>i8CQwn+;xA}6L<bY^U0{un->db7-f&leG%nmORU<z$~38?
zjkM<D1O?3eG@03!1<&PQ@+&>kw0|*swyaqw06$dX7%fFz^6m-Mf)F?1O1j!c!k^rI
zA1K5YDGb_nrVRP`0D}YEMi8dN4P3f7EHppVeZ2zm@8glyBgp91{L@3~3&zJ)AHArs
zll<>5JwKC~A8TyJ<8~{Y!wZCa`}3PFdtr*jvpdD^W0$NR7L7jZaSo~>I6g|fmyr73
z6a~Ma2uGw$z&O-DySN5D_-j&0Fp!#ExF#(Aq*GSH(_J&GU<S$IEHZQtHE{-Sla58<
zM{Q>-`|x@*wKpLDVS|#o5CKp@SX<NjJi=)#M5FT|)5@Hglrrf&0Y#!$ckaWLsYQPa
zavBIa5KJbz@i6R%dQc#HW>UUYThr@{q~6<g)MaE1|As50dACi@w!|ZhwijI-J>#$y
z^Qm#Z)xeu`Mx3OOkuq9njGQx4v=vv}-SJ#9M8OQ+>{juHj~F>^{s&7nDLJKo!L9Q!
zDV>l*fB~C>CB{%QE_y051ojV8l+-ZCx2CZl1bxn_+|Q9DCJR!t)i0i;JziOE_G=2%
zR9CT@_S~eE`_x)YZpp{(XL}N-SM@PWBt=7=G?WykLU~SgI4VvlXDJQ8vQJtp$_0z_
zS{tF6+md4K^mQXc5Bm!(YzQ9vzj3txh3lEwnVJ3<M`L6BFSwqK>Hh_;@5YvLLLFr=
zBjMYxRIccf%s1JN6@o?QYC*ypRnN-81_;1U0ZHjgQ}*@lFf(=Y(*39=PTKEVHZLhH
zE#29A_-ZgB+F|%i{r2<qwg0K#%GSlH+dAu~1Lyv9Nd5Nm?fLjbC$;Z9@P5>M*t(kx
zYD>6@!cJE6H7mwUuIoH_2+9TXbsrKfZ(p+24-;D_XMyD99x~W#dhm63f#}4lZo|&m
zuTxc|=j+l5-2ugy^&|*Id#!F`nukdZ6PwSND-+mD#SM8L^gAeJc6|+$mE~^=J9Exm
zafk`vgElk%Rsop?xMMqm7i`}n3{;zji6s&qO~crxTh(Re(?gmyer3KI@CZdtcSvAu
zGJR$Iw<|3LI!JPA59B%VrNv9QavRA1hSA$yzdf;sA$?M)SXBfJlr-<CjX@5}L>}6U
zfPZGK>duiBCew^8SSkf8k9Y;@FV_ZPdW-q(^4S_h7YpahE^m*~3D=jdhl9Rb-)A-O
zrZBek7LbA+`pE6*dTW&HrxEGLLH$2j^q3fHY&q)e7mK%uF=sScd<vVGS83tMFM>?F
z;r?uBBLL}i<F86Uq*JNV>CmuSkZDna-Q>~Q5b!)Np>Qv-vB)chEjiMwL4W622aUP8
z86ky_-|=rR!FbQClMw9mdnVwEjosO*AJ-;4+`6_tVnZfuhZx!b;aVNNfl}>7>^q1T
zy<|sj5^oZxKUWSv)b76my<-nQOkTlS=m`zeAEHtEYW~Wpk8ptzz?*Vr1K^YQY)3;U
z>2mUE*J+ZI?K2=YL*g5dz%i0<l2<PZtm@}$wbAzxap8s-5OBAKKO=-=Ld0z11o)GK
z3L?!Gcj~4Lnd>if>T%}+j60DYICAI4@poqx>(ZF_gfn5w?;4wmK)xmUUl@q@E~HFf
zNLm95P}~X{G$5<KOOtv6$e|ij^gTepqqk4Zt>9n%Mi8qFrKgoOk_Mf-6;BapV3zP~
z9Or#}BQq8`jv6nxN0X&JjQGL9@kKAnf~o-@;s62j<rQPK$M*y_M}R8K1em0*MX!d4
zLG55eFseaAZujKX>7Nu^n+Of@(wsDL(~;VKlPsH(^vy6QBpB+hLL3e8$?XYxae*4O
zY(zwNnSc&>j+*PFAtj?r?*OD>@a}F<>l$@z^9AbxcqbKLGgl?Q_UaUq>@ZRN$C#X0
z&8odQ?ZXbBQLuLHs1tRJgpVrv``V5yxH}qFI5}&$iKL}Co+t&A2F$1^Vppgo00^aI
z(F~zcre2^vdcv{CXL%Zt#B`Gsu|f4a(06mSJEzI0pH4y8RtjMt2{#Bq`(xo%T{+@?
z#p)gUR&448XfBx3a1+ZH*cNELqF+r@4@E%6rKjrr{o!3@at{PEA4W2#&xmp{)$S;4
zjoRqO7Ve`>F;7s-o%Vv|WXEqSAwZtQnc*flb7->cf!tg~UO4|iKu)AJ(`4=;SjurG
zWkwiCWrD3b)G#KYN}G{DUO0_ZMdK`Gt}wcyA`{l4!-b}odn2`}9J5+PyG+|xCHOmE
z9Wb)Bo505e2B?YVX}N+xxr*u6AYbB2<QEWY(b;HMw>9T2gZ2gc8-b*_qldL8{$U6w
zDoPJR*{!EgpBvk&{gZ^;hE6BL>RT_(7mbkcz_33*|4K2jH$rF@5-ufBcc|1i%#(;g
z3Zkuficf0IQ4#>Ko8PB%FYYgvl;A+6PdrmP8N_uuhE9!KmLYuhazGx`*I*XEzMch?
z@20!ST!yjh0e+gVk%*|mXG`vHLvDpI*qFBoYt7gUn151+|L?kzF>5d2-0s9g*}AZU
zRd9%%5Fyc4t-YyF$d)(bA8=>Rpm;SA0pXJ1i|NBEWKl36Exc2>qbw>f$qrwttR=K(
zoaq#P#6P>c+(36=piB$3M_dg0ZuB0gPlN~XMGP5iiY{vnom<Q{$|NnJox&v&*}WMB
zs~~LMm2V)BK1tFesMg{)+{_bxf3%lcGzw5I!`)99tQvvAjwN)?DrGGAw?`le5?CjA
z=_vGy!}@c=viQr20CBoUc+gt6fw%~N)7O)uhPVQODeH6&fhrEl7kvkY&^t%Nxfpwe
zEXp2f`1`()tZ2n&zC&cTG0=XrUEW+w_b8zJZ*K?lrn!lC>-Pc(xqXlur=uOS`b$!N
zeR`W!)i`HI79&G1Vc81pucxdK(b?G&WcC^T=@ki+DnyJShj#NWZTXCZfCqbG_O>#@
zv#c8j#8qt+Yp!TQg5$$!6%BA`krfcs)VI%lr!@%Q<QW`BNg+tC-hFtOj5;pcgYNU-
zk^hgebBN9?YPWU##kOtR_+r~m#kTE=ZQB*wcExroww=25pE2&~{GHR;-Mv?1jdzdv
z%sF3leDTm(FAeb@sZqk}l^B*v+kB1#&nVTjnb6D`3Av$jM3_k5MKkGCDE0g5&hR9~
zVOn>4i*8%9%m7*`)j#h=ll#R6-JgP_i*X_|)N)0c+R9(tXyML>$zv=TU}ztXEQTal
zOeHT_ziC|SxC=@>0-?<X-A|Z{`WKZS#u4t1WnwpULYcENIOAU}GmWMD;w{)ix@jJ{
zlVVpFlA9f5qLpDhcbBNbP(e?KwxI^4Q<ib}Vcy}<C5(%*!5&av_Q^IX+vNQg+f6f^
z90p1<zc95pNw@@WUIaj2LSuv|cabJu(mV1oxj{lf=5F~AoH=O$_#lXx;KRp>9EjA+
z^7tUg5@Xyz?2at2g4ov=)6Wdwzh-atJgKNs;Xd>Vr9ZHdJqKmcVwFX^4SXZ3hY=}X
zW+Io|?jeSecz!YyW72``fMnLb+mDm^N{5z1lfwTPa4x3YBiypqlz4N%I`Ig*D{(E>
zXxXJ<>FGFE8-za-<Fel+k8bBXj)RLpp6OE&FpF0Xxhtq<SN6_zHEYQqxmb6sF(`^5
zOrPzDk|j~N!JR+rCjH&E*nO&<W1?@bHo2kL&L+~N;su@MwWQ*Gi??#Q>C0`M#i(0G
z10{+(OBhTt%iQP55-si=aG139C17=Oh4)sTU8xIzf~Xn$v;NL_kygsdjZ9r;r^EZ2
zP^FwU)h%};&iADLhe222IxX_?hwI0{m)=YG{g$M9CYx}nJ;^UCKgz57tAV@xfUNVv
z{c{Q6#Yd>QYd2T9@{qooxKL^!HToqne><KR_WBR(>BGR4W^tB=bZ!5YKXGv{A1o#&
zxlA@*pdx20NwNkpI~#VFk$e4i7Z^;@$cCN*GP_TM6UW)8m~1h>SzT}|thaC7?$5fR
z+$5X$=3msNMcbW(W=}X}Sx<B<d04~?3W;gAu~tWY)m4y5#A}El1~p;<^B)otq#Hl4
zfzYx?C5+bUEvY<%1_%#IFU$c9+p6K>AYh9FHg6>GW4Tvn^Jyhy(%)jk9~W^$&-z!&
z8V7#LxC@W+Y|+1EmKHpI*kuHWjf&Tr4B{Icvf-SoY^N}g3AjBJI#bEIJmX)H-Rfo(
z?2VS3)CLR6>xEK>*Q3uHgdG+#|Gw%vFE+|!-ps)GhGP@6H};vGYI+t7iv>m^4%^YF
zV@4q9@|v`%cw@K|FX`7{-ZfQPs#@_Es_#sX9%0R$_rb8mSgOGx<Il__76gSp(V!jb
z_q0G2iaH?}ps(Z*C)J(L&Kt`F%xrsX%-_<gDk|HpX^({0dx?)6_)fg)1G*=`>8u?y
zsG5A;aldi<WdM%^^q^*m3D&M*?WQ==LF+$`EJ+WI^~#p2R%2CT^x@0{TW<kAV>tKK
zkx}Y(<W}51I6_d(xobvoO|TWfznkwGW=xzPa;ZH$9lFw^-5r%rQ<t>i#cK=8c4an9
z2S*Jfu<B4fD4Oan3c<_-+MvxzE+|%Er9cuL&g3m>S*m3979PcUh^1z2#vC!tr%t*#
zp~bbA>4;yBYtY0&*n|Ra^lf`!CVpi1J9q~kw$isNg(frtdvVZcw2*(HJ3$!@6)b50
z0}EUzC`Sy_&@$b(fj&N}y$+YTkM!c;i?{e(rfV`qdl^H;hqPQjq5Y$!em0ycPLUi!
z%rS9;3vrYyvzX-}If1#TH~sFd$Vh6VMxc(f7J<(PM9P@3t@@viYl<_gv%l}}==kJ~
zjP;Q7Z|FlUjA&NzL>UW{^46Kh2YI7Vm(DhK&EerrbFNq;7pz2)OXSbm16zNZII~B~
zY!-(kK(FyT%ZFJ9nlUIHdrEtCPx{6MhFxCN3%)1_pTc|NkkbwVO9QojVwf#dnpaM5
zVaS71lOm+q0((!gD1hV1oF9*6yGGcR(tdeGDC;k57Jnr?lJL~!(n|60Sa6E0*_qfv
z8=O0{_T~21N7anDamN4tU7pE0PYTOUZRW@HT<~~=>O)wisHH`}O_CIVl7>~s>%~+x
zXGD;*=v_rxw9JOTs^a>s2Lri~wyfTI*`{NS**z(D!@!Z=K~*X_<P3ht!mZx`eu}@0
zsTv3~ufC)<$L?5EwW$9ghB4a3u3J$@=!H_QWu!<-?RSNhZT3}>_H37QTl!uaV)y|H
zRO1K_$(eSv(a5zGa;{Qs{9Sp<)-MhP3bKfJ;ksYGj3B%%$kn%SA|Z?9NXZZXgk7y7
zSs?Lio+WKBQ%PTl7Pf;$`V8EC!y!rHef!E$b&dL}%PWXf2r&=P`noM19cW_zhnGpN
z`{yeTv_qxhlRExrWam%>xw0ds2%otupnyW)cb0fh<;O}O<<h!MvD{~MYiT1pm&o{f
z5-Tpv;*+<({R*$zw#*BTwiM|-KhKjpO0>aF>L6v559@tw5h*1pcfNgE2#JnYU=VX4
zpXKdRyMj0@JuU{cKdb`VX%BK(5}!9lfvGK77=C$zt|dEkDW@!ZuVUoNda_cz#N)l6
zvOIL3d_}{v<=Sx(mIkfJ?oOCm1S*x@rNj+MfeNh`?n3!6rXW&b-ZWRGny#m#%UPED
zpvP3L0R9PV(+H^u|AG#Ofef^CCoW~*o>s8?dTI#{?lWH$4<h@@LYMTzWpNy^fpmp!
zwhct3b4dcN-;EFAl%jW*srd}X*iUq<EgVl<;SHrMVcM}vJCGFByA>O|`ZMk)Kvet5
zBvW4Gui!O8bG5cr5k}om1UlYAz<fOTC-f}Va%J_ng`$l`fl6YGXbCf%3*Fei{pYC@
zJqswXSIgN8!9gm7Lv7d|ohbNiqdrv>t@8V3Iqj9|UB<b(-lJp@5#7HYMbho7H0sEe
zkz?+?Dpq_Y0u6|>Se16%B9u{H^u^`83hmM+Hito!h5#BzHipDO58M5B%?~~I-C$hz
zsj}gCFl(ppv>2KPUV@L#R4Jf?G^Lke)I2tl-ysFEvpV<e*8uaJqI7KL*=d~HG;OXw
z)+9F-zXaWtFIZ<|*5P+6#2E`=DKU@ZEOu!|XZ+qaKjEfJE;b6Ik$T=y(Kh6Ab~Bjs
zG2OGSk4@vIC-o;7=3b2+vj4zMUJvy#^afC(E)J3&2Im<te7&hAYe{Knli;{`diw|C
zc5l9ll3cy$tT_l$JzozRqOEmlz33j+H~E&L2d%qSPUf(ArH1vy4sOM|hE~C7UD_cL
z4H|Fh&_T@_t=LY)=}~%as3oa)d(s&2NG5HlbQ~z(mU^9V;ZeUqC!;o;^3x+6!63ss
z`VxwjtNeID!NR#28K+ev=WjJhGdq=MoX4L^abScWzVz3nua`%_LEDO77#w82zb!Xz
zS>Enp#Y@E@%)?ura77LBReZ5*bObMc)Z!^fv4F98@jM^0a)R0LEgt&amhx@U&Y#!Q
zJbXot_QA+*`^uPUsnyUdn7|qJNgUP^7wxPuOr8LJ1ak>JrU)K4P6U%{Ihei>KMUGW
z5B38EAt(hLqg33?RQ(kNsY`_(A)cwdFWH3VIquf~-LIG{I+t_WA<C-mYo$&baYT;-
zLWlN-Mqnb2Q{OWkA>khX63<JO>yw}_Gu@ywXULmj;Sc}*neaK#m*?fck}~SW&!r&g
zyR&l3Fh#PMZ|&yfDvdz*tWi-V>b%{?)$9`^<307h3$G*V3S#@g%${$<s(iSO*{={m
z)I)onQFsZ<yLcKg{xnq5X$#Ct_HPxU!%ka^r4R{sA6iUUhy42Vab@)yxKdhGs*FZw
zs!98h94o6}8hRVgkD&C}E+?$7@(C5qtJ^y_OY_THf$#50Syp$90h=RCyt@^DI`V3Q
z!0A|br&X1<{E5vWDy`1fuNKcm+yE+v-}1V`92cBhACW7jG~;>7HvEE^JK~ybx%9o~
zs*V2QvZ_6A(fzk-^1{%NaW8RfkCk87_!8aN%lwXCZBZZ~EnIS=8r_d7v3YFj<O0ZF
zpzmKf(wP$vdRcmNOBr#F=<*>#v?dNA<tSu=q0_hY)kPxO0W1PuS6{Eo3}|1kd8z7s
zn&G;z+df^(HX!F1vJbC#yNSW5JNGqAMaFHZaCg+<weI|(FJsk>d_AaT_K)F)NEPa=
zr0P>v5L(|WfvNSalQwp#$G7(#L>6Njbh&g>!i=#>8vn)tf(NrsJB)@`6OjsBVMW4F
z-wPM-u>#BnVhvb8OyJZmdFSZn`(}@>*tLl{E4BEb`v_Yk{^fnJ!eCZ2^)It0s$<Sh
zflDiH<?lM}cC5v=!iMu$yCj$K73Q+>!aoy!l14WSYo$7#p<Nxnw{Mlz--?enm0)C)
z)r7>*C4|&93ShG++LP|Rf6inah=vG1b`BeUW#-@gWyD6WZDFpxBw3?&$eHp5!`|;s
z6?hy+@OLK`D~Zhd2-WF(2Suw-y;~V$MOL>oj{c!-xApvkr!HIQmrhQ!{0S@G6uhIG
z(P*^_4x#?K0dMS|C~0Kj0F7{ehe;O*qWy@Fzo6lFsPF$J8~%?R_&+XRX72xW`Eqgn
zw`|D$f07OLv|aQ^P-dG3<J%Q0i&JjyF5w)cVO6A2pd~8RAn%QIwV-T7lY(w976qnw
z1UBbWLum+ov1b+*7pGPQ1YRNtvftwN1I~6=u_S*Ru5GVhp={<7d^rve5qX^DuVQrx
z<qHGsJ#u~<*!1#wxu8ql=O#AYPu|q1HKDqDI6FN)mBhW+re*0~mErVZw1sa<6W)|A
zH@%p@V{yVwgdQ&*u_&3OJeu$#<m))`_~8L~$+lh5yhVwAlyA<>7HBJa7?S|d$ImGI
zZ7AAZLU8)XO7!YxETmF%IW$Ln9pF?F>}f_GA)lEytPjgxY~tCKe2U91S~_+Q`=|nh
zRCH!^==Sy}XdGjx;$V(Wxt>4&{5hQ><6!IgfSEn{B?-k%z|EKgK)hy*YyLA=lAiWZ
z{A;v)+`D|-xF8rOMztyN4w)(L#n*fik4>N8CM-AAbzH#`L*dL*P!AhC8!o~&)?JQx
zBKY?xbRXZ_<>{YzL&-cIv(%<Vp&@D7<Jt^`1ion)nsh(dItKj5S?mc5I3KjFXzioF
z0k5-0{Phr2=uKSGf<g!-vL`Wlf^eHDgzk^!Kh`KLR1^Xb%IQNFP2Op5JSkLs_+@}4
zDBdg2F#jR~I`2u70b)D}gBFwBz)4{%^BFnxlW|uySX(HA;OQeY*sQ^SIwMxO+rKpb
zrSSoA>c2)08JB-Sgmv7w?r1WOQx1YRPDYv(n5sde5Q7s9;!qWDOm~VLw<_GW8ldKH
zI$FNmF>RxQ$?Y&q`}aF7Y$a*+X{WeDbZR1dILs7c3v+L@wu)}LtY4bE3#~?%gM0PF
z^WjVMwVD1TbiL$g?&bs6t}|2ZW%J(Nt&hSeo4PP?L7rD4)u{`tI+6L9W)rr_Ms(B`
zhGazDD1{*lMivCW!XReiT&|f(U|jk|3=+w@^vcV~tgr^F92TRBF7GH6NCyz(N1y?$
z592|I?n=1zW^s6|j)lLmBK*AI-!I!6MnrZVk)EDFSrb;3ecvrhTgc{OKSRtQuOU|-
zxNOa#e3R=F(e{A-F$-bPRHxzg@yd-#d2^HGO`nGiX>9GMa_&=vwEWG#tId%L;jWY5
zn7>+4_?o?pzBC*DeamTAO1!r|9jH8kZ~!{Cfz?sC+R~PK%!Cw6zDUl>Q5C_wlM`On
zIq9}#3%BQ2rgq#GDq`j0ly@L%#hOx=aLIFLK*mY+<uYtE9oP%FUk*%lbdaBj$b{!N
z*d@Z9>|ltgXY%~Sjm8|BPxWAQP{h+{#};3^4=5%udM$gkC-#N(B~959e@>>QJQb-v
zML^{RRYDeoVO|f6xrjVOw>~1-mRl&$1WEy7iv?V71NTThDpeu{rxVPG8iInoD--nK
z5>80DUfrzXq-1w_+QtFqTiAI=)Y(WI5$No5ZEgq8T@1O%@F^2%ub&jfegFc_Cfyr`
z4xgWjvW}{KN2fzt)rM-cyf$eD{EVa;HZ^qT%NfPVc^V;YYtX(xu`LVFyRI*)<Mfgm
zXM>kU4%--H>T!?~4|H0`Mr9`~8PeuwFa#s0ioXFHA7ql5>u2warjKYZcdn*ZLG`Ux
zkG8|{WMb7%7~-#j5Vxua7N<*CUU?ZG-V~AU?JMC1j5<JOhg?6-y+601GOAe*;q$Ec
z#399ks>j26cpTjagbv2bw)lPnD@fzU%b2{MGwY9dGVg+K-zlsDG|_V2^;)kw`XsU$
z)aH*UaJ=V<I!Bd=LqHnbH-vpHSfViEg5<SvTZ1dCA9y02LE?p3ji7G3Si(C=O}n)x
zbSG|6i-lyMUc)kfme1|az*}g~IdSdXGFbJ|t*iMI3Rvt(=fu+@JL+LzDdF%l^|73@
zOu8$Lr7gu%>IHYJxM>`E;_?ae>&0C7RCy$JGu%>&u%}>gyq!*_B$c|2goW9Y>DJ#e
zYr~8J4Ub@3E-s-fL2^|@E{?KLs0Zi5<oIGLc`^6$kXgj4lcgSQVvJZ^3TqC<SF_hJ
zn04`<=>?>?Q4v`B!I_#!1g|#l{R-rvf7`JRJE_66Rgjdy<4YquN)b03b;mI_tb<w?
zNG^D{aL`xSI)Da3w6SUG22-@d7~E(+z)J~6{docc{{0AfE89Q-^W0Em0*I*pwYH3Z
zoBIfg7)1eQsR$g#(f~<GS8aOXQjonJ4}lYPIzkDe>qkC{y^wEt6I@g-f>c)?Av_KM
zzdWw5!yKVD!C2mcTR-{U-yavx^o*LA%wtK#xti3;-kSu}K@bplmradEgi35B;iAU;
zaVp9IHR<jXVY*n(`#C>0l*aVhowt}%9~P!3Q#v$&ru;U-&W}lOfcePjg@Pm=5<p@&
zCMaof1?`CoExrW_TA3Pmf{kT-3har>pR=4G=;tdLHJPwO``*Y*)yPNZyzC=r{@CbD
zN1FYn-5*C}iMo)K>$fpxoY$!J`ofkP%@4zxR3>#iW=_AQvAk7<*}feP!fyxnX=^*V
z0Rt^jDv%o92Cumi3)x}O?E)kjRX5BDF39mckz}AZ8AX7ebz&~SNr|kEnm38%OWh-^
z@YJ~6PZ%@wkpLp5$`?wV8Ak(a0k)eq-v)^;lb>P5lnJ<URlV~LEZ{p0As$Vmlpmdg
zY$wXS9(!-_j*R_m@DRfb_mO&hd%Oe|AlDt3`O0v*$E<8g<-KRwR?I0np1ga@gKZ9R
ziP_fXa;q?GwymvVEW>aMZ=)nM(<E+8XF1?fk@J}9H{nK}D}g4oxAk?vpxKX6s^b#X
zwHRBrJ6tDQ6tjh(NS*yi&$0FJzIf+a4x@eeMaR}C{9=AsIzL51@VKBRO(+G)HO$@@
z2ii2j3_6Ho+B(GH6WAgWx@<j?V}_WzR9HTjp3-6`BaThr)7L~Xr}11c9R{iI;-8bk
z^$AKG9b`Zv=);Jy%DmJcoN6(J9-}yue?{659V*Dyr=I!Pi+K*KP7r##lsnqmM--U%
z)Ich@Uc+*}*fs6o^L9u*1XEJ5<E0k(&C|rZmm`ywV0hcS1-?k3elED?;ORgMRoN-B
z8e%rKXf_bomw8yi1P`6XM7LJO2I3o}rV@7L5KL)MVimw2a4Y)~vO*~u6CB7Ii_d*_
zX~-sM{V7gPyizEu{9UwNxel%a>uxg>N-eJ(T)j`~FZX@{HCWxybP~%Um@DR7k2POz
zz_G<Iyx+-1nb<*inJvLyN{rbp!Im?`_g5v)&SKIGECNsm<`QEYFOf?cLeVt*{abi2
z@N!#pqK>oS1*LKyPu7mVIv>h<18oQoRUIps7!i1@O;-{8GMn7u+v!@uZj6Wl51+zq
zd<t?(ff1{OT3y1~fK|b5$LO*lu->PF@+2`Gq)7B(T$)eFY!U^>9jv*0H5k7>D8S9?
zc@f(}2bzYdZXgS|L2yJTYHh4@V}(_bC;(EHU8$Xalot)_5HttRM&ld#5H7HJv7GK$
zY9M4@(C1u9$vKzzpWG!z>?FWEE}pwMa2Fw8czd!;Fkmu(rT~gVP=Qbcbzxo?Mb|bD
zsVv!)q(6pCfS{9#KiolbO;BZExGN)zUbd3m4)BR=5IPM!qiwGuF<*<dM$U-UX5!^q
zZcw8-0FzN!2|2>ukwEkN^jCK@S^F(@QhOfJ?6Kiz3J$$dK+QnGAc7i}Xrn<#U4LJz
zDNP{zOGW>i2}?o<*~)j<jR-s;1HOz+o|H{@qt~LyC8ntQoY9HIcM3_{@!3AZPw&KM
z8H2g=T|6tUV_)EiiGr+k5|qKvz8uJc=Q4Uy&GUO2^!qdfiY?Jeo;k$EW-Ovyr2=&}
zsI<2D@~^6??VCU5&qfslBZc2}?+9JEC7OsV2D+FO8FbCr_AiGU77hlgy_Co|F*!oh
zV9NNO&!#I=A1qlN2v=O5ct0x<wNJ!lCJeh)v5Jc6xX2p~J-DQx7(TmKNz6u&wyRA#
z+jOtX7S`a^+;aIMUy-8K({S>{KPjrJ`aTerc(=c-y@_iOxK10TeIRX!lYhaiXZn|#
z^e(-)Z!(I-FEDvqUIB9v3S6eA@CA}37Nqea^e1(+6GQLorZ#b}OP**nkr?BuLuG^^
zd*qh?Q)$Nplk9Sod9`B--OZ<CA*96FrCF5gGsUd9+yBQ$5Tstb$hEj1o9U{i5rDaF
zr}JEgV50T683fuJRPk!ZW<f!gS^@A?S@_;yZgrwVk4Y9vSF;nO9s0+Xr=Hd&ik`G1
zP2m932iu`4quw2rM01Y&C)8W%8@=jC$NY|+z4wvH#R!-%8fp_!FPLS-)-4kOb5L(h
z%kU$KVHVxvdqI||_(C~H_C%TI#WrQA5k^Pm<+QnznrC8?u&^rqk2X2P<pf|ZKzOlA
zjz3eoZUlsK=jQ$*8p>5z0!zQnS*0X<4q6|2_9hZl$8w{qxvxcI7o4ionzw2K#uGR@
z!!Si+2?nq>MAw{$JGoF%s0flLGen5&toD)YchTp3!90HtQOYK3`lzCt3X7=EwbN$~
z1v74-SkZ3O(d_kJS!+Agzkg0do>rc_CL3D{kCm57F3}Ec8tZ7O?+ar?H~1})fn*Ia
zxiN<trYR+QkS(AZ7!<M>cAWDvBHiy?6OMIuB$9yP+%n(2EB!oBABuIWSAx&erqJqG
z3D5LW`2~-U1_08|gr1Aga>FfeJ{UO1j_JSsQsBio3|mA^s$h8DhG?T|R)L%mF)|X>
z0;G3A<A`EU1$s3~mNmbUM`AgifQRDb`ZP(P&`x75Qp(fVI&PUaVb`Ns&ACay#KxgU
z@Qu!70MPjGj9T0kwNIY4J{V2uvyp<pod5PSi|`8a^*jy3K!CZHq_=*k&oH45mb?)k
zb4_23PW!1yAfDJ8l1N8P*uC_Za}{gq5D24BufOY22&*uE0UcJfxQE{xB^0;i&ZjcK
zA*aZeP1a@)!x8WYxw!n~-1M<0{ql^^yglfF_o~3!%&xQvn$Q4<*2`_a2zWhR=79Fp
zg8OCZPEH#{@){W{&XKf=V-rv{nqTMzr`e#GCDciS?VF*HS0NEefJw}o5qJFj5hn}-
zYL}3kDwip;s*O8}=m;SiP$#;0NN~`f)_V_VmQPyoM=-UhE&&??K`XB@9ptG{j}|b(
znxzA~D2;#{tom+d#G`mYEuoqb`KC~vt*-6_CX@IJH@^+JYNNOJ?Epy-G3wH4D*dyT
zjzOEt+LOjtJ|qJqu=@p#+GMp<MOa_}WA-J{Y;7!I*&`XZyE;e|QxZkmJqOk!cm6|B
z?P~eRYn5{|9y8$tr-XbKJ6czQf4i|2Kn`$XI&mKoojvIT$!Tlukuq$}!bQ7IrFn<W
zklde@!nr--H*sI7;T)!;d%!0_p@sE_C+Iv?EV1V~AFTpB9}OSdfd-0kd6H81kYKNB
zo~##OnBz%=FL(9V2!&$QLHPPW$}m+%f5d#kwb1Ek3XrpzwficAQi(bfPxp9Kz*t`7
zrcgC~VyYo6U2Sen9+QK59SByU5<w~x{IK7M^%m2Z;d<>8eCn6ii@t?k4}({=yRXYP
z<<%z$77W)bVNS-+I<ELE4w6ZWBaGrDY{^{ZPA$3?IKQCJprD^xj7gz-7UTU1EdL?T
zgsI5F_4sd7rB;Tnjk6kK_r>uTWV^6);ay5SCJ{)ZGU)m~F9t<LoYS2J4176eM^A!b
zFw$jgCK92xKpadbJXAh00s?G8Dx)T1X^F&?@$?hCN@&iRIRC?yyvvd9W-BpSE7-dl
z>$4UGwVg~3$X+CzZwsZY3Gaf5%#i$F81r?WPl_<bTirly1J%DGCKl|({_cUP3RxFA
z%jm0cNQ+a)ZOTBlUD{Z<zF*`?DpJ~d)JB95p8TD0-O|PKhpCR*>ZupDOQx={8hGH@
z`{sJ{4%lnmClR=7Pd=uaLq5_l77L$_(i&m*w9{amW<}}QHN)6$175pawWvh0^F+4&
zekN+(dx~-?V$wD5sn;My!IM^8euSYK+zI0%*K;E58hr6S0i-4nVU~Q5C1m+YH)Bi2
z>5Z~kisd?!?i3;$Lrs_|Tn^!y*0O;LyG{nFn{X6h0JAWOk;mUFH+2)PukDd9ISap?
zB&LxNA*Q8zpt0kJb^5L%fvpYkgXy2=Ycq3O3RH30@Q&2fcg3fLRrAs7xCS5fT9z>*
zX;SFFE};suDaR)9I+3>mydDEu@&^AqYR>f^ap^xI6gww7_y6~<asRh>jr;%PUE9{y
zb<pELPV?L{bQ)gN9`1bh^z~s-gAPrgrn{TFpEF-9vF4OsOhX@zxA5h$7@NH-J3j(l
zlHqY4b@}SOTk{f-DulxMo0?Dcu^cjH&+XHcG_-#HBvWdLcr-TQ@7YcPdIVJ}#w>xb
ziB;XLil6}pj}29>Ox$kVWhj8N>`fNNIf)}Q=>3Df2!z8QD}4ZuXwb8l@?xc6;jpRQ
zRw)Gj@X7eiPUXN21Y7Zti`Joq$YBNVs-~XX<dYRl4*Ty!{zm7W52Id6hVa4G>=gZv
zGps|%<bdQ<S?A9x2+-59sT#r_A|i`e0~x+vK2M*H*SDlf^C^pwD3w%IITKBIUOqt?
zR2kgZ=B+9Plw0Z>t~WTkA)3W|T7SSr@ZzBhswS9nt5+m!i#zSL91a!C^}bJ8$?!4p
zsZ)LoE`pIvVscuN5Ao9oRsv_*e*BeUwdCgVFS-&F__ok#B8@cWw0bB(=(!<(v*scS
zwqJTR{vT4GI`M<&@P?+h80#Vysd#P%fcipxP4wVfW<j~BP(-eD%#?i82nch7Azh81
z>2mwm#53BC|7s)n<2K#qNU5vttYQt`)8f7#Vp-DB`)UpVe71p>((2f)d{OUX?IIh(
zTCRuDPf+@UUK;5AQty;V=sfV2#rn-6p1{+G?kLKLngKrNyL;lUY)JbbrVAmAApZiQ
zFmeT2i%G4yl2j=P5Zxg#y$mDO!LS-f{7PzR_uhlO9Us0o>Q~mRrnFXdiMrm|rOIu+
z!@dy3v=ldp+U>?u=`Q?RlZ+MwTf@92SXy}#4wcz5k{nRIlWydgst6$p^iYc8@cMqL
z7&I`I>@vaGE^?Iv(@GXFo_ERP^d(S5%y4uE(X2G&EK26gP6+82!w6?yPKS9j%~qoH
z66Ba=J>sww91mdJv6c80(v@|n>vRiUJOfxYQr)Z$je?5mCt8u?<tu;<LQ#Kb2oud*
zF6YY^kJ;IKV$KQiN#vtEz<|@1>aOUeeB6*Q0zV<`ka;}C6gNYq%hGBl;<cClU+q8i
z4)9PqMNH%`f9zY69u41eh^NET(wnB6;ALP!{WFAd?y5zo^)C*W+Ow0$8<pC@Bcl^>
zhkeIBE*)><OoG??jl;$JtekB*CRUYUZn89|Xcd~M_p4G12$_KOwO8%%kp$3>u!B8^
zu(D1{GCd^?5IA;|?7G98g&f7#g66WM4Mnc3<?_1G*afU}&fJW6Sb#|{vR@C|-z7EG
zGJ>VrP8e{lA!=@1HG$#{BpdHy=Pfn-kCx)C&H2o9h;he1@Dqp*v!#8(Wwa!F*snTG
z8B%iCCZTaIn{%}MXYlPdib%+9in|Up?V7mpt?hKPRHPqrZ?$-rf}u5=e(_}~>&%7z
z<<RvrVDVcIBKYXN(%SfEY__<VzFxpjU8swE6#CV=%a0RKE}F7Hh|e7C(FrhQs=7dN
zJEzj-z&rkR^5*E&{E%W@omK3KtHNsjlt@O)5>-d07u*#6%caxMl(E>wFnG$p42MS7
z1jnCx3UPM}QHVTqTE&=CHT)v2PpiN>km-RZ_S_srmc-y-FB#6G{NxDw0yD>(uruwW
z1LAw9tXxQ!<KEaDOF_<hS;Q}n%teqItD2|uONwLBY^~y)QO~ehy8rEXU_Z~f7)?jy
zRp>5-bTU0eanz8FsbpZ-y=fr+p=cPeO0b_*$?{W`GcWGR@@0>OvxiU`u{^kRQNrsA
z?9bu6=P%cFX-QUx2?=bo@>oN0W1E}?{|UMGP@kGVYy4(llm$FjIr^7$#5JxDQg^P~
z;p@o3N;t{-(8lc9mKPQr@D<e&aNm<df6{XnAa0~u5#I?0z=v2K|8+CpZHO<s+<CsT
z`D;_Tz2uE(&pk%H)|;eU7=%90w+|a-I_XM_5%Z9O0H?;6kh7^GG}lW2+cJ{d1Ab~k
z&4?#w8@Q?_(`Yy7#Ve9^2GQ`D2T#w7WmXG?e_R!3@qD;o>$+V$Pr5WgmA5;8A;m*+
z*&=^8h77V-O45t%W(_#{c>arw;9h~F(he%Vq+S+%OOP96dwUB#n~NV@g^0M^jY8xw
zn&FCuS{d)n(jd2RAL5Q~Wgn1j$>oWDaj!lk%7cHtzmb}!Afga-F21G&pq}xEwKydI
z!8D2fUjK~PO}jbhu84`Z$H;wvOpF^GlGVLgKDt31_GpU86cN{*DL8DA#rf*r*!F6j
z>#5uq;yCp!gp+1kmv@j3z<zUkV#4}bMEx)N@;?LZf65OydH$EaaQ`=b;r>6;mnr9e
z<p=tvW0R6w>gTLx3b+34v~XkY2C2^LC?4*ZX{IW1;?k59Q}?UocnH(%2LFyB<F#K+
zQU=P7Tdv53`*0{ie@}ed_xYEK%Oc>tO-OGycZ6beXp_z9bu)9({3j)$UcK{tL^fMB
z1%JcQz(bSd-1YnA7+a<(!TsO7E&Ok1nEMhT>IUxet^ND+7~Uj6IaYEBZl9+EQaYD~
zA(4<kA9C~xiIkgfcwgAO1dZS4^CysTkk7?8mB{Dp>(0%ZDK(68ci({~1@+m}$G@;A
zjHbO<UOr`M03Ry>EEbC(KK)e-(ym9_M~1YoShLn;jOn%`(LhYQM3!pi4)A#u{CNF}
zts`qmwsl8waXKPZ#IzW#0%42j(CO$aW0$(DpuX+v>u(&Uhd%yIA1elj0>**DLQ8ja
zM~g5LWY8GijWEWgckr-Ot~5GU#8_PP_4`QZH+oHx+P&l3ucj#>e?Q-0jM<inmK?L8
zKHnB}>YInDre7L14fEpIIBe<n5Laf_bhl!_A9NkH<H!SA!ojMt%u%*SaJWUz3f4NP
zN=kvH!5qrQ^8*o$l(%fPFw|_=|DvYSTF8FgvaCfh3L3G=E8R^Z6^Du^lLPId%CJwK
zlE9qh5+W@my<Cf>Xx6w+6xoF9SC4PXbiZVc8L@iK=VM#)?qb7x@+fiBeIZuyZf%QC
z2DFSYOHcy8-=SQcz<m}qh6U9YPYPvXPGnkiHy=9@2+K*`xuf5YFUX=ekh9S?d~o4A
zE2RUJtJ?MM%fe&UNB0It>hUdtdG)(`dsjR<H7nLX!DcS+X)@3&VN%NV=DVH|@eESZ
zu#Bcgwd9$m(^7>jWOIn{0)w$m0!2kKn!D7Xqf_1In(<zm+~kdWQVe5cpp)`J`*BLK
zygns2vKVn|ea{AlOG<;Jw;B=KWBr60dI*{(7H;ZgLEPJ#cvQg8swoZ@GB^$FnCvBH
zD0YABO-vdzE}LPl((_u&3>}tVxtCztDt(TOW?_J&ffH#6jbfxDu9B_i1@@4tHT0yH
zBtU0JSN?=+H~(s&#H#$MqU_zcM4wKNk<cFLkiH3xJ+){pz2X)~IV$0pQcet;?8MiA
z+RFoFV<ry%qb<1wl4mrC{C4lg>ZW$vkvYf@-Dv0#^_$`ek<j47n!KX}rRW(>(JEA<
z-QA`D*n`N&H@H^MogfLml=b5T!Gtbm-Ld~F4SvUg{ws<X(;%K5>efAeaN!Wkw1GCb
zj|OV04QF**GJ7IPU_o~RzjpR`ZB{AUaRGjS7^_mgbc4<t{xNs}FQPdnWpwGf;O_VW
z!aQfs-GrXmm`vHG#md3x6&+xS32En10aP<GrvhOQ=)GZf_gL1~C|j-VNMJ=P9yW%v
zscy-uyyED=DXI#elH;}+4(cyqBj2+hJ_HGo*YnbYM{rF(fl7(4W?{uy*I&%HLH1p^
zD9T2ae9E=5sWytIavaR(E}ua_f`&er-Rf9=r~wI2E{)(ev2(&$o<6`u9$kSIPKBdy
zsv&XXOtDav3<k<Q>ZvC^<$a|??>H+C3hbh%UYn(rzxn0}geolmuqS$YCOe7M9YcTc
zkz^$<xZ*^c){^j9x^#*<rdMJ^X&R$6ZfWqB`WwFadkDM40c92&`pj(u>UEJjH^P6?
zq=jV0s38&$5y5AgT=xbkBv=!mEGeZN!Geryxy%dgD<3F(?D7^v*abySbdH0k@9~Y?
z9{Pgvy`1R$1HHgz`$O@>NU|5Q=w!NNH)f`Nl-7CGUYQdUuV*USh9ZEe%hFmKz-3ru
z5MrZICK9<~Ln*ivmK1na7e4ZWT^@{o6A>R`>lz)@4Ir4!IpFhF1_*?$Zy+{@j?cKo
zIiC6%rYf(~MCJj5UY;6(1a25~c*f9?Ibx(l%lz$C+GzH4>8Qlns>gf0KMAg-nxD%8
zDLlP9P{y!2Y%pHhhnYjkUK2qx$Z+h_^vF)gC2Y`ZYvg2CiNhM(Y0OH$^N1chaC*`E
z<TiwM&{v3g5hMuvI*#tX-Vds07fb!1nCo%?CY76W5Jx;$aSp$O-e}j6t#;mAvYc(*
ziISDe167c1KJ(VguqXg`)BP-XIt?Z4c1oOMZT=IckqWUnFe)&!XFG|*$baJ2e?&tk
z-W;;T^=gH<lxqH+rT@C(ait2ocW=dlI>W_2#mz2WtL&N0px(vD)0^d{Z%Z`O6~G6|
zjEUZCaEH;Xz<C~l&_kNARa$bV`F6)6-<7pFZ3jy(q&1!txK8Gc?e*X16#{dVWm4tZ
z(#C#Ua?kRa(MTicG^TJ5caXC7tQ+_$Y`05U3ElQY2THvuQ{V;wR8?dqp-#*&O<219
zQvI#3%<?RAWk<beBL@RX=K1M7VhYITR(K_vb>8^Zi^QXhs|A86iO4SL{J;>h7-^zz
z3mheIs=v6W+W19o<2WtTm~#@xSJ^=sd}pdS8El8D+ZOm$^m}Md)r9p}rtZ{5oQEh|
zJX9r__n#2PmIs0DBJ0-Jky!4>c(2hy34*qZM+CJ{fkZAg{g<#3%rX8g186^z^|X>%
zCYkK<T$^;);DuSb>9$LJQ(#11lX3%NZGh|Yyrfd)0>(*~PbY1rK-g}o+N9v)z>u4y
zs2=2`apJsX@qJv@jJP_rdXmwq&AMOg!8dHS+}g9JVEhL8fbt=^Ndmh7=$-h0!1DUX
zKoTn>X2P2y+x`bx^h0!b`BbZQ%LmLEeF2d>9WW>C$1JGnM|SM40sZNG#nIVp=KWRO
z3ldh5*K%9nk)6&}M|9+b`0otpD-XcZJr2j{(8LGVc0jTTdYP)Wmeb6}HMx!PNk405
z)mu!ng2{+rQg9f#a8z_GQ36$4mJ}6LeOFNP_Y=RAQR-xC9x;+NW*=&^)}J1DI7*QI
z)v`f8ElHRrlG~W*$R?M1sBA|L<_M?cdTDWTTzA*{Wf6jDT=agP3*jD$L|#<U`b3``
zBEEVxraI=lA)lsX(S^NQg5k?PB3{W^r>OP<8P?h-BUMD%A#AZ!_d9bg!;kuTksn`w
zWfhA#z{}yZPU;tyW22p;yNV}pac+1v2S>f)vo@&9V^_bg8)0M0l7A2<3NwCWrq}{H
zG+lRkGngqPYBbe7{KS?!SMjNmYP@(e7=Rdsdx@}~hDBd<rN=SW?t|(((XDYo+?=IN
zxI7jThl{_phbKy>iMyO=ytG!?(TSd82)G|mZPOzZBtwyLn2#<?A{w$!yJEz6Fb|)<
zU7Sl&oGKKrI+jySLK>`Y4;MP8d>_>)k;~I|g(+l%Qx2KiMk1MX0Fw_kOZO%9@p?SB
zeCC$_F5F70^YBDc5H|^`5ef?Mu()u$v9%n3%{gV?ZYLK28sx#dKEFcmzM9v3Ckqit
z2!2rG|AWhsd8lUN=T1N;N58hnjeAy(^@9(TqK@53wn&tw@$p0^@cQSeOT83MRP)xz
zJNJ7Jqrh`H%Iz9-7h3`@rwb95s`b0;!<j(3&7#=TyJHb}U+nD{=cUM{dt+VY9<=LP
z#)Y{NRiGtWN+G|Q4V%AoeAwo%_uxuUr>VE&GGQa7L^Uy_-#*z8h$}f>rLyxoW+&<Q
zI+k{rSC4<x?ybqb_4FM^Wq=`Iyd>FfUJ1@b=Y=}JvLJ)^HFF!MG-Dl=d3(`Xl4cI7
zw=eeH(fn=YfYBB5nwiqQ>_P?o?h-CLyS}G+Zq8fup#tdILwfNEir@W_FAs5{T?M%O
z#95FD_474oitmzC|90MlfnHg6nKOAd=Oz2o=rB&eV2NV58d8YO+4UAcyi;O~D%CRW
zNy^B|?0pw1h-{S{YtR^1p+mrJXJc=Z__`B*-1DDPOae6;Q%b-BnZ%g$E{%Rm8vjeU
zcD8&+i%T{w6jd|ol-Ce4*4_KnQfeitI}y6o;GMnbZ8*NHD4;x}$T=Z_I+fGJA-|vp
zmXgeWhDHzJ+gdv(b46e~7wOr6pCcRb#Ya3^VfF}3B;fG+VlRIy*CsU?tLzRA77)I^
z>vtz_(AP6>26iN16nUb_{gC6X&<3TqVI?8R1WVZS(h3kqydfM+*FT_{>*ADBruS63
zfU>V!a_XY!Pvc>M^{Di;Md~&dQ+TmAXCCmGb7E)&ax%@Ni2a&uP{mruU!8e_4NRL)
zW$77bhZ<ehv~n+#3?1PdI|gSBcJHZzj7M;mN_C##k$V64^4oIw2%$eL3L}F*@|375
zDW7!9@=j`8UhVEqC#^fZnJotmkKmk^QEPb%<bFG}?q1kI(FgE#3#b}YjGTtBg4i;j
zX0wSIXPmv8-l&KQ+&Nc*lGBrTrvzq|$s!QoZ8-ko?9M_g-`&OEG@A){tqkbM{ya1g
z(OpI!3tw3Gs;yn=(ue4Ws{tCk(eli}=yLD|CcZPK+`PbzT`tb9Xo`+jQ2`<s^=yc%
zk$!-wr-Vq4tbq){OF9c_%?lwJN>b>wU6I0r?8T*oJtpiz_UlOnuI?=4hjEtXaq5g>
z-tz>>ymM_FLKdv6!#j^bv59U`aa?<rF9aXLkxCq{*Q?1wZ8tu$^DR?*$U~nqY$vHC
zl3qQS3M#kcbSVtZU9^zd@QWocZ1EK5WKyLCz;&>D#COWVRVK$0s0WO_$gA2J#FDr%
zn_}wLcJvR*FRQ5>Mtf4lsAt~3)KT?q-anIX=43q$t+0l1to1xe4oriz!5SR1n2&8K
ziKT3@8?KCoBa|Q{2WI2hwk3av8FnQ*dCGA807mN|{N&e)+PtM7F)oGpMxS?{-N~cG
zp%2V9gYgIdB(}RpN13)v9#1WjF9`~9@2pPY{dsIDcdcLoWnY0mg$=L^z6q>Lv`Yxu
zdM-&yFH1{0Z#nI(MsF`Oc${dt%26xI>sFE1>8Pp=!PHGyz;bF-9f+3OGoG}LO}6k7
z4i`p?+ePz4)jQm=k!ZWeTan-y9C8bzIGWsa#pah#97{Jj*KeR!Zl6QlD|C8PZx|SZ
zI%$;?lu1)jfOqVL<N2WTx;0GB1IsEdvB8<}XPM!>+%`3-VonV~-z?FI8(Vncakvz;
z*EHXR<2oGbzOb#wWFm}*Z(YY!E3CHQb6Ph~Mo5ELh59%PH>Fo)rCzwPrB!86|78~7
z4(eAn1ZS#Z?%h@>Ph;|l^N%dzjhc*ZGFC^H$W0^hRSgeWmwk~x@dFv&Y};&Nm~n2p
z2=dT&UP4QTZ&8srJjpY576X}FRORMdt6cE5IZk&Rr<8^&_w-rGMK%(9;#9fi5d!iK
zvd~4e-5eiaL^E?uFu>hwL$}xOC8|c$kHBu2e9Tp@PViSVDrfxa61~^EELw-qR!)eH
zP{Zmy#M><o+6=_kOb|1A?GW|OXu&8Lc5E;H(t+WkUk5DuAx62VOgvMQ3ElV@wH*5L
zC*?6jz2ECOn1#MxLFaLv{IvV-s`HmU@}4|DJb7J{G3GE&${GICR6U94HJZOncTb5_
zjfs!-Zt|pYC`gHDFQIeZ$t-6MPWK(EulI@^X7$F0229!nw6?hLeETimmfBxNRPIlu
zTn_B6QM)+!F$f~YrF+f{c1M&l>Ih}7dODt=??rCE<K5*je>IAGC<?)S*lDU+jNv>5
z$wNm6;<U8Y#WF~f?veAQ*M)?f=qvEQx_TUfZ>fsSqsCbFF07HhzGy0^d+Q}5Td~c+
z618<8zh}Vur6@<IUsM|Cpqdr2Yb&Fu>CXz1fW$sh<RK2R@8oKgMs|mmDX4JZ)Y(&G
zf;Xv#7xklp2mZy%roxYv38Dh`!w4c$J{a-glNaxvr8B!S)vSpz9L<GpskFUi$+d}N
zBA#yatcfwXu}?=NMti+@m1>r5(yH+a>qwpUL4CE<QcQ+IG!BOnUP?BmdB%UZ;Kt+;
zdIwsWv_eu6vs3da=*L%@E4Pj~7;LBO_^{-?xZu+4Tv|mZg6+U=&`HSTf{Wnf;_`0m
zjf*&e)Rl589?Yj0ryC~sUhY_gdG3^K%VCKd`7s<Yz$Prf^bq1qATOTOfdSKpk&Gs=
zZ&jr(k|P2@WMO)s8IVAM07zYmT#PRqN%)2|7=Lz?{)CL7+(W99^uJ+Hyh~y*ecBBN
zLpLk0#^ekpG-`T7<*wi6q6r(dXqvnrxc$S%d~n9M3Ght#;9#j#eI^H#CBJ^EqvaDM
zGrzk8Y}?30z(M~Q1V>vxteWPtE(q+AuUd1K&yjiJ&VcnOoRv^NQk%G0zlI&Wb!>_^
z+>{$354XfOS&(cOLiwHFd+xe3ph%=*vin7_9ucZTv1JA3f_H|;_B3X$VpPQbx{(@F
zg{_wTWv^9Z+sx1BnUgzvQj;V@(SYe*%;MuED7SuLq`f>@f_($9jeg<iqJ>098Ej@O
z7q9>)d9@q3in^U%(TbNJ&oz}A(Yqs-(5A9P8_C+ciLrNMnRy4LN4K>3$ii9ZXDd=x
z&g{@YaThE;S&jg!SirmWd7@~F%91W=I5^a__zRA?;|Bb1rR{$a6E_#v|3^$b|BaY<
z{!fT$Sy$EtzYRt3Pirbs0~eC&2FebS=8+MVQ%=f5{j_clxVSUTCb_t5jA`lp;bQEV
z960zTV|q$%3KA|7ORU7<g~v_24d*?_(BFl=6RzqLrpx#9tEk#v8bWoRuZO%}F8)p5
zpXiv5UB~|^NO(GP$LRZGLne8YYN%T&L2>#1X#Nx*lyAF6&C#}jaBezO+Gd_IbaUI{
zxZx$#%^S2GtD%Qz@rs+*-Y~TN`7Vg;w1LN=chH17-M&9FApyqWS$QWN5B%5{daaNc
znsJPh?MU5#>6|e8jM3)!OsyAz{u>y-!wMy4Fla4Edv<xtn=4MuwWaa+NZmml_r&Xa
z%D`_BI-;VJ=N`n=gCi)gZaFGhc6sa5_<=dh&Pr1{exDHhwr|Lmcp)VQLPwqL=xP)0
zu(3&gCe0*9YLdDl-P``2UmjraweR}$Boiz5Yj<e*uansx>W~@ii6qG?7aapn_rhZK
z#WI&)U_GWVw{JyYu#=+@>%k!+5?zkkKCj3*kOfVQ;j$tniMYESXk{eG4`UYo;Mg*l
z5rfFY?qctWx8rk-Ks1^hYwW({c&W-y=K_=g0aAM4SW(+OSVXa88j@L|Pd{K)al_{f
zc!o*x_JDlvlJU$kEGDCeJ^8v4#?g#W3q&e#Ur11ub0J~n-rg3{h`;EXF40uy*s)9=
zL36t%&hIxRFWwmcbR9wcvv^ee2^&V9c_;5o2w}PHdfgQ$chb(jI8e$=jL!@aH((6_
z0O0uocOH-T0RUgHI6o&xXVIZ9GzLkV#;XKsYYdFxf4vUl6+v+2bf}i(;LLuK8hw;D
z_T@~VM5x$~w@Ij8q6r>2>4C4TCMe~JIS{9ra<i5MSX0WLy1zi^xL}WAzYS&{7a~xZ
zb7!G_D{2^3?tpBw&{r2yx>oArOv<1kTHQOLt3Xe}{`&pNBI~++(3S;B<Shu<(ry}|
zUI7$u*IVh@tih1=R5+3lYzaea1CrwhG-C;e90}<^BOZ%K-6Tb%?jf+jDrJ<xf_>L|
z^cg6SA1(s}G2D)jK>_?x`F2S_pH7i_-S(a6o`fzjSntO=!Lw<qqo8|@yZHM#BZ2C!
zyaK0KS$yi&2~tt~7E|-MRPC*lq!ODv&|B<Ks5_P7EqXn^16beV@z8MzGnhb%GeTXl
zPSA!-SFxHjr-;q<Z6)9mm|o&PfI%K2>YZt?d5%jxc$#>j;_eEgoP-C#8e#`+YkwkJ
zs7Jrt4x%hPg4h)%XC-XAg_6U#-jJlTw+rhH6f0LoK7R$u2!$1Zkso^9<=xBpvYVwc
z=C^>W!_@&qOZEzR&@?Pu(l1So6xYH%;i{wX{E6^0z?f3kg>fGRX)3JU-~o*3s$01f
z{lLdbHK@yxgZtoFhS)---k>?D>gGy>6H3CvEm}VjA|th*uRG$(aLpn7NlPFFcF2A8
z3#+uG1UXCI<iXJluLaS(@Wq8&!A_Gf^HkoEn;Lo^iTG9ZXM`o>q_UHs33LsLZ3y<e
zDfZsk72fJw-ll}XdSp7&ROl_BQfIDn`s+*G-5Av{NF>3TO5S+Au8!cO_7IkTXfx#i
zDi|=0Mj0)b|JEb!%2A%1SwaEPR1_ufJcHVCYUBruPc+bOzbblu1sdv8O2oye7IdGD
zUl16O`HpoZ;J{b~tK8$hEs%xu6Fw^6&THkZy`wP(S0E-a>q>+!YSWxR*0I<V59sZc
z!XKjts%Y=H5k7?P{`N(~^ATTp-V!qxJGJi?m6t)MDxuYPI*7Gl(V*-V#J;AEaM+cj
zD!|*pXqTX|tn?UJ2_k&a=MpwW<;ZR5CFqWPgx{i8mnyRLf;?g17_sa?`v^z6iaZLo
zd&CxoVyCx}r81H6)>u;KL>`IDQl!yT*UOlsWhvK8*d2l)C#qFfUHIkO`V=LaW@@#4
zK)WKc_jEgwLll59OW}C6d&^gB+(Ph8m<MUgq>>A2)jSP8W?|6Pas}4wHn}_QxwL?9
z1C3Qeli<Ah7=*q+B(=Aos`nU+Ycwu<;INqIjkV?!7NmQ2Opex(z6d)>B=AR`{G?z1
zS?h~SJI}o>2CKC|ju2vpHT31?#HJ3GLZ{!^j@KAF$S*Cz3s*j;P7R$>;KqcdKGrFH
zm>0MZYL<Wly*q>7rb&K-CY9<nAOK2=c`Y24nb=i_uL?Ylwkg4qXF6Q%k(DG>=+sX{
zV>UsYj&Z7eIi668WHq`rLO;-ssY$hXe_UKx7Yra#Ls2u$|5!eM<SJ}r-X2TS8+7w4
zdi;*#AF9KRHhiv7e0b2CWrM}`utCsvud#;t!C>U#-Bewrp65(^f<s)82!dJlL;V7N
zeMQU62ep*~V3LOR?^*tZT^9rr8rNw0k#p-5CK<>sGhvpO73Zkh`BaaebfP$Z!JzH0
zzt)gjj&vdP)5F1JS{6Y;nwfu>s=!l|{{Qjz6<}2@-QR?O96(w^rKJQ8UD6=kA>G|w
zQc6oB-Q5V%pdwNN(g@NbE!`>Lx6eVm-0QvX_x}Ii_uPl)u=nhlJ+o%bnzeqj*6h8T
zH0u{znyfkT*-Es+u*0_-Ftid9C8A*+qiHjfoR-xea#|C>^#(PGbbtdQ8Pvs>?;{o!
z`g~p~C=VS$rqH;XA3Bdi*+gqaJuUHx@$7xQTnhJGZ(wPH&kN3tpw1`*2fa2V`rVvj
z!#Hwm1FIXIIAlh_ss%954?V;=Xm29XVJ|$T_oQZG=2+Bm4#v!N4K~brQ0Te&P>3|%
zlU=(HA$+;dS5>Tgv7!C5h_|COIUHP%IBbgsp7r~R1~wv<1aHSXx`yno@~aqTYRRq&
z&zs1f>5@y4les?KPdLFRCPQpEL=U}K79~7Gz)2D(B15}H<>tTe49<#Ns!QpS41(FR
z)b|&@j8Q%3rH!m(l7v()!!G86TuVXmY<sOX4CJV4#tK2&=%cm4mi6{G>qnEHRTB8U
zzpdAKvTsn}l+tTS!uU}D2EM6_e0kH7Wl+$pccUtYtr5;uO#SfjT>O2CVCGnA%iGT$
z*i%@oSBm<*;#vsH7@V-T(6V?hXjV>+yLe}Cl`QL9+EMLT_2SS3axF|%xm8>-A66`g
zf(yjw$K1q87z^^~n_6^x;A*ZEfthGIhdmr?e&O@9A297Ij1%<8g*YMEyCkfpAUqm_
zLLRsx&dr3*j?G6#LBm^*dbWr^`Kr<Sx0-kq_gJh*I7}7ZAFMY|*EJVZXyLvUy$i;`
zA+3$A>Gr4C(0#I-+J3Pf_2k7zt3BDObt!VyKJaW@ONDkiUAChzr}Kz8JT)_h!+x4~
z>emXU`2ku7Wx=~k<^l7TXt$l`QmDhrVz=^Xtv@|}XcxTXqmsx!S=!D=W6Crlb1w0b
zv`0L~!+rnNJdvRM+~97+J&kqm&8-2Gy`h9XD@C*VwyrtTu1qdjZTo(`f!9aYEJ>2I
zdg0VYuU=&_?8|$Mx(wcGqV29m#+nh2y34@;x(I!x#l0Po#w;jTw#aqwMl8dyB2Gs#
z+>Em;XTSn|k*`Y#ileMV$iod2-I2HN=}F`?r>Z)lq|ba$mBVj7LD*Sbh&RvR>^&k_
zO8Jm6sW9(mc3R2d6v!};gAyCJBPe0_VeP%?jFH^5MGTQ5(yQ^U<=Xa2aygs(WqhHL
zJ?p9Q-9>y|UX$@N4TC~hgWrboZYbJuxPQjrY{yU6OiyubcG1S>mu|}4dovTwX8a&B
znOocve@c5Xj1PT=yMqq=ZKm(MH8g3#!{HX;zIIOujbXXV;SGBd&7KOO5$mK(4}r+{
z;z`{u%l;n;V0r0xbo%cvWpuu<Ml0G+U+t@dKgkgF;;d^aSaz(;XZ9*0#QT_LS)_2^
zDiMVp&PM-<qX>C3!K<Qe-Z5NU>^9YXR4Nv^UJdrbtoFMVsqV*>36oh%%uhYote;x+
z@z7OH-}X)-HF!DUd-5HFzw||qwP$EtK&_lvm;^(qa1J`~Wd0qTPb3KGJ;YzOgt&UJ
z1(d$Qg!!#HFE1H%^dXBqnYwo*tQlN0-D+=P=^_pG{~jeK4Hg;7CYg@m==G8s#i1hR
zd`E&kpSJSpxhSu5Dow15Riq;h$#f8R2WPIu>{sMe{O}DrCkDmdw7LMLACzo!t}j01
zp<2D69L9gQ6xy{xC5k%|&A5BlBx$lKRZWk`^Em^RX#%_k=Ofjl7u(elU?If7z;rxT
zZqg<77cyLIb>Tej<&IH!-Q=)ws6yLZODoQ=3Hcd|oC|p;iJzsg(>(j`u!&N;tQvvN
zi6G*TI=MNvt&M%#oXw<hk%!uZ<Jd3=J-V_61wOny`VHd{ee>|K@Psq&aa6E|NLSMS
z1<k4X-UW}2aZL*B5<b-c1<&nI-bc}Ryt+a}vdHDmO3z#l<&><OSBUaYmCi`Z_BdTL
zSzPuu+|IvpCR<5(#|z<bQd*feU6e|Xug2RE$nJP8ex-?hzbML3MRAw(6)*8f%-$`-
zFdQ^63O7<_T(Zbe(pK-`yKt&qy?}?Snv1zZ9qQjAZ!gf5<K3TiQ__a(d$Q_)+3;vW
zOj}BX)fpq9i%|IMn#Kpt++F;8rgi3rZVT@39}Ll1+Y(oEF0Z`g%&#r=M{knhtXvKB
z%MP0){6VW+>MD)x-9`&LAylFkEck>!85NEto25k#CqOckRZ!f<ix?hFeDD?l5r2<w
z*0F&oHw|<9*U9>p*Gp~Y)1qC`bVAR!N?i6s5mThIh`U^Tm|1bZ#Yc$jPUPMOl^7jc
z+rNnkJZG_r_s}6H)}`XFD@PidP}tiQvlo|r+?Y~KQn2k>couc+HuFuY@G)%we+FOm
zyq{2^w~oEM)ZN5GhrvKm%=&gMuA~Re=~Z8cdk5%S!2P7h+YZHvq<F@K1SB+q%HGA?
zD#Fw$XUlwFo;W7QrmP}iWrge<tTvtQSp2@(1G=<?Z1!MbXa2R>gX{Or9$f#`W{-BQ
z?iivP%<_-+=v>Rk+F0*oD*V1gN)_Lw#m(G>13quMpyzQ8<6_-9!L&WPJo>^UKQG#(
z$(R%e=|ffW;}YFd=em6_M~oPro2TVxKe`UakupwN_fo1j+uht;DR24kIZj9qw)vqv
zmOZueGO201^RTI&`}^Jb^b9XYO6-@yCo?f?t;EE>>HA3cDVJj(@_4$$QvusOsA38U
z`Q2^N(aZZUyw0}IgrqfsNJe%Lcu{h~6;F@9oe9Z8wtHynK(>3x5Q5{{qYC!v0u)<_
zns}QAz8D~HejCEL@66bNtjy>(v~$ey#5xyGp-)#5*xq6J#6|Flt0yWMmZAT{cLpLl
z4v6VPA~skpcYP9;DSW9g>cX-+`#*AX@b4Y%pJfDPq@J(Lju(_L$D)$7*C>36;&xGf
z);z&Eq7;B;qPS0`_~=E7DxZa%gVfqiGqG`WR!v9bx0(?KA<gnWw)p%;?swe|uikZL
zp>{gheW6KN)Ju)5xT!9hRTb3;nlQhageZJK8;Z{z)6xHe$M;i|3n?C|(Fr$NZHuMi
z+r-#?!2tJQKj(Y4%v2x8<-JaK?^bEz8nE9t_JapL(~*}^s??3;A2SkUg&e;_Z{tI$
zB7}|c)_<Z@I{B%s7Pa^c=h=6xIUfNY&TW%}ofl!c2!<#~AGwP5UKqt<rt<1<#oWxU
zB=et7FxQ~dpng#eV)Vl2r*ceMj{X=G6Wxe_KKUT0v34NTA*N1{2ndt)p=c|(W#l4&
zM5Aq;ID0FH$ll)rO=GmKE1SPw6&CmKbyb<_pBs2KmW!z{vbGuXJ;X}*L8MAkFPL<G
z-jO}N{1h!_`V$YiXC7(?Tx?1^${nZ0PiGenc{<04f=XSpi(F@j<S#1nMsDgB?cnCP
zwg>Mo$_n<;^bmY?bPF9rlKQcmiYoo!l(VHVhOJRwx~VgRtN!+U2H)$%rq=>djEI<|
zURs>2aC?q%S_O$#+2rV{i(c)BQ8zFZeZRujpB-D+s4lJ|rlP!Ky?B=;7=}-5s}5sc
zuHM{&bB9-%mV;<hpT+uyBGnv)89bqTuyI)Ex#{w(;}(-JDH!>-y_%{60o}aw&Z8BL
zH>(<)1)ZKn3Wc|x3<<Hjq**CWAg4@O#R$U};Eu_=W0J?C)kGDe9bmqsB5OOzc*r=w
zTFtRD%d@#FSNBZcPy={3k!d4Mb=nre$%krD$MkPFv~#+BOWUybEO}E89+?17>uASR
z>Iu5s)|x*&W0Bn?<L$;^?CtD2joze{Bl>Doe)6t*f|4ViPDUVH64l)$kBmKDPO-jC
zO^M(*#wedQ$~s_N`h+fW?@5{rX|S4a$L^T@6T9vkx@8&67cZXoTNz>3aXTu7Nu&&X
zip30{c9PajDyyTiCgoALf*m}b_oI7Mxa#4Si_qe)I?Z8X9y(^EY+St7#~9q~$*UDX
zel#_CgD%X{a%`^@?U4o>f*MhmPWg78%h!O;MGJC{fIykt7>v9HSbW=^A=(F0oXmW~
z)KBjytK;vY*&%T}qe?2hhr(xgFkN+~L5eY2LE;CGj<{eodmjE0#+RouqDrfULWRU<
zaZbD#XHdnzX<Q%w!Cev#wHu}9Y4FpNw+-nSBd7_dpJ^oBtgF5IRBKF#{-f>oEXiY?
zk$%>R8xKWoP<&>Vn}<Gs_F{Ww@Gh{xZ-+%<p&;ieD_PZt!;U+)sXJCflu>IrXN8Pz
z7@Cs7@|Z2fPkkzdW_Rk^LAo~dqTvdt%NgNaqMA2HMZeRdL{IrDss+cy*kIE~8AVm`
zek2b=jf>^NER&Bwo?z_?cz$EMJ~Z%z=+J*31HR>Kl+{B3qe9qgD@WPTYaA<}f@=>W
z2E!&e`Y}IkMiKY>A%?7I*QYr?X-Gy)lnX*P{c{~=^F4o18%My-48xS8dVY>{M~!II
z@>?+;RD!qWnDuBbHoyG|VM07fx@)rAjmeWTMgw&PY>Muqn=&-%EUZ&;zw{Hy8`YuO
z=8xjxVTzU7<%XiD?zZs5;ybEI^^a@P&YsjDm2KU^>UCkRYDe;s%gwTD949kP>10E@
zSDYLe_a42<<3}K(a3p&{vNf!KOE3z`HwUZaBZ88;1>uzj7#hqd)b8d)Y;!`|_!Htt
zuF!jJg<Rn#RN~~x-}rrD&XhjhC&$^`-<eImXeuOG4a1u_lgJN=>?a*stw*{++hm}S
zJP;1MtH#ngxgBdS-n>C13&V?N#5SigGpF%G0H5?TQcb07dTq36Rily3TIVrcQ~cwE
zvACfQL~AGHN{T4#w|!yQoF{W*uVadMM52p}*VSnR1k~-Y7$16CdT)3sNXnC~jyd;p
zJ}QXcI8q(wf2Q)Ls`Wfii<CU&{LS0Dw2zz~q#PSoOW{YA&@T@55uf2SMwzpEb`pJc
zoCq?AIpujKwf1@#TOjh){Avog&-CB~jaqg!#$2u=CuX&8EN=~!-Y$^m=?{aLM&O;}
zp8D3YDgoi_$WJ+lEuyMyiqBsr+oB{S`C~q*4`r243=}i0SBZpiK$HG-aZBOt10_U$
zhpqfn;haa|!!x%q3rG{k>CVS_4DZNe86m&rR1kP6ml<!?U_3KT?GT+drNA~AoLHKZ
zInw%oQF!OrBD|xU&N=ZkA}faWleAkBV}d#y%nSqj&3e?An2dFNLtJ{5*e;2n0q#dn
zxrW#7;>e3N$iv$P8)%{2Pf6@DeckxlDMnS_vvIVid|GZeQoK#%u8Wqa>}R^L&$7K;
zCax(5@2%7CnBI;<tuPm1JV&LKCy6+l##hD56x}6qCc9lzq)7US*}PGewB(h?u86*d
zc(pY1%Lfc9)6#Y9lSmKkL*co}&xhH|P6wWGlVH-sy>S&p!Sul5t3_U%cs0Nz`}kZr
zykfES$Rc{c;whRaOhlY2=ZXQXjzy9rqTa|iQl8Vm_WU~SD6Uh7I{TqqM{}m_piG;c
zTI-l~a~)rtgQ%1}qVz0Q8$>3HXYzeQI4u4k6n{mgLRfzgSu&zfxMC{m-g0wYGp}&k
zOg((PcmNeV1N#&B`b_-sR;|7^H;$h62REwRDLZPP*89>jOUm|@_y!Wa_bCk=J2*cd
zn2;b*`ff+x$94y9W-g2^MnInD<%VE&+zVh&@#CjfE<x2srmoz|jz#&p57*bf*Tpq{
z0##d~P*4Qw1(Aeru~p&gd9PpuGR4}&pGL40iM$dC`am<83?C4JX3Z@`J{tAD7V|c5
z@@?dk?fJB7Lgc3pNy{CK;B5}^=)lsS6_$<)N$kIV;LjD%<Jq?Ezt8Gka`RrseBi_$
zKgJH6?h7yi?$VarVbkKBO^k)m>N~Uqa&)Q%jY=yItGRa%MTKt5l*CSYQ>iN8(mh~>
z+s7MIYftx)5*Fgl9HMaC&)auUMyHTVaB*N*s*ggU_>tKR)6sG|&(DH3%`YktsIjHp
zl^<(O%P;ZdbICov;;8|TiQI|PeX*QX21ETALn|gKbY%WmZ1W=vFZmjdDBJG&8C9@&
zL$w$_ZULod_r5X``mLq)2V&1_K0dcsCmyX&LLUqGv>u!Awz)W7|5F1T*U++>{XQwi
zklgTm<N92F#LE3Fv|yYQ1<S2_<==bLcsJtpK2Ua{Aq4drX@4D>%aQv)&nKxZo#?xK
zFpH>J0DF5<z_vU2M+6%i=)EVSKjz2t!EJo@JO0u#5kZz`no`t_BjM;53);7Q3SAfW
zad+8uN<Pa>wd}r~h^5~q9a(j0xh*Qs6qBNHkI(!@n)E|@D_q&FRkr8hW)pTDr9rh{
zm6-MUOf;9qaX_+dY?gJ8JebBa#j*<NMcyPXFiq5?FOeGY<#4yN6RpQHbCcbgcR4mr
zCr-iZNl$Be|B$m)=RCDOemf;C-;p^^us4RhE`^LLZ|h6J-f%zrK~Y<rgY{7ZuJi%d
zr2otDFU$>$2FZ99lftYlv)uJ)k~dn_3~;u!!#euk!1`B=D64;Si|gkVz{PdDo94Iv
zaO{=-s4bCV2>xR32G+XZM!VU!m**lM#6At?*HBligbgfI4EDOx`9@+@Vm~l9e>|QY
zvR}LMy7A*%PU1T<N=(x1yw8?M1hY+L%IDOpV<LhTibUudc=}DayCN%Tl{@+q>u}0q
z!{0utUZIRVS+!}Y&#U(j66DQ5*Kg1A#sxk`LUC8<ZAS+14cQQ^XSK-Dv4Q>En!yBk
z<+jWXh3tEn{U5DBz9Zw<lE{XR@|IbH2I8esUw8NfIl7pt<KxxaTM+Z=u=`r=q~%+?
zv^rG2x=Cmj7+Xm^Ea@ie3=k&t%7RU%nbdxGkY-8N`E|*X%tz*}1ig0WIGzaM$j*FN
z(>?Z5^t~WvGxce>hJ4d{bc8|6*F~fi0>J`3uEJw%(O?!dJWS(^g*BlM4XYYR#EQD?
z5(#dxJHs6soU^T5NqFXpFI7XDR~VPEWz!u@jgBUyUU#+d@oaqPyuC7%73(zh!SAAe
zu5F#i_2ah}sQfpEd7C`9auQ<|rhHYfqVLtgqG|7NgOC!8h2D(6-dme)4UK;>QWk2x
z>T#}RJK5XSr(O>idBmF=i(a_0V~vG4`*B=B_&^TCsE8dT6{G-Ww2Jyxc29H~u7q7P
zXaR%3#aOChfueI@n76Ai;BEQ2d?rW11p3mr2Kqf}A{$CHwXBhe;!Ku_*wAsBu6|Gi
zjPiWkN$cmyWl=q1<As<R<wT-%?4~{U;L=IX(oki6s+5e}?+8^V75fJ8Yu%j_r#fW{
zw*2ze<yGql#ltkLcP1nJ8AJz6oeImVjQfN5CPG0gXBrvYrU8a~c#f5v;&$)od)Zjt
z5$|=cBVbfSy<;#vpfy&(w7e*GX4c5Et?K#6n8gixiTkeG?WUb+MUzKrXGV_eiUn0M
zn~SZKkgN=ua!~j8q{$SX$717z6n5_yN(luo%5H(XdwSd7C7iI`uEl;Gjy=Y?<wJEt
z&^#NZk$*HTYWNFL%hv$zI8?Oz;oVs#%N?(EbR%yS;t{-fcFUq;%7&xpL)lC1I6Da{
z*>kJ1J5>*S-Yu?*!pPU$c>m1wQ{2Rk0%BTsym9I9g}-K63Z|oG@FBVNLL^UO<unt`
zoq&r%<hSq~@9S<g=G6Z9@vTlx@5S2Tf^2yY`zj7<?Rv^v1H=7+Vw0?6JH<Hg5q_a*
zLuOk$bIa4b)JE#os)W#_P7|c^ezDstUcH>&T9cST?llPUM|kV%6-?M3;PE}A^^?Hx
z<qwBO5Bb7RLK>S^Kdon=N{n#F2YH<8qh(Io9Qpc|e|94Gae^slS0-HF2jl9Y&0%bG
z96aido709D@F=_$isDd?#6#O@N{~M1LVuQez{aODt{^J$;ZcLRU@3V^A@{>4aWjf?
z4%H??FisET_IS^oy~ni%3MbcNsh)H8)O-vl$3^Tk)Iw)JDOR){%-ormiYiFAQQ!(7
zA{Y~I4u58x^*ZAR-Qrj3g}WN9SV*UJ-0Vqi7x}gAI=iOH#8{+Sv{qvj>~8cb7jbA3
zO-MOD>D`hdshdwyTP#!@<DGaIndgZ&LUP;=TUP_TV!ZgkUPomiJxz{L#VHeHOblXD
zQMcEWrzlR?3j$}XjA~s)^$Kj-^hyMo!<21JZlIv>K6KqvxM_g0JKuVelo*L86{~P0
zDM*FbmDF;K*d`&!j60vQH=VN9*txv^rCe*@rQX36w!!YlK#Or%QwQf;2Jddc0xNMk
zF1fIYDRgPP66deTGv9hh=6yW5lD|cFr6!;ErfJZ^XiSsUHSE|au9$dhd1r^Te4L$5
zvhQ4vg&)X*m)o=p=Wvo8XRvkgDJm8fUrZ=Qb<Sl$5LaB*EnZzq2AgkK*~L!Y<(GTw
z^W>19K@EHCw+#SLuL|@TQx;CPs|^55jKA*y0KP*CJdOH){h)D;nw0Hb4%ByYKiX15
z?4wogD`w{!YT_V-kUe_Qh``JFrH$r=^W8~_m`wh6!-giD<>9lnoy{PbXq=|V$er(x
zAADNfFTZ%p$v(q>u(4>JGugf5Ww;BTJe{8#yQnusnAbE^cIvDCt}4@Dt|4Tc6aG@H
zgznhI$12@*$*KUCdtUpS8PAW3A2qO*Ahhl2^Ji>6l1%|$g~F!SSZ)QUvW=xtF=e2h
zY(Bx6G1hE;R5{)4RGUfT#9{hyKUYnBWdWyt>|8sd^EMF*)gs|!=y0RNvN+$9b)8qQ
z6+W*HG?rlcp-ro}M0}HZorG5Tp>D+t_fA8CH5KY=d4^K5_ZJR1!jfvs9|rQ?=E0v*
zKB1D*zCRq}WTkWPTd>|+r*KMjk$Q!A55XIBdx2Sl$WvsdX!q`x-`2L3DJtip*3m=a
zIMLuNAw$?qfmU}{&x@)hQZnZbG9+6qa6e*GswvCoz<T#1ZDdLkQuyd9xd{e%yW|+p
zG}s&SEel^{YOOnARQPkFNBF}y%0Cz$5Zx3eRw4PI^O5CaV5i6Ev`KpQBRp)2RC8V$
ztV|cA_}QKO?Q{DEG!g0L9?Wk45Xv2NILFDOGkCs<eJgL}R}2k{!$pa@8pbE~nDUdc
zbR>(<m5vn>d!KnY2nxGK)drH-gld;beF%{;yK~R9K-KF3>J+Z4Y!D-p7Cc;pQ1NK6
z%)aB!*E7Ri@d!$xBCVXzyD`$;C1jLx*gJ_y52FbPR+$_-><<nNHt$a3&GOa}Evz5-
zCuuY{8!HsSVB#T;hK?!_EWa@|6kX-LWh=n)9PBR@O}(;nP<XyeH;5vm9Pyy$un%v3
zi$6@^?Y;TZGvDCEvaQqa4{Z=XJ1m;ARJ}FOBg{^D%KO}(?JdYGe~L$9)y;RfE5neO
znY0v@FCbWH=yrd4tC7l$$O=_9dkcLEI|c=8848wVF9#X+KErvpxF$I`P?8lzaVZVu
zLVF4tkvC!1ic6lhAnUu^{&g-`C7ww}!4z{c$aj$`=hon!b<jvMmj$t0)D8{Nyp)ma
z7V9zT<&D$}?X9G^!4sF$J{Ft9>|Au;Ox|25t$>`i_udnfrMO}G(^1qPIE}U5gLY#y
z1RlP4667Lp@#132m{RCcc4!)3#6My#EInEiZn!yk7^h|~4|mWgyhgcFj}xd1*6<MB
zaIE1e&?tW+l;bI<+#~uuZtR|@-<0Na4l$ca{$NBZbvm%5C^1M3(>)`MSByUc;U(M-
z(XLro*>*Xt$pu&GS;<yKa$s)OhAQl<shtNuN<W{Vi9aj2z|aqLMPQ6BL3l7aX=$N}
zle9}R@UXv%|JAbzei00RmTgrfFO?n#Vy^*#xL}hVWbwhhA5#WoMohN+Et$hn2VSa<
z@7InT4mx=zEzFJW)QEfDy56E~@yW2%h~2kSG6TbHO&ZS<Q@^TRo;g*NM^<B%f_FM*
zgCm2tsh&yn-Zg#^s<iH2fu6bdF{PJlVx4s^j_indiD%rvQSlv}j@uXOK#D<~EyrDb
zygNoQY(*c`c1;TvMBa~_YgYZ(bgz?pt5@$v*k5HCH-ACnk&UWxxYBCS0=&{A*`ly$
zsa&FLo}<!a8JpXqAvuhoftu>hpe<WOXwhya>Fa){CdqQN*ms9R_JK$6uBmU4Ki?BW
z(kZow^ex}y3@vk4I(hy{g1SUo8fhHEMyj~feovc{pc=D|!}jxg|8f7=p~VUt!855l
zryw8oo%SyL$yF&Kow`f|tJeq;FlD02m0rPTH@z%4o^^^KA2(sRGm<I9_fI;oGOH{d
zJ~=PZubtg_oI@AjqryvLEgkox@QKlkL_Aw)#a*s%#`o#XUnxpF@6;ql8Oe6MNWbm4
zJpc9?0ehC*eSys5r!OQqy!>nW{k&5THuJv^>l>B#oO6sv9Dn};N|w59;c&hX!gEth
z`Z3{SU2AukfJX(hwjX&m>n_?VE4xwHdzR#Pn`yXqR^DX4O3WSKJD8fb8tTi}yjYu!
zSt9;`9~=Ce6}dDH&?h$8n3%522<y++7x3_amEG-(!Sed1#$Xv^BXfNrTQ?F-Mj)G=
ziJ63hi&+~4R<w1}cQPhn1WGv=+c=RxtqpM5*wNP6!O++ds6^hu)=<gVNfRtDB1!^Q
zHg<CY?tA3sB&Gzt6{rd@P$00VEpR*2<^6noAh0ly25NL9fqXq%5d>TU=!eq!ZxhIM
zjX-8d_o3AMK_Js#DP(2>3PDKymsrRQ{hIhs7BWL(Fn^(t`I5(f@R0dx1L40!A<Iv>
zfDSSXWB~t4;U$y*ppXT^)PIqOtk=bm^>4(G<&w#NP{{f(%OUIKz+O%iR%loaGAsU&
zL)O2_AuEKb{~{0BuJe%XZ+OV|Yxew{%WVHL581BsknL}H$o5wrvi-|EWWUZs_P^mF
z`(Js;{x9>8{klHn_(jbha>)Ky9&-H4`jF#N!CcN1j$hRLK_T=@a@TDN$JO`d{w00L
zah->pf5SuQORKL_$oVhxkn=hZIsb--&^H?WiNb%Ghn&}W$n`foguZ&>I)z;SG7q_~
z^N{OrcnICH|0fFnWgc=}=OGj0-!KunrQ<q@OpO0B7nvBZvyqAMZ}@oW`M6FcL~&kP
zJ^*q8D3$~YV;xDrQszdEB$}YhDw!@1P`suHf`TGN5EOYRf}rLdI9zfFYN!-JP-~zF
zf~s6a5LB-Lhf8DtouP^#&dU^3))YZpm#IspDT1^~043xM0lFO55$!Kxf6^+f@1$>K
zYkEcWr4;}$xRbsSkVC@sli)wF$hCbpP|W@dUW34L@-ie$e0*2n8Dd_3LIOn&5Y!z0
zQ4JJ+U*$3}f|#HJ0aoM$K~3egf)K7;75uFlC`i7_1$xQ^Z3e9PtHwVnf<o7;qQ6&k
ztzU}FtRQCScmWmt$2y^4^B?PkqR*>b7A6o2M4T1D|9$mPq<B@qZzMyZ<5eyzBZ&1U
z3t2cpEKnx?Pg=U({oh-HRQI1-`p<lV!nLakeybje->!16b>zPu1}N6Ls^ItP|L@fQ
z*DXOY(?7NZ1yNVI|Cu9DXmeHcH<F=P=PH+-1H}GQtgkWWe>2KZv~pGL?~Ov#-Bs@Y
zYfJxe)<R*(|EHFq=;5la-}(fF6IZ$ak9~rofUCNGYY7Squ5$k$TY_5itGa$~>Hp(6
zL9OyXwsdKxud@ID$>F7)y{h-O4qh7GtL*>FJi4a1g!CPaA<KU7BXtoeCAkN}GK!o~
zWG-UtXy{;W=Va?Z!hWe4p{5H`7I0;lJ2*NCo9R1{uyTQ=^?#nTF@aUhjhxIJHA&c@
zdK>r;aVFUq+8UYLm;#wLf;NukztT$122Kzc7Nlap23~oPpgQ?yPkxanF8^3nQVbwZ
zMuLg?uLQDQ8i~IpknI|QOw7!`C}alU0S7zeM(BSKaD>_(h(Un-aX?ogS1BM3Od1a0
zjiJyy=vw}dyg!`*CD;Ju!pg)>!U`Y(HbzboRzO>`0*IUy%1IVzJ8X<xB+LK|;bdbc
zVFT)B<p8V^6qc|8-C+eNV}a^;z?}yO92-!I6<Bq!vaw#{C<)UgL<0W<Coi*qvJ%Rj
zU%ZDf_E+Ms10!N?Vqy$93;_?JCIG6yhPKw$`r3bt5Y$xub%f+(g=HQ~10$rQ&cyiF
z5n_Sjv`dfWUqy)V4+&ysy&4}72Xu`E`OgLgzJKEJ&oN?zjs|o@prwDF{h32hK>E)d
zx~3TZw;W;wTbtWBJA#<O*3MQ==5|)@U|>oZJD59KTI)NRftVS=R>qEw#`ezoRscu>
zJDOXYTj@JoV<;0KEWhlCsJRtzGg#CLaNUX+19Qh1ENyIK>SRX3#12doM<)kkeQS_g
zs<s@{Ek8!osXfpSk{h|Q!2t}YFN<r5Z$|hre-$A`J@=A$dh1@_(8=aP)q2p{dxmG8
zB9UN8y?ZI*p}OQ)VXR!YixKA;@0CiYI%8Bet!!W1@a+|FS$5@(@uYR8c~BEHa7V*e
zG$fe{%UZ?xWlb6hyU!3N{>H2oyQ!!)oAXYFWW*BIvdnun(K+Shc*=&ShNryPo}Dyf
zB=2bEmB=B}`6sPE(JfRb{8elO#e^i}M4@8yo8n=CdRxHCwkkH}5Iz9%4h8IgFGsAL
ze+UwA{!<kJ>jut03&bTZ_{S-8O*#H=1p;W@&cWQ;_}U0VvGHH~E}<+UA}J5_U0L~e
zrN(?IxPPnESXln(>UE_CQ9RdwkO}dh(m>u%bp<JPS@!CltK*+?K$oejl91G&^8b_q
za(<S-DtC2+)cu#gYjj+d`X3zsp`E{};m@*gY1#f!7OtV-|E(;raDnZeZ6Vsg)XeFc
zKtNYrzX^oAw1$!tAP_=ozY7G@e=x&Le-emaT9kwxDn)<j$-glDY7l<uQAj=~)F*en
zBnuD)Vq<3p0YAv`|N4WJ|8+l5Hz%-6=K!o2Cvcq;xSs<sARLSwB%FZV;{=>-tkAA;
z0uYxSXq$-<fbNjXoUE+C>K)Q+PQZRZy21e%QBLUc{<3;@s1xaDQxFMaWrvo7m<&jH
zHekuj0<8HUL_^XL!k}s3`54GK7YoN_IiPcwtYQUx2&}*ztXDZeNeJ0&z!Ca`8Y3nq
zX0|^I(xrX-M?ty<?f<ue1Prl>6^IG2AI8>3`i^GTWeLK8-(*QzP*F(;kR`>x5vBhC
zn3$OULAL;ai4DR_;KvCGs{Ki5ehv4Z2A&asJ&>*gBM9690a_5)#R3d8<OlhKrU5qt
z<Oq}nLCXI(KS+7V?<ahNQUd{S?7%odM2iE6$FQ>l(f~*fM1~-F5J1HS?6`r@!OjFg
zFTi^Sxela3kg`BsfNuoSKWGh*9N-54diKj=K!OdrwdZGN0R})wh1|djxTUU+%z&g{
zU5Anh!Eb=!f}BIx04)b}6>@}JzhuYd@#-3c07xF>2h{y1S-T8!{v)Ef?%w%d%Ni5d
z!~|?&4oDnuVh&<qy;NdG23Al#2Kpzhc1>aag6>kHQjbK0p@PWqS6uiXU?D5pbvgWF
zX~6<`IGG`SKM4DQAQ9yEb3lF`p}AMrpi;^Xbr3>y!ev>AltAv`V7in}sMIiVU6zGP
zCOg!9@aKA=sh_Q~0nRs8KrpV3kUYrwW!tQPVuPGP77@@AY@C3Ff|OwaC}9C~4CHuu
z1#%TAe{G2W+l3NTHU6CTzhJ^2>ozE_W%aF%e_PuL0b2!todWd0@`M#|h_bV@X+z+q
zsUrzHu+ZQHfdw56A=?Fj3?P81ZzpaHF<zHRNd1sJ;ARk50&s+y8w%Q(S^=93Kwu>&
zW9!GjBm#kD%pDzpB_9-nK>$2Z4X|1S_8CxvZEcLfPG%0q#xzg@0kmt(0@3*Z8x;Ro
zVq9MM*}2~lG0@ia#iQ^QLS|)WzJ`!tTVZ)FU}0{+!N9`8B0!e5kQ0jIJFv%h;g0XY
zhq@d;IiW`gMTHMVz47vx1?xxy^GF5#NC5)|9v0Tx{;9C7ojX+ENvI8JNSL^|IOs^2
z8G)6XpfzC74fSnEWB`C~44`}<VMU^3YiMrl<W54(W9H;!#|;L%y1FvxTRSq?I+*g&
z(2=;B10WvQd|~Y1Vr)bLjkZI^hXg_y1LU8ut+kyqfE7q&0JQ610|NsC$HW2yg9U>F
zA?SwOr{myT#|5FK$0en88L&q_6YxiUCz!bT&%^A93=AJT(uLl%K*|qo4sFlBc@~Fc
z66`}qWM+X_9WfhP5B$xARtGntl9!qx(FG^GA_l5*DH=Y4KdRVm&x0M=IWpLTKZ-t@
z!<}eUAp4FfV)9s7AmhCmad~cbX^B&|rJXJCorxWpBg7+uZ_8U>Bu4LN5l<9+=ntK|
zImy&UPQtCgZ<6bgSDcYm_{uWLIgF^LWh^2s^k&|uAH^dZeJ>AUYb&ScvO#Vkej%aW
zH@#v*Q;BZ5WF8AAAKAvmWFmgok4Gv6jiL~NZg6CvA{RuVqK>AZqQStRsMa?AzMO!r
z`u`j8|J`>0odEQ}dW4=C2w!jl8ia$1^=b~V{*r&-;&u6lCjXFsMj-QVCjb*OFok}f
z0ct;|5wO0$HXQ`PVqjUY99R*o4>kZBUaqylMqp#G3D^{D3as@Yi$Qa+CD;nOiUb31
zs0Q1D?Z6ITN3avv73>Cf2S54qT!OA}uFs`wYnuPTTw-Kny*8QRRFR#OjgwzizBndi
zgyUk9U^~^hA!vn(kz_AlZH#Fzar65Nm~mPoU5j$%`!(gd8=8tm%8lzq7@b1mH(`CP
zo`cAO*PfEmherszu1R}bNH+Cw?FHGbuMl-?mZ>feAL=x2{kZVhas{X09>Ik6n()^%
z&~Mb3K9+9O#!2aj;N2BiSCFNN;obTE+-rw16uxTiHBr*{8OOo*cPTvZH`06OUX9}#
zoWBl{Y{>j3kVWlm|0SlVzf|L3>cuc_cS5<Ow$cYYoW;f1g8ao`(1|e@*;y8=v*jZl
zOkD~&?YFPxDT+P4$C-<iU$YiS*H;VKea)jv$dwEg2`JJ$7J4Vp(l0kHufQNF(@!&B
zAgfyEbZV^qNIg%j4KFueWnm9tk?!`|Alo8t9NcUPnRx`AYPxD#M&FFGot~6}g!%Yt
zf{tFrQWbjzvs#L}ruw8>sQRS(8#PPyH|lF@9%^gqNu{c4-D-E#y4A}o*(=kH8~dKb
z#C?!7<^7nghF&2)b?@=rHIg?^d3T@sw=Pz<;eI%$^rj}=6WsI*3TB?m(LFZJ?N4P>
z;@taKIuvBW@AOb%+is%O)FdO7#j&r9D>{+OUI<1Z7BBo|)_4%rV}}8$;8yw`>K7)-
zBaiPrO=YZ?CSpcyJ!BNgnjAtdFrl{_6>KZ0BA|}eUJ7`8^ELcIZ$@SVO=pVUC*Qt&
zXXO!k7y^ab?rixI!Lq|=Z=!33$=&&8tvO99U}up9+R70Y*-Og1rSs(4)XieF(Rk4C
z=``*)E6bHFXE%O3)L(M(G*}M|jG)gMH*3fLcyB&a%PG~;wJvRS&Bm7ZE;n48z$-di
zDTWx5X<74t4;orW@BENtJwN%S+r5#N_4{m5#{ME(<?O8|2`fm`RWq~uUhcOlKk8e3
z*z*CcO*+vB`2w=Z{mVpC==VwDRq(pdhpphS^&JR#by8lo%LIP8l|c-X-CjbCZMl1p
zr;O0b+%^0?%pv`qWEz5KHf5$wA<FAJZ*!*?*X##-n`9)yaOFn5_nEmpld!vm(xz-b
z@u~IYGX7}e>Eo$gNIP)FK(`;9|HkwJnOT2x)Aw0Phi20j&m%OSCuQf<7*yZ)bh=`4
zcRv+Pg-;f5gRl%~O~i2MQ#QLC6XYlE_hH1ejU*&FH3$^4l2na3Jv+_3w_23tPl;c{
z3cP-o{_cSAflgJ2CP$c#OP&0)kchKS0-0{H!RZ2Wo{f7$-L_V8B??Nfl}<L?<!@yZ
z5)dDeo`X*Z7;h82nL)}@H+-QqZ&6lI#X<-|JPYFf{)|&}YbHs(xedid(Ragx<p^%4
z_%!~L{?UY%vH_jUC;u`nblHt1&Sb9`?D_WerFcRd0}SkzV1}{rp@|XgiH3)^<vhGW
z!%cBP54w=dlO(mg1R9WCPkdCEi5i|hFh5XE_|X$_gN$fGhWoA8HyYQ_lJV&U3m3#R
zQnAL?QTxi54GaTc9B_m%m`5$`Egf0+1J=yOJqlJDa&!sha`JM%mr+ukBX`N)t{a@v
zx6@pcX$5ssNIpn6z1h_rQKjhcUYyw9B)f=`1UEr-s~aC&9}$n5Q~wEVKz&FCq!CcI
z0ZX<LB<Wgfdz|2OJW=N-hKqEz;Eggv`yAi1x$pf~5}$96C>yqSv}jl4w$~zY=tarw
zvvsf@)eMf!QGXxCt5{;_6WM6Hqm@BKcN^QxX(7;u?MWW*3H{sk2rGHGW*ioS{8R;&
z68RRxZ>po4)$cq)K-(z~c)!A>v?8*<sq4VKr7gWf<bcSlec}CPzBJs<fbTn-URk`l
zfULD#=d*K{F!Gn61w!!CI18y`jscpNm5(Z!Lz<jxsNsVX61m{bdKZEB-)ZB>HF|8M
z#OUgi^x|t(OY36^qvaj=k?U0jq#2Q&!m@mgrq+<E>geDyv$euJTJsMk&KZPJTpjo4
ztL`MY@zBjJOGEditP#Ut^hr<EjvSSE4ap}r5}|2Xw{Y`e++;TS?r!c_ugy0sX3-ti
z4)}J&(zk~~h@S))9eq({hgUE$x2zRxQxLl^Yt`baBX0Svfj?|C<GVNNN+bnUc=j7x
z#}tmnoXsFPz2nBd2lRNS@}MwG+l1FOLKX7GyWg}ahYGCA{nO-?cd98$$Qns(sW2pT
zt>Vy$B2S;Bh7XfyY-D@!eqCTIJp1U-C}2jycEZ&YN?B1vI81`5k33_cP=Y&>cl>d1
zWZT}bZh4F`Sz_d#(+Io_YW%^L`s4R>!!onurmayFlkd2PCXxL4jp&(tbD!|Xwf77z
zz-1)*NU=|Fq7tJ<J+FAL%whh3*Yzvb1b2KYYgk`ql9nNxnuXC^$oaUsz(UCBhdv?^
z`)I+{?U865x~Q#@@*(x!n#U+3jD0F~s<*x_GVVx|RRycRJ#JY#>W@IZx0?HcuXj6A
z*|m4g_p@C+hZE;9JLhwAST<UjTxQ`zO5l;4yfhp88=hj+aw#R_cQ%BZb`N}0ZWhCT
z#L~|C=o{)OB^<;j+;ZoD_01mTDNNN{o?3rR19{{fwL}AVRV9xOJrfG5Mza`csSrL{
z$B)m{>eD1{&*aoiZ&WJ<VJ<r;;RNgJM#x)X6fqZ(Eh#MK?^-{6YI*}!eAR>Z3qv8f
znQ@x5+I^ZXf@rif&MBYj=cvZ1FXm*ueGu<+%;-MtlUCt-!{thh5m0%U)7vNijkg$y
zcZkvZc+CFmcoufD1=A1JFEN(Ntnc}}Z<{mNM;Xi!l=Ygs`h~uys8B;sq<5&CD6Z~&
zDHQr8#5DW^`h~)P#r|uDm{c6TyspU>aT$kn1(*?eDLS5}c`UL4x_t@_&ifS)t19!D
zhxiFP=xMCZ*Q3a>-br$;3ge|6xNMDW9QjD{V;iaf4<I^tE%2qvd8#vgwqxL0iKK38
zvwv`M^2kE~A<1RYK4>JMNW^JygPxwbTv4HLqRO&x`;7SAcS3vaqM3GYbH~}d`mL<T
z518UsI@x-H6>i;5OHSA)#vPi=?s3-)H+|UW>N#N_4icGe&Iq(v>%D*td!S2>qgvD*
z=V^f+gW66)5&ss<hr4lba1h0A={ZCkN05Bh!s`ZG!ph3+;FV~Ct|dAL=BjY7P}d?b
z%Mvwa1iny3l8aU>Zi&QeXJ9*nNJ&79l6dl<zx0BL*I_N_3CAI+Nr_6y{h8y0`;vSZ
zciQF)o)B&wzLTqoOy$~R7;aTZNekbq65EsI4fl0Af4H7C#_uRUgtj@*mF`y%*dbR}
zhj6D%2Y9kxr=<iw^ozmJ9SOl&c|`;)`R_(AU(~LCa3J}-sms7LiON^ryJ9aN4o}5{
zX$Ge;le?kn{~n#l&bRw<Y7?)c?q+r#&5Ah!L1u?+*PP3wl`|3Z@aO5#pz`fCb3u_Q
z>J`K}*JXLd&`+zMzm-ykCt;Zy-Q3%e$s>9W2ImH94CwU^B-@>r5%0K0&ZOd>;a(gj
zmVWqtj6Kznf#*wQtr6br%O#u7l5g(2cL8obCrJFVUAMB~Pbo(gxB26{@qPPjv~27h
zg~Ofab)49hV;xiP<qOxak!B04*buArUTjwC)$~gj!ZtJ_-t+@Wy}{!du5YJ-<I)Q?
zDbt^$a?NLKuPEd*s(HQxhvjn@wv1^&Bmc7))7!3FBScvXY%KkS6(5;S?(P@fGg}oA
z!XTt?Hn&~*G$yTF@v;rA@oUZZbsbjl=d`&GXigT<F1vL!3~|$6TT{#(Iq%p{x*EEH
z%=XE8iM-JRZaEB;h<9x~J=&mZ$q?biBayy4kN%cD!`%Y8O8??`;vs+dM|$H{VuY-h
zvt*0e4Z>zCb!cu|k<KbR%PbE_kvJsi&6C`+0=(uP_qe99DBSf(3w_K~x|OKR^I9s&
zLbRD@Pq)a7lvV)ucpxSQEN!z;roj~vKf~ppes+dF9HcwF-iX?!s{5{)=bdlA{G9}O
zOFV{t4VFAitWwPV12tzHUN3T?DlyyaH8r=oukKPFJN~1yvlpI13;ce>Zf~Yv%Fu7%
zXUk|bvCrhv2CaIW<-8&Mj^wCuXu2G`ASL0Rwe&Ptf|8xGlXa}^jeR7TvNK_c8B4+9
zKyq<nTJHmq0uh083)8D<C4F;sHvaF4$DK!h-@=t(na@@vaO|CDCqVYr-(<MiT`e2}
zY(Wh7sl0+WGbzd>^qEFz9bFeXw^QjDtebWp<Aqof<!3JkA|Y;(_6Q_d&fV#=x6Hxy
zdb2ZTz*3bYmPCd3_RZRyCb^$eqU5RQnRG9^-&X-rFh@e9m@UE0bvWC){z+t5vRu&f
z0F|M+shkin=Fu*m>8eEAb8E+8{j}=6{J{8Eq8jFu*i;r2GMs2>IfLWW4?pkBMr)?3
zX@$+6o~F8l9atjQF+6UpF9~t?9hYL-L8caKe^m8wQzuBX&9tPP38$<2;^wIRKoPwj
zBMpto7o*79f$VV@o@k;U_%LGiT@k!!$<Y{rlv4ONx6bKeZ|O9l@wkAU3D`Pv)RAWb
zIdC6TRJ_a{_UN_X;TqMYe&?C9d?Xk-s6U{-9fMQWABXZ3gBq^&rL*g+W|sqf=iQbd
z0;CgX5wg?X>eQY$Oh++}<ja*!;<6!GI@M3VFRLln@YGe~7X%WDsfuZ<W`@2F8M>z<
zhfnr4<CB-Ago<^AtaM{b?j0ONgid#*=(RN+tDer>$=8XAlbN8<M=Eo)f-z-fIeCo)
zlW-g+<0JiRn_YLfcl+W}?m8WB5tUQE!e6AQ(%TJ@K(24#vCrw6S+U{!c3*5D-|}@9
zbLYY{SZR~{e1ZYfKb9`e2IpV;34+ImRed(aDp+Wwje2Ff9=v%P_4)KWd7p_X{35C!
z+Nz#aN0za3V{fCFFWNGgXoc?<{a1E3RD}Fw+Ndv%4EJ8rPb(;lzO(+mtE(nwp^R|m
zOE~q-knjna@l#&k`J!cF?V)vvMs@4KXTEiBz_$DHj(g1*<8IW)o4TD<o_?NUM74DT
z_fq4R+n&EUGTf;$q&7vxJ@)|~0J2iKecw$>l~U7CVNY68826Tw(qLit$j};jM|9sN
zZ}?Is(o+uSi_e~c^k4QJB~Al;Q{S#oOzVtppvuo;x0)ep)hFa~*eshaL@%+l3xffy
z`9z5!QFg@Vq09w*(UTUPK^&BU^?t)2eMxR)`CkJYUC<9RVbfHtD>Ts97L51}6V~XS
zgkcX%qGDR3`a~<*zS6AV1-UiC*0qx#DaJU9WB*X~2{M`r?qB-gc=3fR=DagqajkZY
zF7f`8aM?w+kvoNIPY0zxGimf37kPZ+N#=gjyL4;r?Wd1cwnE?;;~ZyJqzzVkaA%-{
z6po64z3}4f>C;ck<Qp!j?N46`hgXbkpDvBk)PTq*gz&XR6OKMb4)_^m=hD>n5#P-G
zn16$E@`v7Dm=sGFfqnmAmG|I!5?g5dDe<dgFW29lD$qdvf8$j7J=}lgRb&D}Cmg^t
zE$l4pzq?d`i`QK$(Bv<d3UKE?xl|bc)}`{xsdzo!{ztI=GA{o|RQ_k+{m;%4h)4KO
zj>l_0?El_b!okUS%~!&vFlmYHj+3ry6Hd4mV6yVf#Y7E5bGUliD@syL{bO0k<41UJ
zhkZ$9fyH!nSGD0M*6<{zl?A8d?+Zhpm2pZ?JjlP2z1iTyi8m(x%#qO+<P~*}?v3s3
zZui=l%QJQB{j;sqE#cU95mNsN#Gta7)vd{_j{O|*AH%t>Nza_v#8Qx6er5D)-K}hR
zg8u+;vcTL%S7i71{p3*lmJdHQ744+M6K-^4WA+pYQ8r_NZ)@$&*LR?8(xy?CW;FL3
z#KhY-KQQ)bqdz%+%h>Jw#><z{J-<#B=TQO`h6{qQq3&Vof=JWX`LZ2-KGxQm`FF^&
zH>Zr{o-Vs=Id0T8>*+aYd=;LO-zFkX-u6e^L<e3$=Be-Vo_`ZLd~cSKpKXJnfcm3k
zQcv1FoP*RS&xUReU>UP`=hG>p*F2jNXGuC}KU^Y=U>)$dmy*(9-`4ON@!*tBWbAW)
zZ6{su9ARnY2Ss~r;$m`7Excat@FvF>khM-}A=UsoG38ukv6=nt8B+2#0iH5yl21E|
z*~;=G4oS_eqVCUCWtL;OGP?t^^My>9y7N_b5jLA@0>E>jY2#WiU-Z%!C7~{CBr9u0
zDHge<nvdc&>5fL?uPUXOyv@pRW>A@3!RZmCtG!#*F;SeG;B=PKNn4D%?`h@jT^37w
zU;S*Ht0grfCKpF!#q9p$-jeNO0zdk#>^AE(b*bAQYt9TQL#FFnRiuk0iN0CnX}r8&
ziNbaGQ3_#-Uw>(<FHVRQp3Q^)aZ&=V8-_vfVwiIW$CEpCwmBP1G>(oqKjuAm3n}j3
zKM_opNUPu87_~zV)SdQr-a7~%U4Ab+pklE4M)%o$>xOqdCa+dDz&<Uy+iTM^n>OKR
ze7lJ2_~*}Lh+TRpwb9l!MtusmmK-#Gd>yU~po{(Cct1&;h9<v8q8QU$0r7dDh-?|(
z5Qjuw6<NtERF{`(!y!dlx$kRNIju(=>Fqhj34$wM5?c=@{fI&1w@&vmAQ!8t{qW@j
zpGo;*%pgKX`iGEKeLen1UN_F3nM(YPhPk%K`9DT2e%l%HQ^7!jhCuLyjq7*q0$jYV
zU7*P;?E=Ig{?=Oy@y7n{to@}~pc^^<tTCWLtLyGn$c6uLsDg!&?V83gG}6;I&@(bJ
zay&$fsD`Of_7-Qkh|)hL?Vg%G_Zp9EEuH4+ndYGbzU_^5noe@e8~?*276u-L+)&3}
z>5Zeeps|9<6Vd0sqLul+S$TO45x&vSp5^%(+E^%<I2fe++8RiRTG$vMN5$k|q-LZh
zX($$D>c=Q3D(dBx>+0x~B_wK=Cg{f66cpvir>non!id#K)liO4&i)#op_=_QAu**G
z#wCR)(W$SWo|;~QUU9ouPJ;g91L}`GGRAs(daQbS>jruTMn?AMgUv@?-SG(gpk<b_
z(=UBzUYn6TgUBqWRL*jXYB6sZ`5Q=cYPC1(wuzSyPd6hr8M-#}pQkB3Tw*_R0sfrj
z*3(&o{L0;S<h~<VH7({>Z7oc6H99h&qhstbGs3_;{$xG$yI4UtI{Y_=>-SAJm(c)7
zFn}2nsf7fQe+2`80N7P9jO$Oq0Cr{;5co>AAb0+QcmZF401LWYMZzEf1sQ#}pK&p0
z>Q9j}$dy0h1V9VF<WLy=2rLQ~2TOrvfM|g{5HC;yD}z;lsDZxI&%l9|t<A6Sfsw5h
zu*D3BB0zVYfsNe^t@N#dkb(*D;4ctYFaw*p+nE{L0D%PyXlMZvRj_fkHZXQ@G&h9=
z7$9LfJFp$(xnV0~6Q|2FXe<nD2gJT?jebRhukCMv1|=Mgt%2QI5MmsSU4SMWt$^JM
zK&ZmW)fVgwcKO?o1vEB#-3mi5{Ez1pD?1~{wMmtyhODMETFXbeun-&^l73smLr62D
zb!3jGrf(QiEOiwI6dXu{OiJocimI<e+EXQj|0tLf=M|?)yV*;H;+hESm)13(i&ZyD
zzVF+7S$jw-U{*8ZwdFzV>F;&OXSq@Hx#^<jv73M6z2^BHf1>WuvNXoQ-L0F4GDHu1
zBD_K~o{d<|Zb>10?L-p$-q@;jFHQY>Gqo+r1ZVKqJvT+;7eUJT@kpCE52i#<B3)7#
z4L?*@*kE@=X3VmKSp|r`Z9TBp+$_OB@;{}me-u1PSmdMkRA__UB3~^ls*&F&RWi@v
z2o23O;iGFG5$|Ui+(L{u?g;)!o#b$}Z|GwsacmxV3d2v8);ZRByNL@MOFnu!W!>Q9
z@l;C)qs{60uyC!wJ4USvHG-_&gRp(2@8HM@qE`+b#c2lR@7V+`{YC@)-|?UMsgsL^
zr$0R8J+jB9lubW8e7J?xCTW1nl&U5ZM)Dbh{CN%V#d6k)^f3WXkshiKSa*=o4X}4e
zXJxG@Z)2c5-)SQ=fU8JU6Oty2$BIFgGXPbDB}&p!J;t!bf-}Hx#=sejv?KXKiHITF
zCSZUuD~(EO))MfF{MetD$<LAFD2qhHt<~M#CBjAV%|lP*AHL@n-)}d|3(|ANvw}KM
zs;E^Z$8F{LC7RZ5&EMO-J9e>l7m1q^9iN3{aOGaX-GUDIMfmbtPpk(z{D{!Y?w(D#
zzo=>%XI(fTZ5Mubx8;`ez1L67?(;@H9?WPu{9qX}D)uTs)`~Q7K$Y?f*rfV%k7+?R
z`)P&iWY{B3c(R;Cc=@@0I#R<{0kWb1ECrU$?W&gp;+46#SJzre9xnvNM@4dL$R&mN
zzQw=0LFIWs^Wsy2mC#JG8zl>~-Dzzm1|b^Jj~m#D`1~OB4}JXM?mKe@x=}2C9P=yH
z9lLAj#9H-kC%XanHx1+a4>0WE=A^BXo*z{Zq^*9f!aBV38kIJ0HTF1HnNp@irQrU_
z#u38-0_c-XVsq##s>)VdGSlI$H6(sk5dkeLCrRbk;-N^9;vsM!y=dpMn9O>#MW*<j
zT_)?XqIHrov|wW~T~yVE^&607)^`-*lOjD|rEVI!VW`tTBr!)?twTZ|#9+0Xn0H<F
zu$2+VA;{YA|FSkU+Tqbu!L*-r=(3lPe}esiPGf&|>fX#f)6&^sX3`%F)(lJCsqbhC
zlSCPE@yWu&<!e$J$fwX$nguavOkSbvimjxGvr!X<%d7Wk;jmNS-C#a$>QW;pD2z~2
z9xaL+f4nX~l$bI@efnB6nEAy{8?I2v+%4>75Q~OOOr+3&1>r%H<Kmn3yPcgT`wx$D
z7vWS?YD+3d>a6As$_LoKWD7qwCbtsR?zrc83PaI)`_xsu1XP49A^q0Wgtf=hziI=%
zIzhBm$9j^HLhyBh^;&hEdR3j$9ShAYSL;*u$xN4ae@Pin7w(7IC^WNOA*dgkChjQT
z70IV5Ksr^CI(igBIC4NUV|MHQ^yl{)7GjxW)1T^=x^GhEQM{DDh2u&dXI9Th80!(T
zo7E6&j&jWU5|+C9Zj@njxm%RJW0{%a%11b}tW`onAMeCP?D#IDVwVEk@u*lU!NI){
z24cCnFm(OzgJ#7WeQv89xg(wd$w^EqW5$W?(XRIFE8e8r-AZz+=_&%o@{H8viD7X%
z$e>J<<{gXZk-9<31=G1{Hgcu}b;t2}r#zL~y9OeOrh7{F4_gfuIyOGrzhMqFAtE5(
zdi;32NJYa?4L*;ottw41+l=DF;72~9<><Ws!`VA`SN@0Dwv~!1wrwXB+qQRX+qP}n
zw(X>1+qUh>&9A#p<DBk0&b?#ozrOnweCC>KJ<OXeD)HQ;Pw^1%V4EoQyz*k?>;`dj
zCZrcOx^hi7ho6M@8|<f1)38ey<+{#*0otQIonOR5zTT|2<*M{KS)*>X%`q6x%@vEW
z*d>|+Anl~3ZjWK=h!D30{k;<8;LG-<f&E@eH7^P*@zd4EgDT4poonDOq9aiT5M
z76t1_n2Se^`S`zr7ucFg%;n>WrKOExv0}w-3uE@KS}bza9btJ#5K@Yi{muzSCMsSC
zd&({vyQAmgIHa2wes}2G8SnA0fTF~D_}0HYc+K08YIk4=+iPcq!p<yrS?{xYeE~rk
z9nvSU|CvC=0bb?EB4`j(ffkkneeWgTrURuA0~FQBXjQMFAGRT(%Hw%=-|_FiUVP>-
zpzsgR!~Im&vU;>krDP-nG(Hf*K!)B#Ad6c0G35fREG&86_3<!uLD4G?XBMYK7f*DU
z+~KU?7gU^vjnZj+UBOv?#pVgDU^UV=Pln4>jWxdwp{k;Y{(MFV5@OTHJzf2pYsZ7N
zkBiDlkIfVjU=o>GAhUY`<Sk}89K+^(YRV~prr<=hk|I718?<6FYQe97_d5gFA!Ea?
z2#-Bh)bQc134V7=hqPjCwMd7a(SRzhNYW8?GH&;)Vdr3+rS(w2wB~$PwBm^pf&+~9
znl|28XOgFG`tb(c?S4y^Nzq7^I*4M>D%bF6iW0JJq$T}0jopPj{a$q}9EG@}$!KtL
z_ku-MP@Xk{bdyRX%$@yuDF6r^pQdRaeS`~cNK3{Y%{JN3P%1RlQAFBb79ZH=7Q1V@
z+2&_Q44yj$KSX-UTes5!D?E6^V6gKkC#NiV(f@4TW4EYVXS#UMCAJr`VdAD}04giM
zNpetq3p6r~(s?*Kz@I|Dyd5oN;->1B0M#_o-cF!z%9pA7EwyDe<5ZDl{|l*(GV3>)
z5S~usJtVBl=)#fYIs3!)*!X&umOCCsUf`Kh_nUB4Z8KTTa>6<Ry9}1dqy9xH2D3w0
zbMWD;aFCi>Z3Y7+L42RfD1H@-*a6)Ijkq$oXlTujgWYXeS*|n8UTGkIj~dklghmj|
zAaj5B3HwYCZZw{bAMOVahSpJw4XB(=3|2taZEr_GPKE}Ya&m+x|NU*ws4azk-m>d-
z>UrJUS(gq6M|EmU<=PAmcgy^VbTk8p+^aATCq`8f%%Sa%M~yqi5T7wGV(0!TS<wQ&
zP@X#jD;akqF++O=sd}+)cQXlMsSuK!;q6?<lGDBGiDBLlAAddbC6V%$M-D3yr;W!J
zLZm2I(S=<SSMfmTmc0Qm40Zox^fNiC054kvZrsGhTdQ;=TezuS&{hT4#)F5gJ)0d=
zrawuQbZb8wbU~>@R&USucol*>1Z{rJ$kEQHq~~lN5!E7(0(sec_1fJX8JQZ+-8-X?
zmxn)PrUBDC@T{=Jsj-d$=)b}^|2&xq-C178Tv`itO8siTW9zCInwvF+4=Ww#{e0=&
z7nKUP9ocr0EA63~H6*TR`ilA}a_V!Z%Ne>3+iLzbeCFI(ySWhkvF3(j-1x`y1RBrH
zOJvw>pv>M20LU&STq<3KTlgGTxn3e>6+#zoB3XP>(2^1J3WhDsPw(tHr2oTgp0j|j
zBQK|{n~hw03w0zbZw!91N)et|93+4p>LPxRxEx%*)aPveHgAO81?}ek7Zx)bjSs#7
zH10tpXTW5-8HdNYesO)=BIDUzp4(d@CfldAimBt2FR8S=@kjTMGjK!jgwA6d9GWu>
z(Du{IL$e=lU7A&#O<!Mr_)`a)n`j&kT@fX(rLT_!m=8a)@?=@BxIs%wZH~A5V4<@=
zd(Pzx4`Pv8T6xNWFpHECG`ZKop!gJURZ#(015TD`RNX5&ajMw3>A2aciAOcbeY=b}
zu?ccSnEA5{Au5B1imB$g$3&ybcO?T<tX6C7_L?@YZ9Xv}h-{si#&k2x<&bTfMe*ZQ
zO3chCD9q5=5h1f(#@)R*sTTq1U>S=AGcJZHWFCxXgKi6++0(SAxW<HYP2F}A#ef=3
zdQo79BPsR8Ms6~-g}n}{N8ZEbeCMJiF&8SR1N@>TD-Cvs@L@kZh1AHoNUglhC`#k@
z?~4S++L8zk?(1mclbR)zt#_X>DJ?iM<A|@UqQ+6BfCJ0vW#hLbFy=n8h0*=t?pEi1
zA9MPGn2CjpQ^rrB^pzMIXS}v3WBX*6?zy`p?qn6gv|sE_(PRXe&EhU2g9^WnF)wYk
zlu(s_f;U&7V1^AFytw)>X0lBoBV$_zU?F$Nt?a{ESd}stuuhC5DK3vW)r~x&2=?ky
zq0!aZ-O@jZlsG%!WlFz}GwB+kk|AA#s<@=f;ilY<@YlA$HAU7ARfG*H)^+^&G+%7#
zZ@w2VSt^m$6kVx3b_nTwMOikGLIw$Wwp?Ey9Iw<hX^`r)FfQD^8&7Dcu9jWB+75BJ
zAws-Qu3oR*x@RIpe<+oCVLi@HaP4im=61WgoHq5H3=&&C?7I3HAo?__P$EO)eAd9T
zJq#>kc+L8gY#qVq^{@d;o>L2A#5<ntw|mI<T@q#74QL*C!dX&dCJ|rEQn`v&N>lh<
zSiRbMjfbr4B}$>dWQgkvUN=|SY*(m&rvB3Gcb!jOoH|Wdh*&zrn^;!Fn^}@2KbEm_
zl0N(IN1oM+wt<rtcR<Zb&qJh#L;LOoJF+*F6}wBZ|M2HHE5ToO9*)86d|6VwL^P`?
z$%#-uWC;L92d9UR@i`C7-fo!D1dtxZf&?*sgwfBy)EVjwsC#^g5KvR+-$6)$l2C>3
zi0V*x4@0q{ixiRzW<Wb#rACDL%oDJB)FWr-dCiIJ<U&HH$1OW7sm@!&HcJW>5hq2$
zTOm8>*xYM(Za_x;Q4g;#20XQ>EN@nt@Fr!!s)l^9Ut4=kS+I_KsA+^!1=3Nn!KM<5
zn92a}uX!}IA}2pTi8SJR+v7OL2&5U2`)A`VAZLsmt<db*A&<OSbi*__yf}J(c;t;A
z!h{ddF-4M5x|Yy>YzW@`<^lw4BO~-x!oI7%06QjCi@gHOX8$D;zeAINkAfJ1z4&+q
zg5Yr42pW2>j=)Uo&?q2+@9nLZtI+ljsVv%Y%;0U54QQ@)h649lxy(cq4d1j7zY{H2
z5IC$^+k>alLVQ-J@G}3zdfV5|Maix-=7z$n->AR844#vft+G{AY>}0-x~Oyvkyb{4
z{vs^L_fC$K;Kux1KhoMek&DAlLQl!4=Zp^cOl0{0(*=@>-HChIlusA3f;251%q%TW
zuaP=p`ST!sIek;$w8vp3@ZQSXx%70sKFdW5zNN3+NXpwO2DTtIO|gQ@>Dc+TSeK(Q
zn<?qL;EL<q2YhRxN!5tXu8;PxG|kFWapZm!n5rZu1rG`aI_RKu^@O`;_w>1cTQhy8
zv6lh}<GMvzTfhF8<jl!5v$oLYu}F_e$r(X)#x3U^7kzZ+AQ3{TrJ$teCIuzz(ARiA
zKqr&|YqH$Tiy!ef@5i#YcS>W|0;*>IKDtyu?VwMiwJ{RJIvBr_A}0MCZ4it$r&4?Y
zxa28icHw-U@xqp;WA{0)kw+y5sx6VAoSakuD=D^rr{>E-O<e&K&22eU%dRN6v%der
z`Yum;y4BRQ%`{WbW;S^tOhRUXm@#hMf2l?`YBW?V<TqtGesz6U6N%!q<5UK5W1U%j
z6bzYK*@!?2S+~exW@m|*ND4A{7gVS$!ClvSGQj!iz^q%^jJ<<O@@y=@UZFlUKEX<w
zG`G8>+3c=TnQGYzK@US4DJL}<%&r<02ijc(aAZ5+L}7GtHAB>ok-72xULbYGclngO
z{0JZqlUcAeNJvLSD_No^$62qs5X`_^Gj$-T?=aSw6#gQN5_j{9^oI^fjBFKyoIb-A
zBZ6?J?VY9x;LG&0I;u6|89wv{Rsp^N!$yp0s!g2ZtF7Z-mg_&672VE73FWsA0#!@a
ztPM53p;1WD%s;`TbaqbR5p+t~zuu&;HE7F-xK>+BkbJn!dK(0>J1b7r>{MyVnys28
zVevy?h;Tx2N6MVfn*5OR0s3C4rn=NAKSo90WXR2g+`<#uaN3*d>iFOd9ruB+Oj}YW
zSp*lrd@(s;MbY;`)9R$Hgnja@2B=8o8JS{L@lq~X@I$ZRq?HA+%?P7aK&=BiB_joC
z!E^(`uxfPwe22Sg{t{q3!7`+iLW}EY8~xjQ>4&EljNK4<M#$VMK|n*Tg%p*g_4{^g
zxWNkj8h_C>Qn%oepAjPpC&4S753bsSPbWi7GdNQV{&vm)vL_gKZX;OuN6839b?D=W
zs6)6|<rGRf_QWR!xJT1MUC!w2>;;=I{$FsncKz-D6A}JP%jo|K5&j=Ftbav>-~AC9
zhVPZE@A}VoH-!23{{qn8k=DOK!tam&89=lCW4`|fB>d;k|CT}1e>a`}8wmaHB#`-^
z(CmK+^!#)Jbb@q3|H1=BzlosuHxrclrh@-i#Q(ts)&C)bR^O=3LC@OoKLNsj%;JC1
z`Twm(^bZp>bN;rCzndQaiw*u4C}{Q#0{<Nc{ww+WM?L;GwrA%0AAIj0ssDece01(c
z_BQ_opMMu`{%hd&FCYFtqrUIX^nbRKvs59Jke8dQn)537hU$=|6kzQ7F$%&E1tas>
z^2sSFVPX7zr5FMx99-})4YN&z5&=Sm@=~#;`F5dU24Ku@Q9KEzOtYzquCH!isIj{}
z#6DeLON&J_E$3SfQ(NyBiVS~B@rV*eRXv7Z&r5npKV33k;-x&>a$dqcaYj7}oJlYS
zey~IT=J>c9cM=@Z6Rb6bw?>~BF{<yaMSkzilJDcu-pWf}S|?SNX=%!0UcF<+!qOOb
z`<vazv%c|@8fR0Wi~Dm^;1td~dCG6ylS{CGx@!9l7H0GHsL>1o0YP!|L$DYO%%h82
zuihw7wzTEYd>Vdw$5s|jGNjj6ZXCt>Z+3E5OUBxJj92d0RIS{!uH-Nu-1N?mV7-eY
z&<VK4?KZo<e%+8*;k<}}<Ur9@ng~Z7+@?vplwru}r1h-MPL+4IYt{XZrRlFpJKz$T
zhBVy!waB!Z9(s#rnt{%-N<$GwnF(}b6vhyY@koQrwA%^HKj~`YQo2j@NEm3w+>AMC
z6PCIcX%rJ8#z2foc1EZS^XTAzg4G74Oi&pmQ!yqKj7b|J-LllCI{Vlds8UnVlOc4$
z=mXLSCgem0pbdf<xfuO1MpGG4C&-Vi(!KtKitFNz0Y)+3rO#Am;SR?SJr2H@)NK}w
zGHpKE^po^!_B5O^-hq!T@_&~xFw<)py61g8{51jVWWqPtY4uA34qF9l7zoG*<3_K<
z@N^ofCTIzf!F9J@$=9155QT9$BD=NvFd+hq2}Qr%3LPgxi|*9p8T|l;@`xBHD2`ks
zDxYk>3IMW#-kXz1Yb_d2mZey}fc4XzDJ&9-i@eI2RLti`Duo4<2%_7{Qx4&7Vwn26
zw?Z<fKZ8ZG)ava(_<^Rm<<1*9?fr77RLCS^`J<>5F0x5v8Z7dHaOU1DV?3%wpDr4W
zO;1_}*+6UXJG9sxV@y6+Lyi>fdC%-G=d+7AuZQThizf)plOlYFp#h2q3piKeL3}`X
z)vT6Zm*b~<H}G5S$i@kmPbsBgHQ(8nIh4iHqQHQsYz#9PM)VN>i_S~^hwZYb_G9OJ
zT#|(5bpGX}2V76aCj~}o$sqlshYdgHv+fcMgCf4g=rGsP4&swp(q?cF?$e-<IB9H+
zt0$b68WRQ4MDihz4I;pdG+V|BQS{X4-R=kK-j6ob7(cFVrc3%X8RJQ8X_44CxZ)kh
zOs8jhSd`G{%DvHFd<hztH`>Ap_3ekIZ9qJ}#aTvvN!mL<t2Zq1v3Efms;69m+m-g_
zC>>cxrt*_gXDP2GF49Mr`HWL<qPy;Ihnntt{fQIG45?RTHtc?;`C>Zqp-#>;QZo~g
z(jy=?juA7Y-O$eKQB07#@EmVmcp%F8UQ1Rt=WzIZivP81g=L$u!c)tY919TGG3QgM
zC@dm8hh_uNMO(&m7c7ny&+hUwgt_!Ghqu=@?=bf6Qt|o(?aQUQ$8;Vv^7Ol8+<{0Q
zeFD0!Hh5G1@N@O!acrr%qqjnpWQDS0SoC<ONhoh#iN-XB80@t$sowf)UR+^L9i#SJ
zkL;Ew5NMuEDG|pC?E{2qyWdtJu=!<u)MmMh-}&vi3I*eZO<|Rw^n>!Dv7$DSM+b@H
zk7iNW7^3^HD1tVLAOq>yq&(Ap=DoV$uhh4KRZ;Lc>$0Uq281TWvwc2`X?q4Dt#;Us
z9>+=scnpc)bHOK|qg_jHWuRTB!e*+euz{*p07=G3PBV*YsAoH=4s=lTj|N^k8}fX=
zIyGdeaF05sm7Di?Scf>ntP?=UsD-gWWq>~C5c?gf!hc9HUHuKyKK4s0A6L~QwZ0@7
zZ7`9IODF&jzJ0^a^dl9p6d0oo-P?HVI6HeV66Mf*YVSVW-$9i$BT|W7{gTKyXS#&3
zMKbqbqffB!57KV)`+M4P2kmY&Ps`c;u6=zP4u(Uy1Mm1PR<;~V4*tZFsfTlDnYxbB
z^}bsY@5i+4orTN<a=MQg<-_z#Yh_TTQ@=`J_VJ(185#21)9n6fudl!yi%e8lfuu0E
z;8(QX(B*qihU6FNiUJ;TDXX|ta;n&nOHxLHzR}uZh41-!Hz(>RU|z&xfM&a!3#*Kn
z92J-tkByTEfb^h@cJF~Z)U`tlLCzn##rG0woBQ;YeOmi40;Pi5VRPBVRm$V?(5sah
z2^~+G(F9FnmS{l3HcF`A$6k{xTAs<Yhj6*oK}ROQl)MQ-sRaR?n2u03VvY;2fmlOD
zhu%v%1}y4oUAQjpn~sS30`Hiu78PXq$YMe3e%ZKdf7BP=hu-;KgA=6nq6m0XEXI;3
z39155RV99kg|XBsM{Dn?;tr#apgTG%npSC5RV{8rXBhHYI8i-}H8m?P2g<ObIfwc9
zZcDVP5~X&<&iCUo>$c^!)Up=72(*jbO~Zu$z|29@N`{|7+ICQovDSTc0Ddo#B^IcK
zY0t?EtIo;G1OEwdMYf48@Ogv~PNb*dTE6Yb-FL79{i1?iZg6O+to0!4x+?UAQ`@`Y
z<U?A$MFaM(?gC?l5%`k<nVDi`sf(kyQ>(zFKRL+y_QH|=$IrtI+tg6b)$C6Imezh>
zMT?xsW=v>YxYZoDAD}&(Pm#{Jh-^c)0?{o%N(y!IB{RNq!=V>Ll`aZwLyyc<*J&DQ
z<?XBynZ|S;F=;UH00|ckJ%@>0fEntv_=hSeKll1nU2Cp~sdgc46&SSyfl7*s`04hl
zhd@dQTkJH&_{6Im2LT_FDAF0MotKN@{Z<g0yMTNCow9<EswK-`KL1Q4Ze8xK;oxko
zlGTZWO*=L5w6a%%STIbdZ~%8oB?B2WK<r(RIpV}y#}@Rf`;)E6Lrg{qAi=DPEO&DV
z;2E~RKQ3gVlX{!Y$7E=57-MIgjV%{_cF!zJ!O11DPvNMBl)ZsF${_(v4Ki6JYZxef
zb_bUqg(u!=f%@~*pi=t2AB2`v*<(p+6EQX;w!zpV<aqzJw$LcD=PdPx=Rj6&lhaB9
zsT(`Qkb`8#j?3CBnFznzJgMe=f-`*b>B$h7J7Jw<PKn^z%itp2P0si4-0;xV(^Z*M
zumtj{4cOTdQWmUomL)7P>X05qG3fmK>OF>iI$b^y$8<WYWD;u;TluBuA8xhaCD=8>
z^U^$xbYZwG@OCw*pN=vL)Rn8!5yRI7p^p6Bm+~gr982383jt_@`gOIGmr6+H{ShmW
zpUTvY0*=z~p$X=#yj*uk6UM4fcYeFQt)rl5=o1*qo;}s&P1*vite>Y64!9f}@`=b)
z-$7s&WOf=}6e}#8(JJ_Di7qy~Ecf$pM9uB5!IuA3#c$G#sV@4!^EiX6vQ!gR6E-s%
zZcl)qX@r8DwoweYA4_~cKq--q>%JP>tp4cRCmYi^H3{&8)~E*sgjlM)wOLC%1PRxE
zx>_;|Usv)fl|-DIX;8q<Htt3EH57JD_vfyZy1{%SK>TptBb^U9eYDj=G&*#{Ma10r
zA%0J1JHVA&*bkochcD^6Nas++3dJV#F#V2E+YK^#>OfNZSkkq!82w{4#|zvHR6ML$
zR2ziup=CV+tFznK*sAdrm0y#YJcVQ1bi^AbiW%!^8rM1&7044p%t_MbGU;WH>Vd`f
zvsltE{UBnIOXmCYfr%qo5hb&zmY{5}C#TTZVMGox7ITg2RU#Ei75n=5sScdA71iZ!
zrJtyJ4a<ljK@0S?2Zz_iyRMntcx~9>dq`exnA@~G39zdn1FrFexKjBqa3yzFf(`ca
zKziy{a_))W?`TlXQVIEj_x#}p&()AFIl!2hm@!{63vfC6tsz*1(<bk=%!mv%a0J81
z)V7xId;43Oi5LZ7;XJz!s6CtCi*9g8bQ)$b1SW)746@`0Krmq?r(I?iKue28cIO6~
z8(>*Yh(HK>2gNLLEmx5EspW(<=GRe!|K71$2;v@q91+S@iahvtfz&F|GF~~A5d$(a
zYA<Fcfgzp7>=kL0^pWBTm8DYOA^>>uq+;I~UXtPM7R%JGzDvMI^|U{K;QEcGUTDcH
z$3>%#C;G{Q0o`_Gc;OPjO7)z>k);JJnoA?mlXQNn)&*~grnr7+dg2}+OFd&OG(!ja
zDXO;Epl|=o3VMsNMp#=Kbb(pqnh<mr8Og17*gh6%zg9r<wkCf7+OGk>7SM^=oPAUc
z*o@823;JFC4g-1pu|Vo*-P_{f<uwP~)`}bOl5SbLo?`Fi&@orF*k+n!c)f)#8H_ky
z!omhYhU29TKm;zERd>=XXD~gDw;lD2Ufh!`NrVw;C&CD+NT<Yv17MZf9>xJ-V39*y
z53;(zSfAMuOU_DcDF<xdoJfOUi~5WIatn$i(;kGP2}8Lm97n90k;LCTAnZX-zJC7b
zeKcQdSy1&!bbB8mvr4I6$SO9zjx5(3&r=SHZj@Fho7mcczi1FK61}A4xj^8C&YMrU
zZSaV1cu3I<?&o{sRpxqcRnp0wH?I#$9W2y>pR%%uTvOasnsXkkaPotR^FWkA=ahAd
z>U_a6NfXV{o{P-tBE?!aV_P8c9@$@q%)Fn>DjAXL7PL&-Oz=<^x;3HmD;N~zk^x>9
zmefT}D<+n>D4e=uTHOe(;cnduwF5VNx6vE9ztFJ9_LF?w=P$_>LhMb*(K-8N8a}PU
zlDK4?bh=>X=oq!%A3^kdK~Y^=9%b&R*cKuh1a;Mf>vnD?AINM&_b~2a^C*869(h*`
zBcj~UMae<DNRJF~kU>M;xA(k%dSC9*UWC^qZ(X7dz;NW+f;7V@&wMdn!*gJifDC85
z$eElDl#Rg&kiFjez8jX&r&oN3Y<F0r9qw@Zq%CZM<xrzIc|6S@cDLN8qQ9&&(Y?<?
zdy^vd@f^-e`5E%{df`E&jc45}=x)gXeOV^Ts2+ulN^}+X<4GamFJ$L5x7#C&9ddh=
z57kIRw*SY2h6eW_tOwdVm3l6HH-~8sTwtGm`v#|{jO+Fs0QlKp_j9lQV4_vbN+J`G
z*4#(O1)t16Hgo{KQJcCjm1q$-tN2-U5C*|A3%*+^rBTAXmSz||2Tdo}8H&TZgANg8
z4}BVcFY0{pZ2NqF`>=8Ca8T?0h2TexFIoCG^Y4JzJ;lPj-1IZhw0tJ<<Akdq+BzYp
z2VTuqia~U1W=l0d$4|dn(C2Nopd({sgpz;}yAj1};0H(DoNavm5-BM}c~qY=7OZTc
z1dscbBcaAdV;LL3t>4JztYL;sFDU)&Ccib9Pj$Mif16N*G1ML}S`xMKFc#zX+;?w*
zGuP1Y;1CKF)J@pQ3D3G4g-m*w1&`O~Z`HmRVMPHDZ=(89TXyJj-YGYx15i}kP>bC<
zi_W&yI;NL~IwqC|NPR2W5kqM(qBQdm{h@vfa|g-aOO!Y}wRU1X7h@xei~V$ebLSV3
z5Z4-xbDzT}R<dkJeFkY^7~$1rGQ*dkg+=am%hxa5n$VN9^n1Bv5Afk1E)DM%4B|zt
zc^(p26QVa58z;<@34=8@wT^;Um0WW*CdKDY;wO}D2<%(@+%bYa)Dv_g?@IL&{OP~W
zVElXAWJOo|)t&6!%{iJR3@I>7sLNM)1Jo#WxIN#nQbp^Ef=GlH57}Z@QjduhnV0Ru
z5a1;*HL9)*+M9sG^O}|gg#|%~4F`>jO-SY7mLC;6`H?+fB45tvF{@UFzOt6qKckl5
z^hQtYDncphWQXPPAonhMn6l>81ge~PHzK23;)e1B;M|8qak^$6zt94fuuc!N#pu4K
zM$w#oTpyd7o@+YKVUv8pamhkwTcsmB3xZBKBP5dhh;8klT@L?Vy6DwskWJx~!#2EQ
zalf!O*D8oHC9xo9wb(^s6A4#`#Ncv%Y-~Pyhj_fMm_3wir#XElpbF^ql6O^Rv5X%8
zI_;Hc|N4Tv+N4}oYqSI}A!4LkyE*@zSd`u6a+$k+<LVft3L71ZCwY_%!f?>s{OPY-
zH5B<yC4ajJ>SfSTOLulJ9b@M3+Nt%7a$GFtGtA;mR_RAWQ+{-hR{p#-gaFPl5izEb
zE6tEeRciEH80>%%_#mx$OrV{WT`o`6u{zNCg?0mRU`K|v9@DK-i_eDpv!xn|?|lCX
zZ;%H6^F?xSPco!9mqY{ABHyne>ND@qx}v+YRi*CzjihN0cltmdgT^s_iG&+x0l73R
zjnlz}&;P(SG>ME}KKi|j`>*8fEvWm)BN=s@kjD+u)9rFoN<)m<{7GRc015))w2rR|
zBoOe&*3ebQ?hVr6aM~#@-OzXu>R4h`%`Qw+>P&s$nl_Mn#_q80)h_PzwysRz*;mK7
zSlnie2~~b5Oc+HClw@@dZU1r4psO+pKnrm9Qj1i13Gpo>_9Ol&p2h|t#zm?t0QQFq
z^@NVoRp%E)cqwFc1TAP}h@>sgv(W8675n1MFB5}slUcBL(#jw4@JQ#^cnEcj^nvbh
zw@gFc#d%c<BC_<!`}?%XHZ#68zilw$ByG}&lu^P1X3{nl2Ax!pio&fHKS3@Xxvvd)
z-GUfOp`acZTnWZ>LREXueGpt~=L%+#rwfEb_?;nXW*|bwg>l~I+(dA)_mC4(GxbFs
zM&r!FkN)_00u+4jYIsh4L$K4;wTo%iT=(in&!Op=-!saio8FXPjUZ@-IU34+J!YPw
zyEZvIT$18n5<&-w)$hn7wDv4dM_!T#$aiJ}TI^gc5-y-FLZaihDgLZ;VfL9W)94DP
z-jJk$YbFJE#(yeR$AB=kc7TGkN_*6!g6CbPe7W9P-?$j6cilLBikj?7UbTO+=M#%G
z%h*`6VRx16>JHVV4+3Y{2QCri@UhrF_x5e`dNJZ$`Fr{^Wv9DZ1+r)*lm{b=9XlHZ
zFu~4ZD>9OGEe%hTxYWUy+{Zv84Y`GLSOIJk!Swe)?@B$2>LB3|K;y&{9t_rQ;ClDF
z=?w^-uJrL`X^~E%?igVEDBkd9T4Q+<*@OM&&Qr_uDSYtTvej$UUw+WyuIkwBkuTVG
z<o(vJ@M{#(b9ndzVxkID4Vp2g;AozO+3G5`QABLjs7R`Gbk1<+5Y)d{+|0=|{`-)|
z16>--N=N&{FQ&esw5Xk;)Vt#=x>TC6Co|~N?ySIr4Smvho216lLHbFV!h<$g=!+-)
z$|^s8pa2avVc+v3s6^!DV(Ym=gA-OKm$ph=3+o~c7v~ozd(GR-AMUsMX2x|M_atZn
zXXip!`dCCMq5JEe+~zg=J?1%HeT>dJX+8}00lI*l=XnQ%N811iaM&|0yEpm?3>z2+
zk!QK$@VP-HiLeGHgtMU#Pi>w{s)Y+z3I$)O6`UHVsX1iRyE6Ane7+94s22q_e(fg}
zISH>mL{J>2s=fTr%*<aYOgJp|5uErf)ve@YqU4l80+g0)Md(peTF37BDA%)mHalrp
za~aA+f4jI#VEfQE=FO~xM5s|GvRa$H51HZo&}~8;f1q*n^Vo?vd>62R*%Q0Ay1nU`
zQDk33{d+Ophp>3h&9AVx>j$|dn%qP9q_mIaT_y3%e)&D3IvjzlYjC=FyFH%N?LTtI
zH813!igW)ZBymS~m)yV%1Ah(TOGd)tt=X8f%_Hb2hs10v&SLK7=XAExa<%_dY<D_S
zQME;=4HZJt>&NC8CCBRy09N0oYTF=(AL7`;wD$gjzskSxoIDGpHtOz~LU6W)0<JWH
zFuo1SSH!ihnxIpPon)`<LpJ&ZRV+|T0+xb3fI;`IPX_{K1WTo#`1;|SX(;sH<QM;t
z^Z%>-;(wq1`q%j4d%x;CzM#ivXZzke`H%GCJ9_vZ(u;rcivN^e{7-{F^xw0n|4n-F
z{}Nb8{I`(>fKG)@l}?TBe~K>tWrp-$rcwSa!1!lS^Pl|UpWpXZ)jz-gEy?)rEa(3c
zX#5X;^FI?u|L!^eyXpL|`tyIx`Th60^Z$=X<ey5!f3ctcQ%B<eLkEe4h3!At&rR;&
zs!Gf4Pq?DSHdy8pEU~Fwb>JS?BtOlmb%*>R3GwHGwUZbU^^ChQ(A`afd2;i9g~Omk
z)rlBHrlT{5g^5JbKV~+K5*~joXU`5E5VJmS9B-LW#hj>nbia9DzkiEWs{!$1#0;Xx
zO$jBTKA0q(qzJ(e@Y*SFyAwHr?eSH)HOOyIo*pA7C3Fzc!17i;je0*wlEe;nN2B`)
z>h!$3I|whfZt7byxICUF?3@)zrPHli$VbQ~rpFbnk1sIl#>qw){XLOPTWD9tr^yJH
zuA<yJ2nhv_q%zTQaJc&z$vp*ZQ?Gx%+d0pABXnFWy#>Ap3ctFp(?BGPuC%#=y{@E1
zW$6ArL&ru&#MnhLKE}SdL?Znz9~s4YCJ5db55u#T%na#C_NRz+qO?()yEPt%r{Yi<
zbY<>$YIy=KH7`3Xs<t`49)YU;u>4u3(m3rGK4xuJo^h6KJ_5rMNM+MFtlnxd7)>Ux
z45Z};N|hHZ&DomL(*>h)m*gDE;uh4+k(;71rK?J|ay2PD%z>GM*TkZVSma?B*vuIZ
z#iB|!$jQuqHU+6mNf#!|C6pE|7@pCXV=yPF%Saah<mStg7NEz{(#6~48cLT-2@)go
zDWdgDwUulrL(fD<DJ;r)nSC+Ex`bfzwTo5Ff1^lu39QOBFI2>D#q%ss#?zFO6(v6;
zY+USa6sU3NwAUOjWSep4sJf|ZB~2zS&QP6Br|wR)X&bl?J-?N;Nak#LF}ziuuomhV
z_joMHFR8q}Kl5_vzWEXX>tLMl-keE7fx+M?ecAVpldZx(-R-ak@Ol_+Qn_ROIjXE$
zdm5|&*6I4Y*0&r;&=710{d)G;xJ+jcq=~39wRxcBR-om>8L-J4j0$tkNo}wau!}p;
zgDuZmFD0ku666PA#R?$<7Iq`CEWy&nmxTg%n+u?DG03?ZX1V3`7gRAtXZMmR3Gk8w
z7tIsdoONjpds+y_7=2dDokznJX=V|l;|wA@N8*Q}T+fw;|1)|ju}NcJ2wTlxbU0@p
z8uY>fmI8%Bt$%Y2aE+NzHw8`jJ&4c0Kyu(DPFcx+O3fA69@=&HedO*#7y4yFBFvbH
zcjP9Pn2OztglsWtSh>*yk(vxoq2QbtPtSkqIJrtJP>{<$&&U1i;q_iPLz6A@CB=kQ
z5Z&|)!vVj&QYt>YdPrfHdDjTl&Q!xJsqC|EX-UXNg{+VCmmdmz9?|lFV@COJ>^*!6
zZW<bl=OG!E7v@Igq_k`p_jN)8aU_%cfyH}1*uGlyei|2RzU(=4U%;+A78hK_e*e!-
zQVYw&2YxLc>=5<V2QIPf_CRy;AWtu604-+JPs<tT$_a@jbNo$IM1%d#Ay1`K9#Zdx
zK5#jswkFJeg8Vqw`pCiuFJKPcc>F8Uw-3Ju-DLrQb#9Fjx%1to(}PS>e_TW|l$TLx
zm(TV2I&97kPteDH{WD!Z%+bA>%PkeQwm+|KT=Z7Sxb^XjWW7yBaR==>8<gZL>rF~!
z2!{t=jFc(Lr_=BRsDmcz>iv>HXTCtHe(b`w&?9S+Klq&qg2flGvF)~<;H~7#q0KNM
z#@Xiq)&pY5XB}q;(uU;0umG&tRuJ&a&tpv3MI1{yv%pVV{N|+&J*>*WtXil{!4@!^
zRn&dpo}yn~_L7vIM4nSuoNlc<_`gZrv(%-dgMWw@lPr)L2)O#I7p+Y0zqZ15<W4nm
zIe{6jVp=c^ZU~hg$4-C*%-?T#{s=0E)H#U2Ti=<SCL*p_*d(B5>B};qk1obpaEszv
zlhAOniezw-W)kPXgWP4FqwT1EwZu3<F~e*kjB?00_I5WbAX?c&afNhiHlSmIkrf!9
zsSGh?JBx!;6NijqNRs`dNZ;TJ{X62e9ry&E!xzw^^L|=E;Tq|;OyXbuBb9LEckE5`
z9t<hdEPiB7?1w8v(nca7bM~Y<<)$Akl*N7bFX;Rp4}VoA(+^fv2B-5eM--p5Kb<QK
zDh&$UFV<f@rU5|$xKM|=lXPXgj+LFDsaPgtyV<SNb7oLK4}%V8J%tW&ffwkP`VCA^
z;t^|t49jX;EPiDu{!$+?4o`$1aH>fp*|n{JRAZ?(Z;djB_b481zw|z}LdjcHviHxZ
z1W8gC9#goTDGhUF|Lxj6(N1-%rl8Y90_}~@eN$2t?j4)gcB0G##!jR%XNGi(jSKhJ
zD76SN2soMJQ`{MC>)u?$c?3el53O=OQm12N+hH*}t<X8Je|vA)6)6cmQH_ADVYWUC
zNnfK=mWH_OjNX&0xi6r*Su}p?=`5NQm-B1+R|BF^13WxcOz6$#X~7!sME_z;Eb?R5
z+v%eQc(KhvRR?LTOsB5EBn%?c0gHV_taDeO{i>K6TQ>4l;Bmq)=s30XvOA#fqsTjs
z-9M9r#)`>6qYp_5k|yT7n;DoeF<H>|5u}2sk%KgxT1H_QBty-;FOOvW3xKKF-u8oh
zw&LS`hZ<zn@jMd?fTc-O;=4Iv1kl}#6vF!CR5kDvTD*ci^`$UN_!Tg`A7yo$WLzN6
zAgH}=ppysprfmqdVF<OIVs%h!mQ=)os0!aYEWS3$)sn6x@1jLseo7s=fXvP>fJ$@C
zdGYg(-RGFuxfJ*NaIvBsayz)>LP!0W4<_VFY_*_)2A~IG7{z;q$XyUnw9`y5KdVVd
z8QN2#sp6!f@a3;eyR5zuuPoL_Y-4>mpY0wfiU*aSoz!y&#lcTl!mTqnttKbi5Vbup
z%$PTxO*qN{C4h{n;=<(_@TZk5n@1I8@?bd*k<e&xLOe0>X0bU`%=_5b(_XK`$2V2t
zlaJ^9nqq+YGw>WoY$1>vJzs1n2=@CmmyU>kG>Rb)B;h6)1L0^Z+4*)PYBQ&-M{I^v
zsSCs(|3@)@D25g02^|BX*DDHB@~(=4!11oK=&bkLyTw|~*1H{vw&uWTy!W|RVXlr4
zs$0T%nO&h~3c^WhBsWC86v;^aN5Fa#TE=U=uh(Dp-UB`WL|-HjNW2bno*LLFT*^!=
zBt<$|r2ecN5^gv^WQ=LwYgk5sel}+p3{$d3G_$kz`h)o@Yvi%9U}N5cf{Xf)`e6sL
zOgDjjnao-;NVGr3(m`a4c}h1vsKO)x0S%-fr|+cU&k&@izv?GziMNG~0x99tQjc##
zba|&SZc-jn>&3MUf+aD@o%*@Mo9`9^_R8^MXJYp5)x(kHVO#?CTjC$VA&wr1atbb7
z!2rgWrFWH}W)fU7jBO~J45=g%njaH}n73{K(r>Z*i5?fCk@;<yF&5KaBix@FsI^Zy
zv|5@h8Z>S~U9%$B=HqBBL$lEI;dt=Cz4dwGMC?sEVm1h};3i#MfmBE=$nHdvr6vDi
zN0<h~*xrc?L;(51&nkJ<k*^BQpI8h)l@vx~*9~(OhM6RciLX>p3_2#}7gWX?e|jvN
zni6q1W;P;8_e*HtcADkQHLLzdJ%MD3mV_;QKJ>9SJfd{`zU-_jgDCi~FXZuu`d6B0
zV%fiRiKi+0Y0e%qip|x|QR-O^q@fimODX(FicKABbc9UaCG#Z59yvT*5$>@Ht1lCI
z)f5W59(!^_LC#ubCvvGA$!x@AOKs|voQ1T%{Lp9k%-i(|X`3;k(nG^IMrx}pz=-OI
zRl5Q*;qY0P)T|r%@;G?;a{xOzINZP$mVffijR@M}%rrXi#wMF>ND+RKvI5ktvB?ld
z;P~j`ei@P)6&?xFo-jU;Z0Dn9*w{sQw`eCpHNVEjWu;Ar9t`>Jja6J{GK;P8@^R8`
z66dm@W2pMSRN=n6TsqO+{V;x#SU$YFdl@UAn=5D0Ss75lWm8{*oY-~oBE8pU+A3CO
zLqh$1-wnz6yp^|9aHx5-1g6t38)*{GvnwlFU9cQnGWgy*s&D;DzqHlo)3F|1TrdJg
zLvA`3_j2l8K`+2(Qa?oUo*@|y0przGeU&_=i5|8Cn^*hB4#8|b7EgfqR7ff+vdj2V
z3ezr*Hy$E8d2s*CE$WjFXYJ{Na+wUm$R5Z2w>#w|P25(>5U~_4KaR0_T54Qsk;2O9
zRGp-dzOTD>-Sb?~z5<LRch-I!W$yt<bm&%sRF*D*SXL}4--|qqxe5D(n)TaykMB|;
zHeH9{zX!vM&z?kefLtUog9ox36*r@xI$D|d;!jFThFWcoEuyPvL7pt$PI<5+SHTjL
zqvP5VDVdTYg0D`usS&%eeH2@xz1v02mxV<nEDL~*u-e5q9g+hfS!H(%8v5g9r3~Y8
zVJSg0iZzc<V&oF)S|;KW3H{`CFsC+N1Os0IwLPx-+bAFCwJF{#eC*d|`g&7llPv{f
zy(4dRE~CR42SWpu3+}@!_j7#pHs;ak@VTZMIk1+uxUyo$Df;h6UXr~>8w#!guh$d4
zn%>DB$PUbXbEL>v2z`ruG{~L$G%|7#vK{^vgj6%sbo@?f)>FtUyGPPAP3|PQE&S9w
zm0U_+DMRjc7$epo;)KGFV5M5o=asc_(Z<6YXycz^klvSd#!l8C@+Kh0b0|c0TVRs%
zPU;A=F6!&)BEM6QQ?bnG=_4#?RHpc%>wm+qx;)q3qC=iNB(~<l<GJYmG#K*@`7`mS
zJhgwNl{{#p#O-)=-fEb30e(lLyp*q)Gs_tF$Xg}+FUiQtU{)<rCsTEKedmay+wClS
z>vG_3zA3)p1mx;ML3dkQS+2xVDWbAxVS+_0)o)q{nS49bDHAIknvy41mAfV0hUvXw
z6^NqcBJyer><sm40$K*wy_yZ7Z&_@~&RE7}j#^rl=h$(7M~sFJdtr|JGGz;dQQ@wa
zB_(vdQa^Fa$g1G7o8Vm<%~O8~H0MJiY9c|b)Pv{#Mo5j=l!<~~b?Y-5<`#nzL?fUO
za6*6r23fGW;fc_h`k@rMe`6$&QG&RZ`RL`w7#}O4Z|Lgtd=9ANDBYQ%=}%WdT*{0z
zveU4|i@O}rrj|t<s)tf=R3M(34%Q~eF<RR?5r}3exV#;AoiXrO*f;hNbwlV06cO~n
zvFhhNIow=Ui+5D^G<>PXxtr__)Te~AY6Z-w*Tt|hHL^JNx|upouWP}J&f&iM|H4ur
zk*bc*FRdQ6w9ga466dk%)O3a~&tpkC-I5*u;+P=SE-iFW;LFM&?_B%rC8g7@7$yBt
zHNJ3$zU#FHB8r<aZWW+bXyq#>*~T4QW#8?AVO5qFYky$6ltFP^)Q{g7QZ>W>>QWtJ
z%S)ZX@e{6>Wjltzv2&V02%{k}&5Xj(yZm(~%|_Vu=RSKmEY04ynH76R0I82W0Q@VE
znoH?ilxsm)*?F=8-tBwP!)#gP0VSgIhK&i76JE_SrUYT`(C^RQ=m-LhuuX26DOQNh
z?)!z9<vsJ5u=(J&*8_4F3l5bX0WO0bv`v&u>Mnv22C<m^E)c=7O}Ny=SfJU0r1RI!
zZgZx-lR8jJHH0J>JCb~TM}2u=VfpHEqtPwbavb&*v(8`o?$4G9%>^v*@?UyLhsYd{
za^VM|CoiuI3+Fj4-5v}_P#>5v*I#Q;K9ao)rJ2Hus7{Y!BFH$+YgZ6~O;gCAyiZJo
z%ZfQzaZ}{%&ph^?K40{R){T+eh%1n<y&FBAhXrei$-Z45f0u}I$;d^$D^N})K#A#Z
z-NH!Njo!lRU)r^>x`3Ys9r%+vAj2kdK9=S{EwEgUNbbWt^bZZqE6t$C<*g-Gj-as8
zNMe@q|CUbAC$N?)Zfh;}T9vOt&-Z~U5HxGXrD(#6kWW%hQt4+?u9V$q8DTUgE!`a!
zzi|=$*6A$`Rb4?q>mMEoK$h=a&>xI;VtbN_w)T;P(UDJtQt@kxsMK9M3(_FFn|@Z%
zPx)LlH|KDriGed@I`LSI80dTH>BzW?W{982X314dp1^f|iIS(B;D=i+Ny5|ejY%b2
zIitnx+ePw;Uq6<N?C1ffQYeG&!b?vu57>SpaCCr}2$*@g9P)q_$^<*w@AvpaJtD2Z
zmo6|sr7|k`Qx}Uox-fIWumnd*xx~5f8pd2~46aH`XCjhL7lMo?nV$q1ozbD`QTN9Y
zi#x{6ikXJWPG2EVxfpR8faxS`<%hxnzGDNkd{SRtoIC-)GQBd45i<(*Q7u$oZA6<H
z$UwOd)l<RXizgef4X*}4@%i(;fUN(|D*58-(Wdw*#*;M+iaXTeh78C}&K*LB$A_Sp
z*-A(*0}s>^$-swB69$;JjG{zJ97F|npA?$oe|AbJNU|7>4w4mOjOn^(JLq2nR{kU^
z-U^$E5@KrV;oKjAm8e}!y@GGHGLy~Kn0+_IcHC+4HYe}sB`c^#ju;#wC)tOjwb4F!
z$k4a$p9~iEH3#!zy*{P{uU@y@p!tCqq9l0`_9pFaI^KOj`U61nc{;hc*eDr@8wu-+
zyF1In2MQ+rTH~ma=YF|+&E+$n@sWq`m_M)|22v{hNeT)X8Va^Kh32-q+`NZlBM{dV
zL(nbWO3r-_eZgL14!QIuj9{)Y2%kyZAY}@&jpG=R$5L7o3e|0wZp0g0cj&fX)!u}w
z&Eewh*LDc>GV>sJ&p?x6QaZPnWN>gKB?rIr*W4!L8loN(ZYNuh=ofY-M(k!lZ7gHY
z4T^-yVrRe3v0BCyMntQEB|#=ZX5lueEK2t`T$vOS9X4l^BJE6q7ydHa4pl=lnwKy2
z$g}y&ufz4pd=y?#pMbx`!lI2K91TE%Rgo;k%kxPtAP}+wX$dO`Ul^`HJ$#E%k5e3~
zTe04_xUC3oArzHyeYHU;XO0pjG)Bh^e9<r+Y01=;&UqiHY!z6nHe?uu2zs|-2~s`l
z94}u~II#KaN8i1(LEqqj)8io1xV*wFr6f0{00hypx^Xu56f?wT%`ktA_LMQqJk$p-
z9a_mjgIjY3SlO8rC{rLt;m}Y7SIR)?6}W$MJ9PlH#g)iifg*8q$zaw)*!{BxEsx}!
z<!$##+pn2&jHhdj>RP>aiSrd_F*NoyZ}ss7g5x9899Z>W0(UwM9K>lkDxZ)i_%7w-
z8hhv9h3oH-UjGA;rHE2ED}2~#-0B7zhza8in~?ilRv?MpGD2+Sz_6H0@<N<k;c(cP
z%n%&)ZEtte!s>>JUp${|Z1HVh&R~m_jtOm`#)U(?2l)lR<;rLE<k_(JSRk65Ct?;m
zZxC$ql6Llbw!&V>IW4V>&Umog%^z8o_&Hl_RzeS614|D&K~yvlRC!ow34ngi=2>&x
z^NG_Iq!o1{Jw9pkwP&p9LYI63LYBfhFSMe-DAA>m<MqgKwpht=O&Svh4MP!ANjE(;
zwtxiT?w$qo*#k(ldYQw|KYN2@Qv9I2oXd-&&R^bgu(wSQ@-R5JaH!GZ^uiNoCzMUD
zyM<jH0b<u0r{r0r_Zgg#hSGXHsy1YyQb_@FFP4i-%9o=+q1XsNWmNDCy~%-f;E84s
zISTrDAM%{GFgc6!L6S2sfpmJR_?GbAh?#NK)H5M238>&R%Sg<KPpO!mT5?$If8l-C
zjO$7RnF^GEiEqxhye3?SbZl0CqleVn*U$g#i%4@`ew{sRC@vUoI~%z_=MBKJ{fo0{
zB2_5WHRD!-)#~Swaa>1avgQ3eh!%AmIr%&&aJL+LJcRd!UAm5w32Syr;rTEH|6GgH
zZSm7<F3IjsoaqhALQC;gMTu>|=sdo0^SJ)9o`ypTfNs~Z3_~AMDKMx!&4spJ-Xy+;
zkJF)}sP0thsPW{mflM^WffKd9Dq*Xk=j5RC4~a7D#Kq40u{i_W0?W6ej&yx+5dMH{
zH#yR+Hn+;Gw8o6&HHezTujYn*vD?9$GBhk;DMf*N-+ycv{Bhb7+mEmX4<-8Dj4d1y
ze+gPFMoh~2ar@h3H2%PFR?H#mV5>da@R&%wD|=boO0O-s0TpaVYJd71Cf<gl=)^!O
zmJry{uBf0#Bg7bZ4c2r3feDSi)J3~`g+wY@Atn3w`s`NJIum6-tgTO~Lg0I5tq0n3
zS3~B_pi~!a#MfmNd2nYl!l`O&86WTxfid$L=eq*p?^6k<bjXDwyN{Lvnf+DY7T~Q~
zl<6VptEyVntYQ@t)UCFsN&2go=cLe|3n_KAt=d(kUnNm{=Gd?BrhoZV`-}Yq3Q+O(
zSOYu=s8*k!)wd=&%Wo@=xs?6H@5%$`>dXCu_ch>XQUg6~{vgMk^4{F=4%j(jNf*PJ
zkX-E8UHW(_5Ytb0s0s{5B3ntBOp9JVd*fd3eOP*-ro`ouBqYu9ciIb|P{vq!wuhr<
zS&Pq3o`)V~znTiP6-{#?17dd*%EJ_j!?fVWpnLlHR$wM<J06Mtu3Nu+Y+`6ew1-CK
z;cat0=oP>!RIvT9Y8OdJ%*@BnNsdQQNuStZ4NGa6NtY4lp-jxDYz)2}461I#c%n{q
zx!CrTTtLJ4le3I~j|e6K8ba#=ptJsPH0BbQBOxPa_x#d<9Rr!=UoXr|{OFJD{E)iU
zPs}@4uKSdqk}j*o^&08dI<(r(;Jo4hwDw`z#$RWmbSmsZr~1>c!b;1$qjlqX|GeTK
zhL}$)#%j0OfevWdKezlrO&7r*U)W%qKpwzp+jX!xH$IFtNHd=ndUqOR0b!&KY17wF
zoPxe#U^GE{3od<2j8s+ZtT5bnEh(>oHV*J-Nr!@4?Q{V7ZQmY0c%uX}uO;$cj#y&v
zXRlUu1#v+%y)0)DZH3RRtUN<ZN&WKU^CNUbXtYjr<f~dqHSDM^jzL*-%zfx;;h-t7
z?)QosK*Q}yUm687<U%ILw*)$$%u7h!U+wLmB4DguCB>Eonj&syC0JBARnMH;3*zsb
zEZcOp0jt?Hl)(Ny@(Npife5>b&pRxpddNo)+cpXNL!dPwkOPUpRW|8%uy~B1Y`L6)
z`e}94n4uc#&BJCR=R*&B@0vMc>p*Gw;GLTp*BlBRj?nz~2=HyMrjxZ5g)8mHQ@CA&
zqooT*0}(oVyBl*c3Kjs$@-+-yxD=ztg8d=FY|uovq{oGU()q#qD?S{ra_ApK@xe3@
zIK;7nYw=k+EiMh%w-;m-;$qv@hV^BMXbI9;&P7P@h^e{{od8~HBV11tOkp~tx4O+w
z+x8(zAs@dbOn4unZT#12)XQ!fVLB+4)qvY5%%7qoXKS1JQcXNjwIL6N-dZoH%H-4m
zRl%v53()!uzbRsyGlp>j-%C(Vn%4Zes;e)>ltWtgZO6l?9+%qv;M|tLIz->xL)>D=
z$Ki&&OI1wFLJNwT+-#PI5(9^VNj;`p39ZR1vw29N8>Tl(2A13dK|{jBd|+pQp-qAO
zK+nIB3uf2)z6gpFYJU+y>uj1;#5Zp8pSlzZnfMInQ!n&1HIz|Mlx9jk_~FD2(K9a=
z4HYBt443St)WR6s{i+u9%HkD_4~d!8HRRkKpH=)o1EF$Vz|+xF5tunOG_a4_L)SF2
zX6Ekl8Ec%cNu^p9FTHRkgxWCh>~DVg<Cca+Hb$=bnd@t~9+v2Cn-W+xXvTB}e%AkB
z&yWLr3;2IpyAp7yzW4vFq@*bA*+$BmaqsLiwrp*-vSg=WW|$0P8QH0%MWsZfETNL3
z^=nHcTSCa1HAS>2QD2npe@2_`^!$Fm|MPtB<9Xb9=RN0r&%2#_@7!}f>lv6z&)U^}
z{j^Th=d+smH0`0>rGvYCS9LdKZ|`o}_t(g3PjMV-T%lAfj}^>zH4k~&MCIffgh!;>
zKZ|G+7A{GvE{JoIErg#_&)6s0CrHL7U2L-P7VRngqU65kaB$8*z=j~-{<J}H;U^lE
zFQi6W582#YM=H0uSHJH|TiMA7w1k_>+}w?#7L9B>gN_%Q-EZ6G?&;}RE7U(2JW&&-
z_fO2}><&NYrCZd5)9dDC-6b*4fqn9qtUXD!iP>b`89o1rep%vhU0tU~a*cT6Xx-*U
z;nozC>45;d#PEO<aodVnpWKIYTC6FjL`f6Fi$$7F_dI@;Z+(<P;Y)krY+!Banwh@5
zYfeMo{e_{T)%(?|^^<O!99w1{+^HJ*usi6*b!%g4UT(mdI4IQru{wLuSs877?aowg
zz}_~q2d!1I1l1FQu6Aj{a~PXG<axXf3E=onzt3LOg4$3MF{t4}zR)SOO9rhICo(Sp
zg(en~l!AoQJ2zlrn8#JWbWRY2a|5PdEcCHhEh1L5mFT~YFb_MY;ljYwXkT{-Za809
z=2-Bw?}PNa$(G9)-Kvej@*aNqdiA~r3U3<t_#6JlGx;AmtcZ_wjHcEGwl$6$OWyFQ
zjXPXeusAPh+)m+5`>B*E4UxGw_VT_mFl%SJ|G3v44@T3>8v|U3ATC}O09ywjq(4?W
zVgbI-_ZJ2Lb@p<{nUB9-7=Xb3Wq)4o2>)@<y`Re+aX`Y)w+FPqLPzelXWu|lOu@3o
zZ!js`4aT^O8E0)}#>Gha29NT6_p)Eh6@P3$=J)$r#2M`NSzGzRT*Ci*Eh2`DCCvWe
zJ;sl-yrs2#YfR?aTOM+deR9~^%gKkD2m&iua)&l9MKSO=fr!<#1)`_vk1^SEeIFjb
z<hQYX_~2^GuK5i7nxjif%lGJyxywfv#bygc6P5^U-+^)+j_{w{dNbpYaa&(};)se@
zM8qYpe)NM;zr=BcH6a>Xl6PKRrF$bmy=vU`;DmOVYO|3<8lT9!);E?b|I*yCa9Bf?
zFjliL@Lj;Up%rdxYUEaiNB`Y<oRco2J>0h;)jx%C#^2v}EF=AGe{pefli=US4$}+*
z!zq}qbJ15%66+<~8{cux_v)l>SMzW?^Yq-hW$VsIMlKV%X(0G?Eur=T&MYC4rEh%G
zKwl5XGSc6v{PgmgCdHntV{acm30oYDu4kGwoIJ+weF-h~^mH+F;@q8`v0qc9o{Sm$
zPlOxD7!+5Qnr(N9O3ij_*rG2OHzih*;gCq(jdeLEP)(5ux2)B7AV7j~>DBA|@C^9V
z`sJ0IsKw{#)t{<888V+R%O7XZf-Cs2MzHj`$P?%1I!wgbR%`1<zibKCn-r3ZRXjeC
zHfBAqoPR<KZ{i&({%DnlG+DYuqxZ5@VsfOm(fo40X1+`T%z`HV1c_Cu0|u=kn_>@I
z?^anU><=YxmDf16dhA0-JMDFcfg)aI{+k2gi=PP8-CbQJNC@x!>w+`dM(TAdBVa-9
zn+Aa=%{MMObqp->^2k5BYv55qOR(QvR_m4ny+NbT_SBv#I!R3MZObfr8awpC;FV=<
zpk1w9`#kt%5loO-5VO#KBED=_&S2N1+=TgC<Dt9v>_3+TbiDtla%gqMde6{Dn2}or
zy}A2SO-EJwbZYF$NzltvkGI}0xAL?om0t5WxOHW=`-?V99Vz}Q*bnV@*yMJOr1kjO
z;q@KGJy%aFA3Z3imAEmr;;7M;i*r{d1s;~>6VZM-=3F;`%@(C771Yg<eecwn@XwJp
z;e8Ev#jrS8@uP?7x0Z-xTsJ1ZwL4X%&lfSgLBp#@KTWL9xMT3)+u)5?&Lj=1t6zWC
zJQkI%RunYUtu1z3nh{6{ZgR24QHJ>S=1Ni}>ZSxadh`XtO?7p_e_tB=IEC(ZyT5#k
zsZQ2<+K0YFZ4FlN^<`q+y$99A=mAY9Wk=*cj+=ipi#48FPVtRWNKn0iKeCBG;;4dT
za&y2y!EkSDir{tqYpwZFX#x#(H-=&HwBnuay1S#`c)hcSzwSS<zM#@PDVzOPy5-Kp
zu`DC4(fr7QYo(9Rm4xhjnL4egbavC_xtIFiHN5q7Q4rM|74NW0QVpp(c*$m$R#>?E
zw5dhfYd`B`(UGsEcjQDVWqUUG8T#$ZO1Zqmt1Wu$fhXVK8P<Hp_O>`Bi|TwYrM}D~
zjx2V4s65`KSwAGVAZzpe_$FsJ$c#?BQo=y1y9T{J;cBjq6NuS+>(!iC+3joBudO}L
zI+I$;P*hwd_6q+bBK&Kk^jEv?52Nwp-ac=;GIs0=K&9gfH(eVp+nZaOcfD3)tZisu
zcm5v5O_AaI6_2D?-Ft8|{T3hbaEs#9v(STj$4q*Usb1^UIT<N));qBNbCN}#_ygG^
zt@@~`3R~3<ACufYR*%rzgVfY@O+w5Uj%-TR&F?FZ`tUHt+kRJN(4A_Z4+mThUNE@r
zcG0+<Uldhl_%N_%xr0=vGxdR2wY2i>%n=FF<|GN{e;(SN(SG58-51q2D4|U^5QILw
zCc+2O=r_%RvaD-r!Yy-As?+c9&h2t|46)5lmDt^4l0?di^_KQGo6)mgKMzxJf8BY9
zzh+fRanYT>i243GJtvNaPzwq<FNK^Y^B;I9MA#P_s7dZTIM3p7$x4#Q^QrRW+;CcV
z-;I+tBjv%@Q?I47?DhH*iBH1L(3Xj!lf1;|+;|r_unCT_b+wJpF&y_i^TupjZ(mS!
zmsjC<QP`G;68T0Ab8NJY{$ZEeHCXMV@3m`|A+<}foco+~%C{7pcmLctbuF<lv^cJy
z|G02ldKj*Igik0&OMXGcU0<91f^89E?JX{nJq+xI(C9hoOPBadGIO$?@qIbXu7Ovd
zR_XOg9qO6BrJ!8jT@sml`2MR3r6&hFV1IGi(-#ZUwS_dX@%v?-e>{1sAz7{L%Z|_S
z%e}W27;@^n;|45iG!hl<bDuT8%Y4dqeIE)J@~y*9b{(pCrX*nXbZU_>TJD;gb4Xgw
zC869&`wu9W3K!gDLw1?zi0ewPw?#ted-^iRyelQ|tvst?Ua3)J^d&|wA$xCk2TLaG
zj6ge8kTxddU+vJKDbZ^sD0r7yPB<xZ;F+a`z~w!n4bN!oEAI6f9(yjLa<uLZCU|wr
zWweDy@kggRR_vTS6Dze~v964HR6)wRy?Jc|=TCf&Pmp<XEV`yJyX%I(ltp5Ee{uOc
zqHVbL<eKRlB9bp-1Fs03;TZUtUCz|qdP7-_IQZsGlyl*!ywxL1k4SFbXLQ>%I!SrE
zC1aQ5ZGyB=?#fT-mpbwOpC-yi3ahVf8S;qd*g55hsHlZ34_f7dZf%sD({W1naOBh8
z@uWjjLxzve){A&Z9WRk!T2Be)Q@bg`RwmbAY0C>X=LBpjvG`med(9j5L2s`Hdqr7f
ziErBTl7Dj0#HL^()PG}nKw|%=(QSmULTl4+nusqjr$&}8;oRmwny7uYj;0}?^qKr6
zQ>3rx&8l5)j(UPcR_7J(&I?&*d)}6BW0BeBT?ujzO0tXIjLLhb;uDRDn^o5YH9kM$
z97?)K4T2h6+=se1#u=#`9HbKLZS-C2AFA0*FG%tCZ{{0b@g>P9Eii0K+#c`SQW=}k
z%8&|Tnhw{ZgkGLEQOWi$(Iq`;-8*J1Hhn$x(F=umE2{LAo&9@vzrNfRX<bp?gYs(F
zq2&@ikMFM0T3?}xNjwy3A#!|q1uj1H)6m+*CC&$Iq<sARpDx+Dvd=*%Vsh-EImut4
zRd-RvwwGHcO4;pe*jb4Kv|@Z!Hce0@<*kSEgt>KGo=Q9}Uzt24&c-&Ja_Sn7x65-J
zl9*<8j!%A6_ZfVnqN8-WoPD$QQFX|^x(TQ0t_ef2V!QpJ<!>{31RBNlnBg*mM(Ww=
z?Hvm{o~Y%xi0{0oVx#~a534e%&(U15!Umm7W8P}E5EZ;P>~^|{fW9A(DdB%^8?5a;
z{%Hj!{-cqo-Nlx(3H6Iq!+YH1C5)*@D(3~Jrq`THWMZ!5V=k<FXJasTtyc5SywMmn
z4~+-9D@QH9W@J;^bS%sG1GLfu^LEcq%uo3|=GQkok7&jg3EHn`7PhP@bB$Kd<&~9}
zD1x-mNKt1eqO~VOkW%pWL}t|p;r<fa`n8W$(jpRETeW3fosyR+*p|_=cYj42hMtdB
z5Ye<=c3#u&y04evu_lWuy%DViD-KM4t*xp;$1@c@+MUIAQ4ek?kU81BYuA-FvYNG$
zS?@zMT3Xsfq+-Q-z6#5M2aShXEX6}^3TTY9y;?x?yqxZ^nsyS7+thmMXm0maQ<dBR
z!aC2&Eb$H$ZBVA*P?}m~sZmC6u*2m&Moo^kdFC~Vt)?}$`5}69W4Amg?AoyH+JN<S
z+jo1=EUP?|lF8|R-b}|VdmYK9b}E&z+48WKL`e>fl;gyDPK6o=ov*UpvlsRrnAVtB
zeWKqz=t#<%<7X`nzxyI&ELJ6C`07G`sfMApD1T*tX;*~*{KBW#ODVP63N`P%7ZFJ@
zAGC4G+AOtg_v0P&ZzWh&dEAc^b!J3mv|{IH8o4=jS4-zHTjO>s5C`LfFr#yXd=w1>
z_qX4ae-*c8M?haknO|$)!Q%V6w~otrL|@CCjBOe0xYy`2snF5y{`&9h8ta3V4H6UU
ztSl^VKlkX}Ul?evd4Pr&^!E8`XgBx6=<1k5Pkdj6WmW0kiM+I6%}_1tqfpxYTO!`{
zCc~A-6rDP?1LlS$Ps!NUEp^h}xzot34m$irF)M?znD^!f2Kc5wujXfNe+=jxDvipa
z{(=v+Fwp;j9SYC~er+DCs^RNSb#$R~P!4n_rkk4VVEzMH6qBYVYmPU74BWNp&dhE8
zo^%s`LsLh87e_KpRzrP}ss@$DcAzWyQr(ppOcp?H^g;o=RJHZm#`;^<a~XUozOL@B
zKn;|yE6dG`;;SY*(}V)fxnyNo)C>#9MNQViU?+;hX0u$F925pkM#D-lb`y%>2_Qp#
z*q$yZ7)_7`U3$_O6l0w&-#Y}K)MRJDkNRM_=%dO2yOgX9VU#f#CBUKN<>$ts`YO43
zNdtK^iZCsHHgoer&*+2(FI+1#EtFw2r2In=_wLS<>CR!Y-B4V?R0p;<M@`n-n@OX<
z3<nZ~p<<O7c&eik4AW^!5IB@z9E7FQF?dG;8U7*8eU_TP$pc>ifmmP%a7Nkb$)<Tb
z(mmB=x9-$OZFOdQalXk%k<k!dR{8tz|1GV6Z~klr;Lz`8Pz5{`0ME)~_~`&>PIU|f
zW0fGh5{x&6u@oGh0%O2O3IwStBRGNcK~wD+M9=@F=`7A!P1&BNASBdj`<U)taxU^H
zJQ_e!|H7%oaM5Xvsk9k9T3tUnz@%kxe85}o-_)7u^LrPn%3KG|GTpyzOsczq!GAWV
zt1CC&xqK8IwxhQzc<X+<&i^Nx{_OyuIsR1G-P@BjlR7j<Wjc$_)y)gU01PyyIZ_yG
zPk{OhlGoiGARq($U+^>n1gxyOD)^Ga<gn=KGl{9m;(R+{F}aCGVNu<j)MR~?XmkeE
zo5hh;2jM_-rK5pAzxjj3RR4A|(^C0YZ-4OC3<;z@aGdhb!lJ$i^)DU&UPr&GDcuc}
zP#<os1ZQ3xFnU++8TXOPtThim(q8UQSuYPB4snwRn4FtNayk@l4nlYs4`Xrone>x|
zAX&f$()7P@e9s5DnRK#|bFul~6AJWi%Js9{FAvJAtA8JhuP2=WkiuaMW)Zgpeml76
z+5jh>f%--x18jN#%*#DcZr^Ay77K&?{f!2Z2^g^57=nhyaRJJ?$B(w$bAT?-{WqJ2
z=R%(UMk5n&1n`(^HVvLJAcBS=VF4Zjf)4^?K+r(R0U+YP@d2Ij09cua238;;XjlS<
zcN};e4-JDQ^XdX2Z2*Hg9PH1AkU_u`5jq3?2oMhqOC%v=KoACpLG}j`U<j!nnT*FG
z><mLBJefxZfq+Bsadjc`(6D5%Q0w<`0J%KAgvdnhMGp7)83$xC2I&u=FqzEjBO(^$
z-0y7(L;?>D2h57#19EXlJ{UtJBJD>e5n%)kAiI+QehWe_2@4^{0R!6uBq;<Rfy5Ip
zWC97#!-vE0_#9XdSn2ovU?K!iz<;Na31lpCynu$x8HkMI)erQCN7@CCBP0C*%m?xK
z63_q;IYJj49vSx#9uE=@!AF3|Fj6j=2xEB0L4?63hDaGi0@4PcKOP$pz;6KvUvk3_
zStGbKJP#j^%%cnFkAU<Cfk5W*8<_+nV-3_R0ukAkOvFOGHo)@6I38ps!UjMtPkw;R
z;;kh_95SA{set1dFCN2F!$5yTq@9Tb5*bMY7T|)}aF1U(O~fPXE=0!Tc;*TsxWq=<
zfJg#K&C?%&H|~i9o;V=m@w_^dfRhmYk+5J=LD0xVB5(c@iO8A_#zEwXHP9aqABl+N
z)sINR@x(I`a3XcVlXz+|;36SyMTSX#h;cHne~Dz?S_h^SWL|>)cw(H4#o>7TOX8W!
zATkO3{XPy5_arhR?m?X-BIi|5?_s2W!F+^4&UxTs147nL7{<YPgfC&x5}B8voDz6*
z3($DtnMD4>90eg|Kx8tGH{Zx0yO1^j6%dhcz%DS-AHV<v{H(SdPb!l|_gti^ic&UV
z`qQ}=XDDT3Hk*S28l#L+%G=!-Y!rwHlri|-QU_(NO2mWVK)?+I2v`q`hczK^FRQ7o
psjW-W*45Qk|L+{Oi@>joUL2|?XXcU+T;O40?4peub&a+x`ag!(7<m8y

literal 0
HcmV?d00001

diff --git a/src/DXYZ b/src/DXYZ
new file mode 100644
index 0000000..e5eaca0
--- /dev/null
+++ b/src/DXYZ
@@ -0,0 +1,5 @@
+C
+C     Elemental derivative operators
+C
+      common /dxyz/ dxm1(lx1,lx1),  dxtm1(lx1,lx1)
+
diff --git a/src/INPUT b/src/INPUT
new file mode 100644
index 0000000..124d572
--- /dev/null
+++ b/src/INPUT
@@ -0,0 +1,19 @@
+C
+C     Input parameters from preprocessors.
+C
+C     Note that in parallel implementations, we distinguish between
+C     distributed data (LELT) and uniformly distributed data.
+C
+
+      common /input5/ xc(8,lelt),yc(8,lelt),zc(8,lelt)
+     $               ,bc(5,6,lelt,0:ldimt1)
+
+
+      common /input8/ cbc(6,lelt,0:ldimt1),ccurve(12,lelt)
+      character*1     ccurve
+      character*3     cbc
+
+      real mflops
+      integer*8 flop_a, flop_cg
+      common /cflops/ flop_a,flop_cg,mflops
+
diff --git a/src/MASS b/src/MASS
new file mode 100644
index 0000000..16cde4d
--- /dev/null
+++ b/src/MASS
@@ -0,0 +1,4 @@
+      common /mass/
+     $       bm1   (lx1,ly1,lz1,lelt)
+     $      ,binvm1(lx1,ly1,lz1,lelt)
+     $      ,volvm1
diff --git a/src/PARALLEL b/src/PARALLEL
new file mode 100644
index 0000000..67372b4
--- /dev/null
+++ b/src/PARALLEL
@@ -0,0 +1,31 @@
+C
+C     Communication information
+C     NOTE: NID is stored in 'SIZE' for greater accessibility
+      common /cube1/ node,pid,np,nullpid,node0
+      integer        node,pid,np,nullpid,node0
+
+
+c     Maximum number of elements (limited to 2**31/12, at least for now)
+      parameter(nelgt_max = 178956970)
+
+      common /hcglb/ nvtot,nelgf(0:ldimt1)
+     $              ,lglel(lelt)
+c    $              ,gllel(lelg)
+c    $              ,gllnid(lelg)
+     $              ,nelgv,nelgt
+
+      integer        lglel
+c     integer        gllel,gllnid
+      integer*8      nvtot
+
+      common /diagl/  ifgprnt
+      logical ifgprnt
+      common/precsn/ wdsize,isize,lsize,csize
+      common/precsl/ ifdblas
+      integer wdsize,isize,lsize,csize
+      logical ifdblas
+C
+C     crystal-router, gather-scatter, and xxt handles (xxt=csr grid solve)
+C
+      common /comm_handles/ cr_h, gsh, gsh_fld(0:ldimt1), xxth(ldimt1)
+      integer               cr_h, gsh, gsh_fld          , xxth
diff --git a/src/README b/src/README
new file mode 100644
index 0000000..8996aa9
--- /dev/null
+++ b/src/README
@@ -0,0 +1,66 @@
+  Nek_comm-1.0
+
+This is the communication testing kernel for the MPI
+all-reduce and point-to-point communication used within
+the nekbone mini-app and standard NEK5000.  This kernel
+runs a battery of platform timers using the MPI standard.
+
+To Run:
+
+  NOTE - Unlike the other nek codes, a data.rea file
+	is not needed, since there is no geometry
+	being set up.
+
+  After untarring nek_comm-1.0.tgz, change working 
+  directory to the nek_comm/test/ directory:
+     cd  ~/nek_comm/test/
+
+  Edit the makenek script to specify the compiler and
+  appropriate compiler flags.
+
+  Compile and link the code using the makenek script:
+      ./makenek n
+  where n is the chosen name of test run.
+
+  A successful compilation will result in:
+
+  #############################################################
+  #                  Compilation successful!                  #
+  #############################################################
+  
+  And a nekcomm executable.
+
+  Run the code in parallel using the provided nekpmpi script
+  by specifying the test name (for logfile naming purposes) 
+  and the number of processors.  For example, to run a test
+  called 'n' on 4 processes:
+     ./nekpmpi n 4
+
+  This will produce a logfile, n.log.4.
+
+
+Interpreting results:
+
+  The logfile will have a header describing the parameters the
+  test was run with and the output of the timing tests.
+
+  All-reduce tests are output with the 'gop' tag:
+		np  nwds tmsg tpwd
+  where,
+	 np - number of processors
+	 nwds - number of words
+	 tmsg - time per message
+ 	 tpwd - time per word
+
+
+  Point to point tests are output with the 'pg' tag:
+		nodeb np nloop nwds tmsg tpwd
+  where,
+	 nodeb - the second processor node 0 is 
+	 testing with
+	 np - number of processors
+	 nloop - number of tests run with these nodes
+	 nwds - number of words per message
+	 tmsg - time per message
+	 tpwd - time per word  
+        
diff --git a/src/TIMER b/src/TIMER
new file mode 100644
index 0000000..60a761c
--- /dev/null
+++ b/src/TIMER
@@ -0,0 +1,19 @@
+
+      integer tmax
+      parameter (tmax = 1024)
+    
+      integer gopi(tmax)
+
+      real  ttemp1, ttemp2, ttemp3, ttemp4
+
+      real trzero(tmax), tcopy(tmax), tsolvem(tmax)
+      real tglsc3a(tmax), tglsc3b(tmax), tglsc3c(tmax), tglsc3d(tmax)
+      real tadd2s1(tmax), tadd2s2a(tmax), tadd2s2b(tmax), tadd2s2c(tmax)
+      real tlocalgrad3(tmax), twrwswt(tmax), tlocalgrad3t(tmax)
+      real tgsop(tmax), tgop(4,tmax)
+
+      real*8 dnekclock
+
+      common /timer/ trzero, tcopy, tsolvem, tglsc3a, tglsc3b, tglsc3c,
+     +tglsc3d, tadd2s1, tadd2s2a, tadd2s2b, tadd2s2c, tlocalgrad3,
+     +twrwswt, tlocalgrad3t, tgsop, tgop, gopi
diff --git a/src/TOTAL b/src/TOTAL
new file mode 100644
index 0000000..814bc18
--- /dev/null
+++ b/src/TOTAL
@@ -0,0 +1,5 @@
+      include 'DXYZ'
+      include 'INPUT'
+      include 'MASS'
+      include 'PARALLEL'
+      include 'WZ'
diff --git a/src/WZ b/src/WZ
new file mode 100644
index 0000000..502e87c
--- /dev/null
+++ b/src/WZ
@@ -0,0 +1,7 @@
+
+
+c     Gauss-Lobatto and Gauss points
+      common /gauss/ zgm1(lx1,3)
+
+c     Weights
+      common /wxyz/ wxm1(lx1), wym1(ly1), wzm1(lz1), w3m1(lx1,ly1,lz1)
diff --git a/src/bg_aligned3.s b/src/bg_aligned3.s
new file mode 100644
index 0000000..8a5ab9a
--- /dev/null
+++ b/src/bg_aligned3.s
@@ -0,0 +1,41 @@
+.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4
+.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9
+.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14
+.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19
+.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24
+.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29
+.set r30,30; .set r31,31
+.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4
+.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+.set f30,30; .set f31,31
+
+.file "bg_aligned3.s"
+
+.globl bg_aligned3
+.type  bg_aligned3, @function
+.size  bg_aligned3, 48
+
+.section ".text"
+.align 2
+
+bg_aligned3:                       # stores 1 to *r6 if r3, r4 and r5 are all multiples of 16, else 0
+  andi.    r0,r3,15               # cr0 <- (r3 & 15) compared with 0
+  clrlwi   r9,r4,28               # r9 = r4 & 15 (upper 28 bits cleared)
+  cmpwi    cr7,r9,0               # cr7 <- (r4 & 15) compared with 0
+  li       r3,0                   # r3 is left at 0 on every path (cr0 above was already computed)
+  li       r0,0                   # default flag value: not aligned
+  bne-     .L.das_label.58        # r3 had low bits set -> keep flag 0
+  andi.    r9,r5,15               # cr0 <- (r5 & 15) compared with 0
+  bne-     cr7,.L.das_label.58    # r4 had low bits set -> keep flag 0
+  bne-     .L.das_label.58        # r5 had low bits set -> keep flag 0
+  li       r0,1                   # all three values passed the 16-byte test
+ .L.das_label.58:
+  stw      r0,0(r6)               # write the flag word through the pointer in r6
+  blr     
+
+
+.ident "GCC: (GNU) 3.2"
diff --git a/src/bg_mxm3.s b/src/bg_mxm3.s
new file mode 100644
index 0000000..ac8ffe5
--- /dev/null
+++ b/src/bg_mxm3.s
@@ -0,0 +1,406 @@
+.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4
+.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9
+.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14
+.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19
+.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24
+.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29
+.set r30,30; .set r31,31
+.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4
+.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+.set f30,30; .set f31,31
+
+.file "bg_mxm3.s"
+
+.globl bg_mxm3
+.type  bg_mxm3, @function
+.size  bg_mxm3, 1412
+
+.section ".text"
+.align 2
+
+bg_mxm3:                           # args in r3..r8: r3/r5/r7 are used as array base addresses, r4/r6/r8 as pointers to integers; returns 3 in r3. NOTE(review): presumably a matrix-matrix product kernel - confirm against the caller
+  stwu     r1,-96(r1)             # allocate a 96-byte stack frame
+  mflr     r0
+  stw      r0,100(r1)             # save the return address above the new frame
+  andi.    r0,r7,15               # cr0 <- (r7 & 15); consumed by the bne- that ends the prologue
+  stw      r15,28(r1)
+  mr       r15,r8                 # r15 = 6th argument (pointer, dereferenced below)
+  stw      r16,32(r1)
+  mr       r16,r6                 # r16 = 4th argument (pointer, dereferenced below)
+  stw      r25,68(r1)
+  mr       r25,r5                 # r25 = 3rd argument (base address of loads via r21)
+  stw      r28,80(r1)
+  mr       r28,r4                 # r28 = 2nd argument (pointer, dereferenced below)
+  stw      r30,88(r1)
+  mr       r30,r7                 # r30 = 5th argument (base address of stores via r23)
+  stw      r31,92(r1)
+  mr       r31,r3                 # r31 = 1st argument (base address of loads via r20)
+  stw      r14,24(r1)
+  stw      r17,36(r1)
+  stw      r18,40(r1)
+  stw      r19,44(r1)
+  stw      r20,48(r1)
+  stw      r21,52(r1)
+  stw      r22,56(r1)
+  stw      r23,60(r1)
+  stw      r26,72(r1)
+  stw      r27,76(r1)
+  stw      r29,84(r1)
+  bne-     .L.das_label.15        # r7 not a multiple of 16: print the "C PROB" diagnostic, then resume at .L.das_label.3
+ .L.das_label.3:
+  lis      r23,dummy@ha           # r23 -> static scratch area `dummy`
+  li       r12,0                  # r12: address recorded by the second alignment check below (0 = none yet)
+  addi     r23,r23,dummy@l
+  li       r13,0                  # r13: address recorded by the first alignment check below (0 = none yet). NOTE(review): r13 is never restored; under the 32-bit SVR4 ABI it is the small-data pointer - confirm this is safe on the target
+  li       r27,16                 # r27 = 16-byte step used by the update-form paired loads/stores
+  addi     r23,r23,-16            # pre-bias: each update-form store adds r27 before storing
+  stfpdux  f13,r23,r27            # spill f13..f31 to `dummy`, 16 bytes apart (restored at .L.das_label.10)
+  stfpdux  f14,r23,r27
+  stfpdux  f15,r23,r27
+  stfpdux  f16,r23,r27
+  stfpdux  f17,r23,r27
+  stfpdux  f18,r23,r27
+  stfpdux  f19,r23,r27
+  stfpdux  f20,r23,r27
+  stfpdux  f21,r23,r27
+  stfpdux  f22,r23,r27
+  stfpdux  f23,r23,r27
+  stfpdux  f24,r23,r27
+  stfpdux  f25,r23,r27
+  stfpdux  f26,r23,r27
+  stfpdux  f27,r23,r27
+  stfpdux  f28,r23,r27
+  stfpdux  f29,r23,r27
+  stfpdux  f30,r23,r27
+  stfpdux  f31,r23,r27
+  lis      r9,10922               # r9 = 0x2AAAAAAB after the ori: multiply-high constant for signed division by 6
+  lwz      r4,0(r28)              # r4 = integer behind the 2nd argument
+  ori      r9,r9,43691
+  lwz      r15,0(r15)             # r15 = integer behind the 6th argument
+  lwz      r6,0(r16)              # r6 = integer behind the 4th argument
+  mulhw    r14,r4,r9
+  srawi    r11,r15,31
+  srawi    r16,r6,1
+  addze    r16,r16                # r16 = r6 / 2, rounded toward zero
+  srawi    r0,r4,31
+  mulhw    r15,r15,r9
+  subf     r14,r0,r14             # r14 = r4 / 6 (sign-corrected): trip count of the .L.das_label.5 loop
+  rlwinm   r26,r16,4,0,27         # r26 = 16 * r16: load step for r21
+  mulli    r22,r16,80
+  subf     r15,r11,r15            # r15 = r15 / 6 (sign-corrected): trip count of the .L.das_label.4 loop
+  subfic   r22,r22,16             # r22 = 16 - 80 * r16: alternate load step for r21
+  mfctr    r29                    # preserve CTR; put back at .L.das_label.10
+  li       r18,0                  # r18 = outer loop counter
+  rlwinm   r28,r4,3,0,28
+  cmpw     r18,r15
+  addi     r28,r28,-32            # r28 = 8 * r4 - 32: alternate step for r20 and r23
+  addi     r19,r16,-2             # r19 = r16 - 2: CTR count for .__loopk
+  cmpwi    cr7,r12,0              # cr7[eq] tracks "r12 is still 0"; read back with mfcr below and tested at .L.das_label.10
+  bge-     .L.das_label.10        # outer trip count <= 0: nothing to compute
+ .L.das_label.4:                   # outer loop: r18 = 0 .. r15-1
+  mullw    r23,r4,r18
+  li       r17,0                  # r17 = middle loop counter
+  cmpw     r17,r14
+  mulli    r23,r23,48
+  add      r23,r30,r23
+  addi     r23,r23,-16            # r23 = r30 + 48*r4*r18 - 16: store pointer into the 5th-argument array
+  bge-     .L.das_label.9         # middle trip count <= 0: skip to the next outer iteration
+  mullw    r9,r16,r18
+  cmpwi    cr6,r19,0              # cr6: is there any .__loopk iteration to run?
+  mulli    r9,r9,96
+  mulli    r0,r6,40
+  add      r9,r9,r0
+  add      r9,r9,r25
+  addi     r11,r9,-16             # r11 = r25 + 96*r16*r18 + 40*r6 - 16: start value for r21
+ .L.das_label.5:                   # middle loop: r17 = 0 .. r14-1
+  mulli    r20,r17,48
+  rlwinm   r0,r4,3,0,28
+  add      r20,r31,r20
+  subf     r20,r0,r20
+  addi     r20,r20,32             # r20 = r31 + 48*r17 - 8*r4 + 32: load pointer into the 1st-argument array
+  lfpdux   f1,r20,r28
+  mr       r21,r11                # r21 = load pointer into the 3rd-argument array
+  lfpdux   f7,r21,r22
+  lfpdux   f2,r20,r27
+  lfpdux   f3,r20,r27
+  lfpdux   f8,r21,r26
+  lfpdux   f9,r21,r26
+  lfpdux   f4,r20,r28
+  lfpdux   f10,r21,r26
+  lfpdux   f5,r20,r27
+  lfpdux   f11,r21,r26
+  lfpdux   f12,r21,r26
+  fxpmul   f13,f7,f1              # f13..f30 are first written by fxpmul, then updated in place by the fx*madd forms. NOTE(review): exact lane semantics per the Blue Gene paired-FPU ISA - confirm
+  fxpmul   f16,f8,f1
+  lfpdux   f6,r20,r27
+  fxpmul   f19,f9,f1
+  fxpmul   f22,f10,f1
+  fxpmul   f25,f11,f1
+  fxpmul   f28,f12,f1
+  fxpmul   f14,f7,f2
+  lfpdux   f1,r20,r28
+  fxpmul   f17,f8,f2
+  fxpmul   f20,f9,f2
+  fxpmul   f23,f10,f2
+  fxpmul   f26,f11,f2
+  fxpmul   f29,f12,f2
+  fxpmul   f15,f7,f3
+  lfpdux   f2,r20,r27
+  fxpmul   f18,f8,f3
+  fxpmul   f21,f9,f3
+  fxpmul   f24,f10,f3
+  fxpmul   f27,f11,f3
+  fxpmul   f30,f12,f3
+  fxcsmadd f13,f7,f4,f13
+  lfpdux   f3,r20,r27
+  fxcsmadd f16,f8,f4,f16
+  fxcsmadd f14,f7,f5,f14
+  fxcsmadd f17,f8,f5,f17
+  fxcsmadd f19,f9,f4,f19
+  fxcsmadd f22,f10,f4,f22
+  fxcsmadd f25,f11,f4,f25
+  fxcsmadd f28,f12,f4,f28
+  fxcsmadd f20,f9,f5,f20
+  fxcsmadd f15,f7,f6,f15
+  lfpdux   f4,r20,r28
+  fxcsmadd f18,f8,f6,f18
+  lfpdux   f7,r21,r22
+  fxcsmadd f23,f10,f5,f23
+  fxcsmadd f26,f11,f5,f26
+  fxcsmadd f29,f12,f5,f29
+  lfpdux   f8,r21,r26
+  fxcsmadd f21,f9,f6,f21
+  fxcsmadd f24,f10,f6,f24
+  lfpdux   f5,r20,r27
+  lfpdux   f9,r21,r26
+  lfpdux   f10,r21,r26
+  fxcsmadd f27,f11,f6,f27
+  fxcsmadd f30,f12,f6,f30
+  lfpdux   f11,r21,r26
+  beq-     cr6,.L.das_label.6     # r19 == 0: go straight to the loop tail
+  mtctr    r19                    # NOTE(review): only r19 == 0 is guarded; a negative r19 would load a huge CTR count - confirm callers keep r6 >= 4
+
+.__loopk:                          # inner loop, repeated r19 times via CTR: loads for the next step interleaved with multiply-adds
+  lfpdux   f12,r21,r26
+  fxcpmadd f13,f7,f1,f13
+  fxcpmadd f16,f8,f1,f16
+  lfpdux   f6,r20,r27
+  fxcpmadd f19,f9,f1,f19
+  fxcpmadd f22,f10,f1,f22
+  fxcpmadd f25,f11,f1,f25
+  fxcpmadd f28,f12,f1,f28
+  fxcpmadd f14,f7,f2,f14
+  lfpdux   f1,r20,r28
+  fxcpmadd f17,f8,f2,f17
+  fxcpmadd f20,f9,f2,f20
+  fxcpmadd f23,f10,f2,f23
+  fxcpmadd f26,f11,f2,f26
+  fxcpmadd f29,f12,f2,f29
+  fxcpmadd f15,f7,f3,f15
+  lfpdux   f2,r20,r27
+  fxcpmadd f18,f8,f3,f18
+  fxcpmadd f21,f9,f3,f21
+  fxcpmadd f24,f10,f3,f24
+  fxcpmadd f27,f11,f3,f27
+  fxcpmadd f30,f12,f3,f30
+  fxcsmadd f13,f7,f4,f13
+  lfpdux   f3,r20,r27
+  fxcsmadd f16,f8,f4,f16
+  fxcsmadd f14,f7,f5,f14
+  fxcsmadd f17,f8,f5,f17
+  fxcsmadd f19,f9,f4,f19
+  fxcsmadd f22,f10,f4,f22
+  fxcsmadd f25,f11,f4,f25
+  fxcsmadd f28,f12,f4,f28
+  fxcsmadd f20,f9,f5,f20
+  fxcsmadd f15,f7,f6,f15
+  lfpdux   f4,r20,r28
+  fxcsmadd f18,f8,f6,f18
+  lfpdux   f7,r21,r22
+  fxcsmadd f23,f10,f5,f23
+  fxcsmadd f26,f11,f5,f26
+  fxcsmadd f29,f12,f5,f29
+  lfpdux   f8,r21,r26
+  fxcsmadd f21,f9,f6,f21
+  fxcsmadd f24,f10,f6,f24
+  lfpdux   f5,r20,r27
+  lfpdux   f9,r21,r26
+  lfpdux   f10,r21,r26
+  fxcsmadd f27,f11,f6,f27
+  fxcsmadd f30,f12,f6,f30
+  lfpdux   f11,r21,r26
+  bdnz+    .__loopk
+ .L.das_label.6:                   # loop tail: final multiply-adds, with f13..f30 stored through r23 as they complete
+  lfpdux   f12,r21,r26
+  fxcpmadd f13,f7,f1,f13
+  fxcpmadd f14,f7,f2,f14
+  fxcpmadd f15,f7,f3,f15
+  lfpdux   f6,r20,r27
+  fxcpmadd f16,f8,f1,f16
+  fxcpmadd f17,f8,f2,f17
+  fxcpmadd f18,f8,f3,f18
+  fxcsmadd f13,f7,f4,f13
+  fxcsmadd f14,f7,f5,f14
+  fxcsmadd f15,f7,f6,f15
+  fxcsmadd f16,f8,f4,f16
+  fxcsmadd f17,f8,f5,f17
+  fxcsmadd f18,f8,f6,f18
+  stfpdux  f13,r23,r27
+  fxcpmadd f19,f9,f1,f19
+  fxcpmadd f20,f9,f2,f20
+  stfpdux  f14,r23,r27
+  fxcpmadd f21,f9,f3,f21
+  fxcpmadd f22,f10,f1,f22
+  stfpdux  f15,r23,r27
+  fxcpmadd f23,f10,f2,f23
+  fxcpmadd f24,f10,f3,f24
+  stfpdux  f16,r23,r28
+  fxcsmadd f19,f9,f4,f19
+  fxcsmadd f20,f9,f5,f20
+  stfpdux  f17,r23,r27
+  fxcsmadd f21,f9,f6,f21
+  fxcsmadd f22,f10,f4,f22
+  stfpdux  f18,r23,r27
+  fxcsmadd f23,f10,f5,f23
+  fxcsmadd f24,f10,f6,f24
+  stfpdux  f19,r23,r28
+  fxcpmadd f25,f11,f1,f25
+  fxcpmadd f26,f11,f2,f26
+  stfpdux  f20,r23,r27
+  fxcpmadd f27,f11,f3,f27
+  fxcpmadd f28,f12,f1,f28
+  stfpdux  f21,r23,r27
+  fxcpmadd f29,f12,f2,f29
+  fxcpmadd f30,f12,f3,f30
+  stfpdux  f22,r23,r28
+  clrlwi   r0,r23,28              # first alignment check: r0 = r23 & 15
+  subfic   r10,r13,0              # carry is set only when r13 == 0
+  adde     r9,r10,r13             # r9 = 1 if r13 == 0, else 0
+  neg      r0,r0
+  rlwinm   r0,r0,1,31,31          # r0 = 1 if (r23 & 15) != 0, else 0
+  and.     r10,r0,r9
+  beq-     .L.das_label.7
+  mr       r13,r23                # remember the first misaligned store address seen at this point
+ .L.das_label.7:
+  fxcsmadd f25,f11,f4,f25
+  fxcsmadd f26,f11,f5,f26
+  stfpdux  f23,r23,r27
+  clrlwi   r0,r23,28              # second alignment check: r0 = r23 & 15
+  mfcr     r9
+  rlwinm   r9,r9,31,31,31         # r9 = cr7[eq], i.e. 1 while r12 is still 0
+  neg      r0,r0
+  rlwinm   r0,r0,1,31,31          # r0 = 1 if (r23 & 15) != 0, else 0
+  and.     r10,r0,r9
+  beq-     .L.das_label.8
+  mr       r12,r23                # remember the first misaligned store address seen at this point
+  cmpwi    cr7,r23,0              # refresh cr7 so later checks see r12 as set
+ .L.das_label.8:
+  fxcsmadd f27,f11,f6,f27
+  fxcsmadd f28,f12,f4,f28
+  stfpdux  f24,r23,r27
+  fxcsmadd f29,f12,f5,f29
+  fxcsmadd f30,f12,f6,f30
+  stfpdux  f25,r23,r28
+  stfpdux  f26,r23,r27
+  stfpdux  f27,r23,r27
+  stfpdux  f28,r23,r28
+  stfpdux  f29,r23,r27
+  stfpdux  f30,r23,r27
+  addi     r17,r17,1              # next middle-loop iteration
+  mulli    r0,r4,40
+  cmpw     r17,r14
+  subf     r23,r0,r23             # rewind the store pointer by 40 * r4 bytes
+  blt+     .L.das_label.5
+ .L.das_label.9:
+  addi     r18,r18,1              # next outer-loop iteration
+  cmpw     r18,r15
+  blt+     .L.das_label.4
+ .L.das_label.10:                  # common exit: restore CTR and f13..f31, then report any recorded alignment problems
+  mtctr    r29
+  lis      r23,dummy@ha
+  addi     r23,r23,dummy@l
+  addi     r23,r23,-16
+  lfpdux   f13,r23,r27            # reload f13..f31 from `dummy` in the order they were spilled
+  lfpdux   f14,r23,r27
+  lfpdux   f15,r23,r27
+  lfpdux   f16,r23,r27
+  lfpdux   f17,r23,r27
+  lfpdux   f18,r23,r27
+  lfpdux   f19,r23,r27
+  lfpdux   f20,r23,r27
+  lfpdux   f21,r23,r27
+  lfpdux   f22,r23,r27
+  lfpdux   f23,r23,r27
+  lfpdux   f24,r23,r27
+  lfpdux   f25,r23,r27
+  lfpdux   f26,r23,r27
+  lfpdux   f27,r23,r27
+  lfpdux   f28,r23,r27
+  lfpdux   f29,r23,r27
+  lfpdux   f30,r23,r27
+  lfpdux   f31,r23,r27
+  bne-     cr7,.L.das_label.14    # r12 was set: print the "ALIGNMENT PROBS" diagnostic
+ .L.das_label.11:
+  cmpwi    r13,0
+  bne-     .L.das_label.13        # r13 was set: print the "ALIGNMENT PROBS1" diagnostic
+ .L.das_label.12:                  # epilogue: restore saved registers and return 3
+  lwz      r0,100(r1)
+  li       r3,3                   # constant return value
+  lwz      r14,24(r1)
+  lwz      r15,28(r1)
+  mtlr     r0
+  lwz      r16,32(r1)
+  lwz      r17,36(r1)
+  lwz      r18,40(r1)
+  lwz      r19,44(r1)
+  lwz      r20,48(r1)
+  lwz      r21,52(r1)
+  lwz      r22,56(r1)
+  lwz      r23,60(r1)
+  lwz      r25,68(r1)
+  lwz      r26,72(r1)
+  lwz      r27,76(r1)
+  lwz      r28,80(r1)
+  lwz      r29,84(r1)
+  lwz      r30,88(r1)
+  lwz      r31,92(r1)
+  addi     r1,r1,96               # release the stack frame
+  blr     
+ .L.das_label.13:                  # printf(format at .rodata+0, r13), then finish
+  lis      r3,.rodata@ha
+  mr       r4,r13
+  addi     r3,r3,.rodata@l
+  crclr    4*cr1+eq               # presumably the SVR4 varargs marker for "no floating-point arguments" - confirm
+  bl       printf
+  b        .L.das_label.12
+ .L.das_label.14:                  # printf(format at .rodata+0x18, r12), then go on to the r13 check
+  lis      r3,.rodata+0x00000018@ha
+  mr       r4,r12
+  addi     r3,r3,.rodata+0x00000018@l
+  crclr    4*cr1+eq
+  bl       printf
+  b        .L.das_label.11
+ .L.das_label.15:                  # printf(format at .rodata+0x30, r7): the 5th argument failed the 16-byte check; computation still proceeds
+  lis      r3,.rodata+0x00000030@ha
+  mr       r4,r7
+  addi     r3,r3,.rodata+0x00000030@l
+  crclr    4*cr1+eq
+  bl       printf
+  b        .L.das_label.3
+
+.comm dummy,512,16
+
+
+.section ".rodata","a"             # printf format strings; the code addresses them as .rodata+0x00, +0x18 and +0x30
+.align 2
+  .asciz "ALIGNMENT PROBS1: %x\n"  # .asciz appends the NUL that printf needs (22 bytes, padded to 0x18 below)
+.align 3
+  .asciz "ALIGNMENT PROBS: %x\n"   # 21 bytes from 0x18, padded to 0x30 below, so the offsets are unchanged
+.align 3
+  .asciz "C PROB: %x\n"            # last string: with .ascii nothing guaranteed a terminator after it
+
+.ident "GCC: (GNU) 3.2"
diff --git a/src/bg_mxm44.s b/src/bg_mxm44.s
new file mode 100644
index 0000000..60ab4e3
--- /dev/null
+++ b/src/bg_mxm44.s
@@ -0,0 +1,497 @@
+.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4   # symbolic names rN -> register number N, so operands can be written as r0..r31
+.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9
+.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14
+.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19
+.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24
+.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29
+.set r30,30; .set r31,31
+.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4   # same for the floating-point registers f0..f31
+.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+.set f30,30; .set f31,31
+
+.file "bg_mxm44.s"
+
+.globl bg_mxm44                       # exported entry point (called from bg_mxm44_uneven.s)
+.type  bg_mxm44, @function
+.size  bg_mxm44, 1756                 # symbol size is a hard-coded byte count, not computed from labels
+
+
+
+.section ".text"
+.align 2
+
+
+bg_mxm44:                      # in: r3, r5 = input array bases (read), r7 = output array base (written); r4, r6, r8 = pointers to integers, called n1, n2, n3 below. Returns r3 = 0.
+  stwu     r1,-576(r1)         # allocate a 576-byte stack frame
+  stw      r14,360(r1)         # save r14 first: it is reused two lines down as the 16-byte stride
+  mr       r12,r1              # r12 = cursor for the paired FPR saves further down
+  stfd     f14,432(r1)         # f14..f31 saved with stfd at 432..568(r1)
+  li       r14,16
+  stfd     f15,440(r1)
+  stfd     f16,448(r1)
+  stfd     f17,456(r1)
+  stfd     f18,464(r1)
+  stfd     f19,472(r1)
+  stfd     f20,480(r1)
+  stfd     f21,488(r1)
+  stfd     f22,496(r1)
+  stfd     f23,504(r1)
+  stfd     f24,512(r1)
+  stfd     f25,520(r1)
+  stfd     f26,528(r1)
+  stfd     f27,536(r1)
+  stfd     f28,544(r1)
+  stfd     f29,552(r1)
+  stfd     f30,560(r1)
+  stfd     f31,568(r1)
+  stw      r15,364(r1)         # r15..r26 and r28..r30 saved at 364..424(r1); r27 and r31 are never touched in this routine
+  stw      r16,368(r1)
+  stw      r17,372(r1)
+  stw      r18,376(r1)
+  stw      r19,380(r1)
+  stw      r20,384(r1)
+  stw      r21,388(r1)
+  stw      r22,392(r1)
+  stw      r23,396(r1)
+  stw      r24,400(r1)
+  stw      r25,404(r1)
+  stw      r26,408(r1)
+  stw      r28,416(r1)
+  stw      r29,420(r1)
+  stw      r30,424(r1)
+  stfpdux  f14,r12,r14         # second save of f14..f31 with the paired update-form store: r12 += 16 each time, i.e. 16..288(r1); presumably to preserve the secondary FPU halves -- confirm
+  stfpdux  f15,r12,r14
+  stfpdux  f16,r12,r14
+  stfpdux  f17,r12,r14
+  stfpdux  f18,r12,r14
+  stfpdux  f19,r12,r14
+  stfpdux  f20,r12,r14
+  stfpdux  f21,r12,r14
+  stfpdux  f22,r12,r14
+  stfpdux  f23,r12,r14
+  stfpdux  f24,r12,r14
+  stfpdux  f25,r12,r14
+  stfpdux  f26,r12,r14
+  stfpdux  f27,r12,r14
+  stfpdux  f28,r12,r14
+  stfpdux  f29,r12,r14
+  stfpdux  f30,r12,r14
+  stfpdux  f31,r12,r14
+  lis      r11,.rodata@ha
+  addi     r10,r11,.rodata@l   # r10 -> the 8 zero bytes at the start of this file's .rodata
+  lwz      r4,0(r4)            # r4 = n1 (dereference the pointer argument)
+  lfd      f23,0(r10)          # f23 = 0.0; copied below into f0-f3, f8-f11 and f16-f22
+  lwz      r6,0(r6)            # r6 = n2
+  fmr      f0,f23
+  lwz      r8,0(r8)            # r8 = n3
+  fmr      f1,f23
+  fmr      f2,f23
+  fmr      f3,f23
+  fmr      f8,f23
+  fmr      f9,f23
+  fmr      f10,f23
+  fmr      f11,f23
+  fmr      f16,f23
+  fmr      f17,f23
+  fmr      f18,f23
+  fmr      f19,f23
+  fmr      f20,f23
+  fmr      f21,f23
+  fmr      f22,f23
+  li       r13,16              # r13 = 16-byte stride. NOTE(review): r13 is overwritten here and never saved/restored -- confirm the target ABI does not reserve it
+  rlwinm   r9,r4,5,0,26        # r9  = n1*32
+  rlwinm   r19,r6,5,0,26       # r19 = n2*32
+  rlwinm   r14,r4,3,0,28       # r14 = n1*8
+  mr       r25,r14             # r25 = n1*8 (kept; r14 is reduced by 16 below)
+  rlwinm   r12,r6,3,0,28       # r12 = n2*8
+  mulli    r11,r6,-3
+  srawi    r26,r4,2            # r26 = n1 >> 2: bound of the i loop
+  srawi    r28,r8,2            # r28 = n3 >> 2: bound of the j loop
+  rlwinm   r29,r26,2,0,29
+  cmpw     cr6,r29,r4          # cr6.eq <=> n1 is a multiple of 4
+  mulli    r18,r6,-4
+  addi     r18,r18,2
+  addi     r11,r11,2
+  li       r10,0               # r10 = 0: index used for the first store of each store group
+  rlwinm   r11,r11,3,0,28      # r11 = (2 - 3*n2)*8
+  rlwinm   r18,r18,3,0,28      # r18 = (2 - 4*n2)*8
+  mullw    r23,r4,r6
+  addi     r23,r23,-4
+  rlwinm   r23,r23,3,0,28
+  neg      r23,r23             # r23 = -(n1*n2 - 4)*8
+  subf     r14,r13,r14         # r14 = n1*8 - 16
+  add      r23,r23,r14         # r23 = -(n1*n2 - 4)*8 + n1*8 - 16
+  srawi    r16,r6,2
+  mr       r22,r7              # r22 = store cursor, initially the output base
+  cmpw     r6,r6               # writes cr0 only; no later instruction in this routine reads cr0
+  rlwinm   r29,r16,2,0,29
+  cmpw     cr7,r29,r6          # cr7.eq <=> n2 is a multiple of 4
+  mr       r24,r5              # r24 = current base into the r5 input
+  addi     r16,r16,-1          # r16 = (n2 >> 2) - 1, the CTR value for the k loops. NOTE(review): bdnz with CTR = 0 would wrap, so this presumably needs n2 >= 8 -- confirm
+  mr       r15,r7              # r15 = output base for the current j iteration
+  li       r29,0               # r29 = j counter
+
+.grabNgo_jloop:                # outer loop, bottom-tested: repeats while r29 < r28
+  subf     r20,r23,r3          # r20 = r3 - r23, so the first update-form load (r20 += r23) reads at r3
+  subf     r21,r18,r24         # r21 = r24 - r18, so the first update-form load (r21 += r18) reads at r24
+  addi     r30,r15,-32         # r30 = r15 - 32; it is bumped by 32 at the top of every i iteration
+  li       r17,0               # r17 = i counter
+
+.grabNgo_iloop:                # middle loop, bottom-tested: repeats while r17 < r26
+  fxcpmadd f16,f8,f0,f16       # finish the multiply-adds still pending from the previous pass (all operands are 0.0 on the very first pass)
+  lfpdux   f4,r20,r23          # loads for the next block are interleaved with the arithmetic
+  fxcpmadd f17,f8,f1,f17
+  lfpdux   f5,r20,r13
+  fxcpmadd f18,f9,f0,f18
+  lfpdux   f12,r21,r18
+  fxcpmadd f19,f9,f1,f19
+  lfpdux   f13,r21,r12
+  fxcpmadd f20,f10,f0,f20
+  lfpdux   f14,r21,r12
+  fxcpmadd f21,f10,f1,f21
+  lfpdux   f15,r21,r12
+  fxcpmadd f22,f11,f0,f22
+  lfpdux   f6,r20,r14
+  fxcpmadd f23,f11,f1,f23
+  lfpdux   f7,r20,r13
+  fxcsmadd f24,f8,f2,f16       # f24..f31 receive the finished values that are stored below
+  addi     r17,r17,1           # i counter += 1
+  fxcsmadd f25,f8,f3,f17
+  mtctr    r16                 # CTR = (n2 >> 2) - 1 for the k loop of this pass
+  fxcsmadd f26,f9,f2,f18
+  addi     r30,r30,32          # r30 = address where the block computed in this pass will be stored
+  fxcsmadd f27,f9,f3,f19
+  cmpw     cr1,r17,r26         # cr1 is consumed by the blt at the bottom of the i loop
+  fxcsmadd f28,f10,f2,f20
+  lfpdux   f0,r20,r14
+  fxcsmadd f29,f10,f3,f21
+  lfpdux   f1,r20,r13
+  fxcsmadd f30,f11,f2,f22
+  fxcsmadd f31,f11,f3,f23
+  fxpmul   f16,f12,f4          # restart the f16..f23 accumulators for the new block
+  stfpdux  f24,r22,r10         # eight paired stores through r22 at offsets 0, +16, then +(n1*8-16), +16, ...: four 32-byte groups spaced n1*8 bytes apart
+  fxpmul   f17,f12,f5
+  stfpdux  f25,r22,r13
+  fxpmul   f18,f13,f4
+  stfpdux  f26,r22,r14
+  fxpmul   f19,f13,f5
+  stfpdux  f27,r22,r13
+  fxpmul   f20,f14,f4
+  stfpdux  f28,r22,r14
+  fxpmul   f21,f14,f5
+  stfpdux  f29,r22,r13
+  fxpmul   f22,f15,f4
+  stfpdux  f30,r22,r14
+  fxpmul   f23,f15,f5
+  stfpdux  f31,r22,r13
+  fxcsmadd f16,f12,f6,f16
+  lfpdux   f2,r20,r14
+  fxcsmadd f17,f12,f7,f17
+  lfpdux   f3,r20,r13
+  fxcsmadd f18,f13,f6,f18
+  lfpdux   f8,r21,r11
+  fxcsmadd f19,f13,f7,f19
+  lfpdux   f9,r21,r12
+  fxcsmadd f20,f14,f6,f20
+  lfpdux   f10,r21,r12
+  fxcsmadd f21,f14,f7,f21
+  lfpdux   f11,r21,r12
+  fxcsmadd f22,f15,f6,f22
+  fxcsmadd f23,f15,f7,f23
+  beq-     cr7,.grabNgo_k_even4   # n2 multiple of 4: go straight to the k loop
+  fxcpmadd f16,f8,f0,f16       # otherwise run this extra step first, then join the k loop at .grabNgo_kloop_k4
+  lfpdux   f24,r20,r14
+  fxcpmadd f17,f8,f1,f17
+  lfpdux   f25,r20,r13
+  fxcpmadd f18,f9,f0,f18
+  lfpdux   f26,r21,r11
+  fxcpmadd f19,f9,f1,f19
+  lfpdux   f27,r21,r12
+  fxcpmadd f20,f10,f0,f20
+  lfpdux   f28,r21,r12
+  fxcpmadd f21,f10,f1,f21
+  lfpdux   f29,r21,r12
+  fxcpmadd f22,f11,f0,f22
+  lfpdux   f30,r20,r14
+  fxcpmadd f23,f11,f1,f23
+  lfpdux   f31,r20,r13
+  fxcsmadd f16,f8,f2,f16
+  fxcsmadd f17,f8,f3,f17
+  fxcsmadd f18,f9,f2,f18
+  fxcsmadd f19,f9,f3,f19
+  fxcsmadd f20,f10,f2,f20
+  fxcsmadd f21,f10,f3,f21
+  fxcsmadd f22,f11,f2,f22
+  fxcsmadd f23,f11,f3,f23
+  fxcpmadd f16,f26,f24,f16
+  lfpdux   f4,r20,r14
+  fxcpmadd f17,f26,f25,f17
+  lfpdux   f5,r20,r13
+  fxcpmadd f18,f27,f24,f18
+  lfpdux   f6,r20,r14
+  fxcpmadd f19,f27,f25,f19
+  lfpdux   f7,r20,r13
+  fxcpmadd f20,f28,f24,f20
+  lfpdux   f12,r21,r11
+  fxcpmadd f21,f28,f25,f21
+  lfpdux   f13,r21,r12
+  fxcpmadd f22,f29,f24,f22
+  lfpdux   f14,r21,r12
+  fxcpmadd f23,f29,f25,f23
+  lfpdux   f15,r21,r12
+  fxcsmadd f16,f26,f30,f16
+  mr       r22,r30             # r22 = r30: the block now being accumulated is stored there on the next pass (or in the final drain)
+  fxcsmadd f17,f26,f31,f17
+  fxcsmadd f18,f27,f30,f18
+  fxcsmadd f19,f27,f31,f19
+  fxcsmadd f20,f28,f30,f20
+  fxcsmadd f21,f28,f31,f21
+  fxcsmadd f22,f29,f30,f22
+  fxcsmadd f23,f29,f31,f23
+  b        .grabNgo_kloop_k4   # enter the k loop at its second half
+
+.grabNgo_k_even4:              # k loop head (both labels name the same address)
+.grabNgo_kloop:
+  fxcpmadd f16,f8,f0,f16       # first half: consume f0-f3 / f8-f11 while loading f4-f7 / f12-f15
+  lfpdux   f4,r20,r14
+  fxcpmadd f17,f8,f1,f17
+  lfpdux   f5,r20,r13
+  fxcpmadd f18,f9,f0,f18
+  lfpdux   f6,r20,r14
+  fxcpmadd f19,f9,f1,f19
+  lfpdux   f7,r20,r13
+  fxcpmadd f20,f10,f0,f20
+  lfpdux   f12,r21,r11
+  fxcpmadd f21,f10,f1,f21
+  lfpdux   f13,r21,r12
+  fxcpmadd f22,f11,f0,f22
+  lfpdux   f14,r21,r12
+  fxcpmadd f23,f11,f1,f23
+  lfpdux   f15,r21,r12
+  fxcsmadd f16,f8,f2,f16
+  mr       r22,r30             # r22 = r30 (same re-targeting of the store cursor as on the other path)
+  fxcsmadd f17,f8,f3,f17
+  fxcsmadd f18,f9,f2,f18
+  fxcsmadd f19,f9,f3,f19
+  fxcsmadd f20,f10,f2,f20
+  fxcsmadd f21,f10,f3,f21
+  fxcsmadd f22,f11,f2,f22
+  fxcsmadd f23,f11,f3,f23
+
+.grabNgo_kloop_k4:
+  fxcpmadd f16,f12,f4,f16      # second half: consume f4-f7 / f12-f15 while loading f0-f3 / f8-f11
+  lfpdux   f0,r20,r14
+  fxcpmadd f17,f12,f5,f17
+  lfpdux   f1,r20,r13
+  fxcpmadd f18,f13,f4,f18
+  lfpdux   f2,r20,r14
+  fxcpmadd f19,f13,f5,f19
+  lfpdux   f3,r20,r13
+  fxcpmadd f20,f14,f4,f20
+  lfpdux   f8,r21,r11
+  fxcpmadd f21,f14,f5,f21
+  lfpdux   f9,r21,r12
+  fxcpmadd f22,f15,f4,f22
+  lfpdux   f10,r21,r12
+  fxcpmadd f23,f15,f5,f23
+  lfpdux   f11,r21,r12
+  fxcsmadd f16,f12,f6,f16
+  fxcsmadd f17,f12,f7,f17
+  fxcsmadd f18,f13,f6,f18
+  fxcsmadd f19,f13,f7,f19
+  fxcsmadd f20,f14,f6,f20
+  fxcsmadd f21,f14,f7,f21
+  fxcsmadd f22,f15,f6,f22
+  fxcsmadd f23,f15,f7,f23
+  bdnz+    .grabNgo_k_even4    # CTR -= 1; repeat the k loop while CTR != 0
+  blt+     cr1,.grabNgo_iloop  # next i iteration while r17 < r26 (cr1 was set near the top of the i loop)
+  add      r24,r24,r19         # advance the r5-input base by n2*32 bytes
+  add      r15,r15,r9          # advance the output base by n1*32 bytes
+  beq-     cr6,.grabNgo_n1even4   # n1 multiple of 4: skip the leftover pass below
+  lfpdux   f4,r20,r23          # leftover pass: accumulates into f24..f27 only, leaving the pending f16..f23 state untouched
+  lfpdux   f5,r20,r25
+  lfpdux   f12,r21,r18
+  lfpdux   f13,r21,r12
+  lfpdux   f14,r21,r12
+  lfpdux   f15,r21,r12
+  mtctr    r16                 # CTR = (n2 >> 2) - 1 again, for the leftover k loop
+  fxpmul   f24,f12,f4
+  fxpmul   f25,f13,f4
+  fxpmul   f26,f14,f4
+  fxpmul   f27,f15,f4
+  fxcsmadd f24,f12,f5,f24
+  fxcsmadd f25,f13,f5,f25
+  fxcsmadd f26,f14,f5,f26
+  fxcsmadd f27,f15,f5,f27
+  lfpdux   f6,r20,r25
+  lfpdux   f7,r20,r25
+  lfpdux   f28,r21,r11
+  lfpdux   f29,r21,r12
+  lfpdux   f30,r21,r12
+  lfpdux   f31,r21,r12
+  beq-     cr7,.grabNgo_k_even4_2   # n2 multiple of 4: skip the extra step
+  fxcpmadd f24,f28,f6,f24
+  fxcpmadd f25,f29,f6,f25
+  fxcpmadd f26,f30,f6,f26
+  fxcpmadd f27,f31,f6,f27
+  fxcsmadd f24,f28,f7,f24
+  fxcsmadd f25,f29,f7,f25
+  fxcsmadd f26,f30,f7,f26
+  fxcsmadd f27,f31,f7,f27
+  lfpdux   f6,r20,r25
+  lfpdux   f7,r20,r25
+  lfpdux   f28,r21,r11
+  lfpdux   f29,r21,r12
+  lfpdux   f30,r21,r12
+  lfpdux   f31,r21,r12
+
+.grabNgo_k_even4_2:            # leftover k loop, CTR-controlled like the main one
+  fxcpmadd f24,f28,f6,f24
+  lfpdux   f4,r20,r25
+  fxcpmadd f25,f29,f6,f25
+  lfpdux   f5,r20,r25
+  fxcpmadd f26,f30,f6,f26
+  lfpdux   f12,r21,r11
+  fxcpmadd f27,f31,f6,f27
+  lfpdux   f13,r21,r12
+  fxcsmadd f24,f28,f7,f24
+  lfpdux   f14,r21,r12
+  fxcsmadd f25,f29,f7,f25
+  lfpdux   f15,r21,r12
+  fxcsmadd f26,f30,f7,f26
+  fxcsmadd f27,f31,f7,f27
+  fxcpmadd f24,f12,f4,f24
+  lfpdux   f6,r20,r25
+  fxcpmadd f25,f13,f4,f25
+  lfpdux   f7,r20,r25
+  fxcpmadd f26,f14,f4,f26
+  lfpdux   f28,r21,r11
+  fxcpmadd f27,f15,f4,f27
+  lfpdux   f29,r21,r12
+  fxcsmadd f24,f12,f5,f24
+  lfpdux   f30,r21,r12
+  fxcsmadd f25,f13,f5,f25
+  lfpdux   f31,r21,r12
+  fxcsmadd f26,f14,f5,f26
+  fxcsmadd f27,f15,f5,f27
+  bdnz+    .grabNgo_k_even4_2  # CTR -= 1; repeat while CTR != 0
+  fxcpmadd f24,f28,f6,f24      # final step for the operands loaded in the last loop iteration
+  fxcpmadd f25,f29,f6,f25
+  fxcpmadd f26,f30,f6,f26
+  fxcpmadd f27,f31,f6,f27
+  fxcsmadd f24,f28,f7,f24
+  fxcsmadd f25,f29,f7,f25
+  fxcsmadd f26,f30,f7,f26
+  fxcsmadd f27,f31,f7,f27
+  addi     r30,r30,32          # step past the last block of the i loop
+  stfpdux  f24,r30,r10         # four paired stores spaced r25 = n1*8 bytes apart
+  stfpdux  f25,r30,r25
+  stfpdux  f26,r30,r25
+  stfpdux  f27,r30,r25
+
+.grabNgo_n1even4:
+  addi     r29,r29,1           # j counter += 1
+  cmpw     cr5,r29,r28
+  blt+     cr5,.grabNgo_jloop  # next j iteration while r29 < r28
+  fxcpmadd f16,f8,f0,f16       # drain: finish the multiply-adds of the last block (same sequence as at the top of the i loop) ...
+  fxcpmadd f17,f8,f1,f17
+  fxcpmadd f18,f9,f0,f18
+  fxcpmadd f19,f9,f1,f19
+  fxcpmadd f20,f10,f0,f20
+  fxcpmadd f21,f10,f1,f21
+  fxcpmadd f22,f11,f0,f22
+  fxcpmadd f23,f11,f1,f23
+  fxcsmadd f16,f8,f2,f16
+  fxcsmadd f17,f8,f3,f17
+  fxcsmadd f18,f9,f2,f18
+  fxcsmadd f19,f9,f3,f19
+  fxcsmadd f20,f10,f2,f20
+  fxcsmadd f21,f10,f3,f21
+  fxcsmadd f22,f11,f2,f22
+  fxcsmadd f23,f11,f3,f23
+  stfpdux  f16,r22,r10         # ... and store it through r22 with the same 0/16/n1*8-16 stride pattern as in the i loop
+  stfpdux  f17,r22,r13
+  stfpdux  f18,r22,r14
+  stfpdux  f19,r22,r13
+  stfpdux  f20,r22,r14
+  stfpdux  f21,r22,r13
+  stfpdux  f22,r22,r14
+  stfpdux  f23,r22,r13
+  mr       r3,r1               # epilogue: r3 = cursor over the paired save area
+  li       r0,16
+  lfpdux   f14,r3,r0           # paired reloads of f14..f31 from 16..288(r1), mirroring the stfpdux saves
+  lfpdux   f15,r3,r0
+  lfpdux   f16,r3,r0
+  lfpdux   f17,r3,r0
+  lfpdux   f18,r3,r0
+  lfpdux   f19,r3,r0
+  lfpdux   f20,r3,r0
+  lfpdux   f21,r3,r0
+  lfpdux   f22,r3,r0
+  lfpdux   f23,r3,r0
+  lfpdux   f24,r3,r0
+  lfpdux   f25,r3,r0
+  lfpdux   f26,r3,r0
+  lfpdux   f27,r3,r0
+  lfpdux   f28,r3,r0
+  lfpdux   f29,r3,r0
+  lfpdux   f30,r3,r0
+  lfpdux   f31,r3,r0
+  lwz      r14,360(r1)         # restore the integer registers saved in the prologue
+  li       r3,0                # return value 0
+  lwz      r15,364(r1)
+  lwz      r16,368(r1)
+  lwz      r17,372(r1)
+  lwz      r18,376(r1)
+  lwz      r19,380(r1)
+  lwz      r20,384(r1)
+  lwz      r21,388(r1)
+  lwz      r22,392(r1)
+  lwz      r23,396(r1)
+  lwz      r24,400(r1)
+  lwz      r25,404(r1)
+  lwz      r26,408(r1)
+  lwz      r28,416(r1)
+  lwz      r29,420(r1)
+  lwz      r30,424(r1)
+  lfd      f14,432(r1)         # reload f14..f31 from the stfd save area as well
+  lfd      f15,440(r1)
+  lfd      f16,448(r1)
+  lfd      f17,456(r1)
+  lfd      f18,464(r1)
+  lfd      f19,472(r1)
+  lfd      f20,480(r1)
+  lfd      f21,488(r1)
+  lfd      f22,496(r1)
+  lfd      f23,504(r1)
+  lfd      f24,512(r1)
+  lfd      f25,520(r1)
+  lfd      f26,528(r1)
+  lfd      f27,536(r1)
+  lfd      f28,544(r1)
+  lfd      f29,552(r1)
+  lfd      f30,560(r1)
+  lfd      f31,568(r1)
+  addi     r1,r1,576           # pop the 576-byte frame
+  blr     
+
+.section ".rodata","a"
+.align 3
+  .long 0x00000000            # 8 zero bytes = the double 0.0 that bg_mxm44 loads into f23 at entry
+  .long 0x00000000
+
+
+.section ".data","wa"
+.align 3
+.type  seconds_per_cycle, @object
+.size  seconds_per_cycle, 8
+seconds_per_cycle:            # 8-byte local object; nothing in this file references it
+  .long 0x3e188aec            # read as one big-endian double these two words are about 1.43e-9 -- presumably 1/(700 MHz); confirm
+  .long 0x70377bb0
+
+
+.ident "GCC: (GNU) 3.2"
diff --git a/src/bg_mxm44_uneven.s b/src/bg_mxm44_uneven.s
new file mode 100644
index 0000000..15bf7b9
--- /dev/null
+++ b/src/bg_mxm44_uneven.s
@@ -0,0 +1,82 @@
+.set r0,0; .set r1,1; .set r2,2; .set r3,3; .set r4,4   # symbolic names rN -> register number N
+.set r5,5; .set r6,6; .set r7,7; .set r8,8; .set r9,9
+.set r10,10; .set r11,11; .set r12,12; .set r13,13; .set r14,14
+.set r15,15; .set r16,16; .set r17,17; .set r18,18; .set r19,19
+.set r20,20; .set r21,21; .set r22,22; .set r23,23; .set r24,24
+.set r25,25; .set r26,26; .set r27,27; .set r28,28; .set r29,29
+.set r30,30; .set r31,31
+.set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4   # same for f0..f31 (no floating-point register is used in this file)
+.set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+.set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+.set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+.set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+.set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+.set f30,30; .set f31,31
+
+.file "bg_mxm44_uneven.s"
+
+.globl bg_mxm44_uneven                # exported entry point
+.type  bg_mxm44_uneven, @function
+.size  bg_mxm44_uneven, 220           # hard-coded byte size of the routine
+
+.section ".text"
+.align 2
+
+bg_mxm44_uneven:               # same six arguments as bg_mxm44 (r3..r8); splits the r8 dimension (n3) into a multiple of 4 for bg_mxm44 and a remainder for mxm44_0
+  stwu     r1,-64(r1)          # allocate a 64-byte stack frame
+  mflr     r0
+  stw      r0,68(r1)           # save the return address 4 bytes above the caller's stack pointer
+  stw      r28,48(r1)          # r23..r29 are saved at 28..52(r1), interleaved with the arithmetic below
+  lwz      r9,0(r8)            # r9 = n3
+  addi     r8,r1,8             # r8 -> local word 8(r1): the n3 argument handed to bg_mxm44
+  lwz      r28,0(r4)           # r28 = n1
+  srawi    r0,r9,2
+  addze    r0,r0               # r0 = n3/4 rounded toward zero (srawi + carry fix-up)
+  stw      r23,28(r1)
+  rlwinm   r0,r0,2,0,29        # r0 = 4*(n3/4)
+  stw      r24,32(r1)
+  subf     r0,r0,r9            # r0 = n3 - 4*(n3/4): the remainder
+  stw      r25,36(r1)
+  subf     r9,r0,r9            # r9 = n3 - remainder: the multiple-of-4 part
+  stw      r0,12(r1)           # 12(r1) = remainder
+  stw      r9,8(r1)            # 8(r1)  = multiple-of-4 part
+  mr       r23,r4              # keep the incoming argument registers in r23..r27 across the first call
+  stw      r26,40(r1)
+  mr       r24,r6
+  stw      r27,44(r1)
+  mr       r26,r5
+  stw      r29,52(r1)
+  mr       r27,r7
+  mr       r25,r3
+  lwz      r29,0(r6)           # r29 = n2
+  crclr    4*cr1+eq
+  bl       bg_mxm44            # first call: r3..r7 unchanged, r8 -> the multiple-of-4 part
+  addi     r8,r1,12            # r8 -> remainder: the n3 argument for the second call
+  lwz      r0,8(r1)            # r0 = multiple-of-4 part handled by the first call
+  mr       r3,r25              # r3, r4, r6 restored from the saved copies
+  mr       r4,r23
+  mr       r6,r24
+  mullw    r28,r28,r0
+  mullw    r29,r29,r0
+  rlwinm   r28,r28,3,0,28      # r28 = n1 * (part already done) * 8
+  add      r27,r27,r28
+  mr       r7,r27              # r7 = original r7 advanced by r28 bytes
+  rlwinm   r29,r29,3,0,28      # r29 = n2 * (part already done) * 8
+  add      r26,r26,r29
+  mr       r5,r26              # r5 = original r5 advanced by r29 bytes
+  crclr    4*cr1+eq
+  bl       mxm44_0             # second call: routine defined outside this file, run on the remaining part
+  lwz      r29,52(r1)          # restore r23..r29 and the return address
+  lwz      r23,28(r1)
+  mr       r3,r0               # NOTE(review): r3 is set from r0 right after a call, so its value depends on what mxm44_0 left in r0 -- confirm callers ignore the result
+  lwz      r24,32(r1)
+  lwz      r0,68(r1)
+  lwz      r25,36(r1)
+  lwz      r26,40(r1)
+  mtlr     r0
+  lwz      r27,44(r1)
+  lwz      r28,48(r1)
+  addi     r1,r1,64            # pop the frame
+  blr     
+
+.ident "GCC: (GNU) 3.2"
diff --git a/src/blas.f b/src/blas.f
new file mode 100644
index 0000000..e0129cc
--- /dev/null
+++ b/src/blas.f
@@ -0,0 +1,30886 @@
+      subroutine caxpy(n,ca,cx,incx,cy,incy)
+c     complex axpy:  cy := cy + ca*cx  for n elements, in place in cy.
+c     constant times a vector plus a vector.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c     incx, incy: index strides through cx and cy (may be negative).
+      complex cx(*),cy(*),ca
+      integer i,incx,incy,ix,iy,n
+c     quick return: nothing to do if n <= 0 or ca is exactly zero.
+      if(n.le.0)return
+      if (abs(real(ca)) + abs(aimag(ca)) .eq. 0.0 ) return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c        a negative stride starts at index (-n+1)*inc + 1 and walks down.
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        cy(iy) = cy(iy) + ca*cx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        cy(i) = cy(i) + ca*cx(i)
+   30 continue
+      return
+      end
+      subroutine  ccopy(n,cx,incx,cy,incy)
+c     complex copy:  cy := cx  for n elements; cx is only read.
+c     copies a vector, x, to a vector, y.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c     incx, incy: index strides through cx and cy (may be negative).
+      complex cx(*),cy(*)
+      integer i,incx,incy,ix,iy,n
+c     quick return: nothing is copied when n <= 0.
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c        a negative stride starts at index (-n+1)*inc + 1 and walks down.
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        cy(iy) = cx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        cy(i) = cx(i)
+   30 continue
+      return
+      end
+      complex function cdotc(n,cx,incx,cy,incy)
+c     returns the sum over n elements of conjg(cx)*cy.
+c     forms the dot product of two vectors, conjugating the first
+c     vector.
+c     jack dongarra, linpack,  3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c     incx, incy: index strides through cx and cy (may be negative).
+      complex cx(*),cy(*),ctemp
+      integer i,incx,incy,ix,iy,n
+c     the result is (0.0,0.0) when n <= 0.
+      ctemp = (0.0,0.0)
+      cdotc = (0.0,0.0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c        a negative stride starts at index (-n+1)*inc + 1 and walks down.
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = ctemp + conjg(cx(ix))*cy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      cdotc = ctemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ctemp = ctemp + conjg(cx(i))*cy(i)
+   30 continue
+      cdotc = ctemp
+      return
+      end
+      complex function cdotu(n,cx,incx,cy,incy)
+c     returns the sum over n elements of cx*cy (no conjugation).
+c     forms the dot product of two vectors.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c     incx, incy: index strides through cx and cy (may be negative).
+      complex cx(*),cy(*),ctemp
+      integer i,incx,incy,ix,iy,n
+c     the result is (0.0,0.0) when n <= 0.
+      ctemp = (0.0,0.0)
+      cdotu = (0.0,0.0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c        a negative stride starts at index (-n+1)*inc + 1 and walks down.
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = ctemp + cx(ix)*cy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      cdotu = ctemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ctemp = ctemp + cx(i)*cy(i)
+   30 continue
+      cdotu = ctemp
+      return
+      end
+      SUBROUTINE CGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is the position of the first invalid argument found.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     (empty matrix, or alpha = 0 with beta = 1: y is unchanged)
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     NOCONJ selects the loops without CONJG for TRANS = 'T'.
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*        (columns whose x entry is zero are skipped)
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*        TEMP accumulates one band column dotted with x.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + CONJG( A( K + I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + CONJG( A( K + I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGBMV .
+*
+      END
+      SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            CONJA, CONJB, NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+*     B  respectively are to be  conjugate transposed  and set
+*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+*     and the number of rows of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      CONJA = LSAME( TRANSA, 'C' )
+      CONJB = LSAME( TRANSB, 'C' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.CONJA                ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.CONJB                ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE IF( CONJA )THEN
+*
+*           Form  C := alpha*conjg( A' )*B + beta*C.
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 150, J = 1, N
+               DO 140, I = 1, M
+                  TEMP = ZERO
+                  DO 130, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  130             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  140          CONTINUE
+  150       CONTINUE
+         END IF
+      ELSE IF( NOTA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A*conjg( B' ) + beta*C.
+*
+            DO 200, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 160, I = 1, M
+                     C( I, J ) = ZERO
+  160             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 170, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  170             CONTINUE
+               END IF
+               DO 190, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*CONJG( B( J, L ) )
+                     DO 180, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  180                CONTINUE
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A*B'          + beta*C
+*
+            DO 250, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 210, I = 1, M
+                     C( I, J ) = ZERO
+  210             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 220, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  220             CONTINUE
+               END IF
+               DO 240, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 230, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+  250       CONTINUE
+         END IF
+      ELSE IF( CONJA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C.
+*
+            DO 280, J = 1, N
+               DO 270, I = 1, M
+                  TEMP = ZERO
+                  DO 260, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*CONJG( B( J, L ) )
+  260             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  270          CONTINUE
+  280       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*conjg( A' )*B' + beta*C
+*
+            DO 310, J = 1, N
+               DO 300, I = 1, M
+                  TEMP = ZERO
+                  DO 290, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*B( J, L )
+  290             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  300          CONTINUE
+  310       CONTINUE
+         END IF
+      ELSE
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A'*conjg( B' ) + beta*C
+*
+            DO 340, J = 1, N
+               DO 330, I = 1, M
+                  TEMP = ZERO
+                  DO 320, L = 1, K
+                     TEMP = TEMP + A( L, I )*CONJG( B( J, L ) )
+  320             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  330          CONTINUE
+  340       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 370, J = 1, N
+               DO 360, I = 1, M
+                  TEMP = ZERO
+                  DO 350, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  350             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  360          CONTINUE
+  370       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGEMM .
+*
+      END
+      SUBROUTINE CGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument found.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     Nothing to do for an empty A, or when alpha = 0 and beta = 1.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     NOCONJ is true for the unconjugated transpose, TRANS = 'T'.
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*     A negative increment starts at index  1 - ( LEN - 1 )*INC.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*     BETA = 0 stores zeros outright, so Y is never read in that case.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*        Columns of A whose x element is zero are skipped.
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*        TEMP sums one column of A against x before updating y.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               IF( NOCONJ )THEN
+                  DO 90, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = 1, M
+                     TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               IF( NOCONJ )THEN
+                  DO 120, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = 1, M
+                     TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGEMV .
+*
+      END
+      SUBROUTINE CGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERC  performs the rank 1 operation
+*
+*     A := alpha*x*conjg( y' ) + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument found.
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGERC ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     A is left unchanged when it is empty or when alpha is zero.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*     Columns of A whose y element is zero are skipped.
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*CONJG( Y( JY ) )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*CONJG( Y( JY ) )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGERC .
+*
+      END
+      SUBROUTINE CGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERU  performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument found.
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGERU ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     A is left unchanged when it is empty or when alpha is zero.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*     Columns of A whose y element is zero are skipped.
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGERU .
+*
+      END
+      SUBROUTINE CHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.
+*
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + CONJG( A( L + I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( A( KPLUS1, J ) )
+     $                         + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + CONJG( A( L + I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( KPLUS1, J ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( A( 1, J ) )
+               L      = 1      - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + CONJG( A( L + I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( 1, J ) )
+               L       = 1       - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + CONJG( A( L + I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHBMV .
+*
+      END
+      SUBROUTINE CHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars, A is an hermitian matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  hermitian matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  hermitian  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Note that the imaginary parts  of the diagonal elements need
+*           not be set, they are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A  ( M for the left-sided
+*     product, N otherwise );  it is used only in the LDA check below.
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument, which is then passed to XERBLA.
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  C is empty,  or alpha = 0 and beta = 1
+*     so that C would be left as it is.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only  C := beta*C  is formed;  for
+*     beta = 0  C is set to zero without being read.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.  Only the stored triangle of A
+*        is read; the other half is obtained through CONJG.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*CONJG(  A( K, I ) )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*CONJG(  A( K, I ) )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C,  one column of C at a time:
+*        diagonal term first, then columns K < J and K > J of B.
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*REAL( A( J, J ) )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*CONJG( A( J, K ) )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*CONJG( A( J, K ) )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHEMM .
+*
+      END
+      SUBROUTINE CHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument, which is then passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  n = 0,  or alpha = 0 and beta = 1
+*     so that y would be left as it is.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  For a negative increment
+*     the traversal starts from element  1 - ( n - 1 )*inc.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y  ( y is set to zero without being read
+*     when beta = 0 ).
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.  TEMP2 sums
+*        conjg( A( I, J ) )*x( I ),  I < J,  for the update of  y( J ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + CONJG( A( I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + CONJG( A( I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.  TEMP2 sums
+*        conjg( A( I, J ) )*x( I ),  I > J,  for the update of  y( J ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + CONJG( A( I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + CONJG( A( I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHEMV .
+*
+      END
+      SUBROUTINE CHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument, which is then passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  for n = 0 or alpha = 0 the routine
+*     returns before anything is written to A.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity ( KX, KY, JX, JY stay unset on the unit-stride path ).
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*     If x( j ) = y( j ) = 0, column J only has its diagonal made real.
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.  The diagonal
+*        is rebuilt from REAL parts, so it is real on exit.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( J ) )
+                  TEMP2 = CONJG( ALPHA*X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( JY ) )
+                  TEMP2 = CONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.  The diagonal
+*        is rebuilt from REAL parts, so it is real on exit.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*CONJG( Y( J ) )
+                  TEMP2     = CONJG( ALPHA*X( J ) )
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*CONJG( Y( JY ) )
+                  TEMP2     = CONJG( ALPHA*X( JX ) )
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX        = JX
+                  IY        = JY
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER2 .
+*
+      END
+      SUBROUTINE CHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      REAL               BETA
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER2K  performs one of the hermitian rank 2k operations
+*
+*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+*
+*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+*  hermitian matrix and  A and B  are  n by k matrices in the first case
+*  and  k by n  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+*                                         conjg( alpha )*B*conjg( A' ) +
+*                                         beta*C.
+*
+*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+*                                         conjg( alpha )*conjg( B' )*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER        ( ONE  = 1.0E+0 )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  NROWA is the number of rows that A
+*     and B must have for the given TRANS  ( n for 'N', otherwise k ).
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     INFO is the position of the first argument found in error.
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  C is left untouched when  n = 0,  or
+*     when beta = 1 and the update term vanishes (alpha = 0 or k = 0).
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     And when  alpha.eq.zero.  Only  C := beta*C  remains:  with
+*     beta = 0 the triangle is set to zero without reading C, otherwise
+*     it is scaled by beta and each diagonal entry is stored as real.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.REAL( ZERO ) )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.REAL( ZERO ) )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 70, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  Only the triangle of C selected by UPLO
+*     is read or written from here on.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+*                   beta*C,  column by column:  column J is scaled by
+*        beta,  then the K columns of A and B are accumulated into it.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.REAL( ZERO ) )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*CONJG( B( J, L ) )
+                     TEMP2 = CONJG( ALPHA*A( J, L ) )
+                     DO 110, I = 1, J - 1
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                     C( J, J ) = REAL( C( J, J ) )         +
+     $                           REAL( A( J, L )*TEMP1 +
+     $                                 B( J, L )*TEMP2   )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.REAL( ZERO ) )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*CONJG( B( J, L ) )
+                     TEMP2 = CONJG( ALPHA*A( J, L ) )
+                     DO 160, I = J + 1, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                     C( J, J ) = REAL( C( J, J ) )         +
+     $                           REAL( A( J, L )*TEMP1 +
+     $                                 B( J, L )*TEMP2   )
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+*                   beta*C.   Each  C( I, J )  is built from the two
+*        length-K sums  TEMP1  and  TEMP2;  diagonals are stored real.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( I.EQ.J )THEN
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( J, J ) = REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     ELSE
+                        C( J, J ) = BETA*REAL( C( J, J ) )         +
+     $                              REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA *C( I, J ) +
+     $                              ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J )
+  220             CONTINUE
+                  IF( I.EQ.J )THEN
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( J, J ) = REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     ELSE
+                        C( J, J ) = BETA*REAL( C( J, J ) )         +
+     $                              REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA *C( I, J ) +
+     $                              ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER2K.
+*
+      END
+      SUBROUTINE CHER  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER   performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     argument found in error;  it is passed on to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  when  n = 0  or  alpha = 0  neither
+*     A nor X is referenced.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) )
+     $   RETURN
+*     Set the start point in X if the increment is not unity.  For
+*     INCX < 0,  KX = 1 - ( n - 1 )*INCX  so that  n - 1  steps of INCX
+*     from KX end at X( 1 ).  KX is left unset when INCX = 1.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.  A column whose element of x is zero gets no update;  only
+*     the imaginary part of its diagonal entry is set to zero.
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.  In column J
+*        rows 1 to J-1 are updated first, then the diagonal entry.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( X( J )*TEMP )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( X( JX )*TEMP )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.  In column J
+*        the diagonal entry is updated first, then rows J+1 to N.
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP      = ALPHA*CONJG( X( J ) )
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( J ) )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP      = ALPHA*CONJG( X( JX ) )
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( JX ) )
+                  IX        = JX
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER  .
+*
+      END
+      SUBROUTINE CHERK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHERK  performs one of the hermitian rank k operations
+*
+*     C := alpha*A*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*A + beta*C,
+*
+*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+*  matrix in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               RTEMP
+      COMPLEX            TEMP
+*     .. Parameters ..
+      REAL               ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHERK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 70, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*conjg( A' ) + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.CMPLX( ZERO ) )THEN
+                     TEMP = ALPHA*CONJG( A( J, L ) )
+                     DO 110, I = 1, J - 1
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                     C( J, J ) = REAL( C( J, J )      ) +
+     $                           REAL( TEMP*A( I, L ) )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 150, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.CMPLX( ZERO ) )THEN
+                     TEMP      = ALPHA*CONJG( A( J, L ) )
+                     C( J, J ) = REAL( C( J, J )      )   +
+     $                           REAL( TEMP*A( J, L ) )
+                     DO 160, I = J + 1, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 220, J = 1, N
+               DO 200, I = 1, J - 1
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+               RTEMP = ZERO
+               DO 210, L = 1, K
+                  RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J )
+  210          CONTINUE
+               IF( BETA.EQ.ZERO )THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) )
+               END IF
+  220       CONTINUE
+         ELSE
+            DO 260, J = 1, N
+               RTEMP = ZERO
+               DO 230, L = 1, K
+                  RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J )
+  230          CONTINUE
+               IF( BETA.EQ.ZERO )THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) )
+               END IF
+               DO 250, I = J + 1, N
+                  TEMP = ZERO
+                  DO 240, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*A( L, J )
+  240             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  250          CONTINUE
+  260       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHERK .
+*
+      END
+      SUBROUTINE CHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     Nothing to do when n = 0, or when alpha = 0 and beta = 1.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+*     If alpha is zero, the scaled y is already the result.
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+*     KK is the index in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*        TEMP2 collects the conjg( a( i, j ) )*x( i ) terms of y( j ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + CONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( AP( KK + J - 1 ) )
+     $                         + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + CONJG( AP( K ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK + J - 1 ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*        AP( KK ) is the diagonal element; only its real part is used.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( AP( KK ) )
+               K      = KK     + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + CONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK ) )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + CONJG( AP( K ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPMV .
+*
+      END
+      SUBROUTINE CHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPR2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     The update is zero when alpha = 0, so AP is left untouched.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*     KK is the index in AP of the first stored element of column J;
+*     TEMP1 multiplies x and TEMP2 multiplies y in the column update.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*        Diagonal entries are made real, even when column J is skipped.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( J ) )
+                  TEMP2 = CONJG( ALPHA*X( J ) )
+                  K     = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) +
+     $                               REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( JY ) )
+                  TEMP2 = CONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) +
+     $                               REAL( X( JX )*TEMP1 +
+     $                                     Y( JY )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*        AP( KK ) is the diagonal element of column J.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1   = ALPHA*CONJG( Y( J ) )
+                  TEMP2   = CONJG( ALPHA*X( J ) )
+                  AP( KK ) = REAL( AP( KK ) ) +
+     $                       REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1    = ALPHA*CONJG( Y( JY ) )
+                  TEMP2    = CONJG( ALPHA*X( JX ) )
+                  AP( KK ) = REAL( AP( KK ) ) +
+     $                       REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX       = JX
+                  IY       = JY
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPR2 .
+*
+      END
+      SUBROUTINE CHPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPR    performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     The update is zero when alpha = 0, so AP is left untouched.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*     KX stays unset for INCX = 1, where it is never referenced.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*     KK is the index in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*        Diagonal entries are made real, even when column J is skipped.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( J ) )
+                  K    = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+     $                               + REAL( X( J )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+     $                               + REAL( X( JX )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*        AP( KK ) is the diagonal element of column J.
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP     = ALPHA*CONJG( X( J ) )
+                  AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( J ) )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP    = ALPHA*CONJG( X( JX ) )
+                  AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( JX ) )
+                  IX      = JX
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPR  .
+*
+      END
+      subroutine crotg(ca,cb,c,s)
+c     sets c = |ca|/r and s = (ca/|ca|)*conjg(cb)/r, where
+c     r = sqrt(|ca|**2+|cb|**2), and overwrites ca by (ca/|ca|)*r.
+c     a zero ca instead gives c = 0, s = 1 and ca = cb.
+      complex ca,cb,s,phase
+      real c,rnorm,sc
+      if (cabs(ca) .eq. 0.) then
+         c = 0.
+         s = (1.,0.)
+         ca = cb
+      else
+         sc = cabs(ca) + cabs(cb)
+         rnorm = sc * sqrt((cabs(ca/sc))**2 + (cabs(cb/sc))**2)
+         phase = ca /cabs(ca)
+         c = cabs(ca) / rnorm
+         s = phase * conjg(cb) / rnorm
+         ca = phase * rnorm
+      end if
+      return
+      end
+      subroutine  cscal(n,ca,cx,incx)
+c
+c     multiplies n elements of cx, taken with stride incx, by ca.
+c     cx is overwritten in place.
+c     does nothing unless both n and incx are positive.
+c
+c     jack dongarra, linpack,  3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex ca,cx(*)
+      integer incx,k,last,n
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 1,n
+            cx(k) = ca*cx(k)
+   10    continue
+      else
+c        strided storage
+         last = n*incx
+         do 20 k = 1,last,incx
+            cx(k) = ca*cx(k)
+   20    continue
+      end if
+      return
+      end
+      subroutine  csrot (n,cx,incx,cy,incy,c,s)
+c
+c     applies a plane rotation, where the cos and sin (c and s) are real
+c     and the vectors cx and cy are complex.
+c     jack dongarra, linpack, 3/11/78.
+c     array(1) declarations changed to array(*), as in cscal and cswap.
+      complex cx(*),cy(*),ctemp
+      real c,s
+      integer i,incx,incy,ix,iy,n
+c     n .le. 0 leaves both vectors untouched.
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c       a negative increment starts at the far end of that vector.
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = c*cx(ix) + s*cy(iy)
+        cy(iy) = c*cy(iy) - s*cx(ix)
+        cx(ix) = ctemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c       ctemp holds the new cx(i) so cy(i) still sees the old one.
+   20 do 30 i = 1,n
+        ctemp = c*cx(i) + s*cy(i)
+        cy(i) = c*cy(i) - s*cx(i)
+        cx(i) = ctemp
+   30 continue
+      return
+      end
+      subroutine  csscal(n,sa,cx,incx)
+c
+c     multiplies the real and imaginary parts of n elements of cx,
+c     taken with stride incx, by the real factor sa.
+c     does nothing unless both n and incx are positive.
+c
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*)
+      real sa
+      integer incx,k,last,n
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 1,n
+            cx(k) = cmplx(sa*real(cx(k)),sa*aimag(cx(k)))
+   10    continue
+      else
+c        strided storage
+         last = n*incx
+         do 20 k = 1,last,incx
+            cx(k) = cmplx(sa*real(cx(k)),sa*aimag(cx(k)))
+   20    continue
+      end if
+      return
+      end
+      subroutine  cswap (n,cx,incx,cy,incy)
+c
+c     exchanges the n elements of cx (stride incx) with those of cy
+c     (stride incy).  returns at once when n .le. 0.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*),hold
+      integer incx,incy,jx,jy,k,n
+c
+      if(n.le.0)return
+      if(incx.ne.1.or.incy.ne.1)then
+c
+c        general strides; a negative stride starts at the far end
+         jx = 1
+         jy = 1
+         if(incx.lt.0)jx = (1-n)*incx + 1
+         if(incy.lt.0)jy = (1-n)*incy + 1
+         do 10 k = 1,n
+            hold = cx(jx)
+            cx(jx) = cy(jy)
+            cy(jy) = hold
+            jx = jx + incx
+            jy = jy + incy
+   10    continue
+      else
+c
+c        both strides are 1
+         do 20 k = 1,n
+            hold = cx(k)
+            cx(k) = cy(k)
+            cy(k) = hold
+   20    continue
+      end if
+      return
+      end
+      SUBROUTINE CSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where  alpha and beta are scalars, A is a symmetric matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  Invalid arguments are reported via XERBLA; C is left unmodified.
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  A is complex symmetric (not Hermitian): nothing is conjugated.
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A: M when A multiplies from
+*     the left, N otherwise.  It is only used to validate LDA below.
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument in the calling sequence (0 if all are valid).
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: C is empty, or the update reduces to
+*     C := C  ( alpha = 0 and beta = 1 ).
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only  C := beta*C  remains.  For beta = 0
+*     C is assigned zero directly, so its input values are never read.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  C is overwritten in place, one column J
+*     at a time.
+      IF( LSAME( SIDE, 'L' ) )THEN
+*        Form  C := alpha*A*B + beta*C.   Step I assigns C( I, J )
+*        (beta applied) and adds into rows K assigned at earlier
+*        steps, hence I ascends when UPPER and descends otherwise.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*B*A + beta*C.   Column J takes the diagonal
+*        term first, then B( :, K ) for K < J and K > J, each scaled
+*        by alpha times the stored-triangle copy of A( K, J ).
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSYMM .
+*
+      END
+      SUBROUTINE CSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' +
+*                                         beta*C.
+*
+*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  Invalid arguments are reported via XERBLA; C is left unmodified.
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  No conjugation is applied: ' denotes the plain transpose.
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  NROWA is the row count of A and B,
+*     used only to validate LDA and LDB.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     INFO is the position of the first invalid argument (0 = ok).
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: C is empty, or beta = 1 and the
+*     rank-2k term vanishes ( alpha = 0 or K = 0 ).
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only the referenced triangle of  C  is
+*     scaled by beta ( or set to zero, unread, when beta = 0 ).
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  K = 0 with beta .ne. 1 also arrives here;
+*     the L-loops are then empty and C is in effect scaled by beta.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  C := alpha*A*B' + alpha*B*A' + beta*C.   Each column is
+*        scaled by beta, then updated per L; an update is skipped when
+*        A( J, L ) and B( J, L ) are both zero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*A'*B + alpha*B'*A + beta*C.   Each element
+*        is built from two dot products over L; C( I, J ) is not read
+*        when beta = 0.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYR2K.
+*
+      END
+      SUBROUTINE CSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  Invalid arguments are reported via XERBLA; C is left unmodified.
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  No conjugation is applied: ' denotes the plain transpose.
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  NROWA is the row count of A, used
+*     only to validate LDA.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     INFO is the position of the first invalid argument (0 = ok).
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: C is empty, or beta = 1 and the
+*     rank-k term vanishes ( alpha = 0 or K = 0 ).
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only the referenced triangle of  C  is
+*     scaled by beta ( or set to zero, unread, when beta = 0 ).
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  K = 0 with beta .ne. 1 also arrives here;
+*     the L-loops are then empty and C is in effect scaled by beta.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  C := alpha*A*A' + beta*C.   Each column is scaled by
+*        beta, then updated per L; an update is skipped when
+*        A( J, L ) is zero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C.   Each element is one dot
+*        product over L; C( I, J ) is not read when beta = 0.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYRK .
+*
+      END
+      SUBROUTINE CTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 90, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( KPLUS1, J ) )
+                     DO 100, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 120, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( KPLUS1, J ) )
+                     DO 130, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 150, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( 1, J ) )
+                     DO 160, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 180, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( 1, J ) )
+                     DO 190, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTBMV .
+*
+      END
+      SUBROUTINE CTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters. INFO is set to the position of the
+*     first invalid argument in the call, or stays 0 if all are valid.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ: no conjugation (TRANS = 'T'). NOUNIT: divide by diagonal.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops. KX is
+*     left unset when INCX = 1, where X is indexed directly by I and J.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x  by substitution, one column of A at a
+*        time; columns with X( J ) = 0 are skipped.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.  Each
+*        X( J ) is accumulated in TEMP from column J of A.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     DO 90, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 100, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     DO 120, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 130, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     DO 150, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 160, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( 1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     DO 180, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 190, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( 1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTBSV .
+*
+      END
+      SUBROUTINE CTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK     - 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    - 1
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + CONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 120, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 130, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + CONJG( AP( K ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  K    = KK     + 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    + 1
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + CONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 180, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 190, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + CONJG( AP( K ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTPMV .
+*
+      END
+      SUBROUTINE CTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is the position of the first invalid argument (7 = INCX).
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ: plain transpose requested; NOUNIT: diagonal is used.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is not set when INCX = 1; the unit-stride loops never use it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*        Here KK indexes the diagonal entry a( j, j ) within AP.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*        Here KK indexes a( 1, j ) (upper) or a( n, j ) (lower) in AP.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    + 1
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 120, K = KK, KK + J - 2
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 130, K = KK, KK + J - 2
+                        TEMP = TEMP - CONJG( AP( K ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    - 1
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK - N + J ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK - N + J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTPSV .
+*
+      END
+      SUBROUTINE CTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*     NOCONJ selects A' over conjg( A' ) in the transposed branches.
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     B is empty when M or N is zero, so A need not be referenced.
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 120, J = 1, N
+                  DO 110, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 90, K = 1, I - 1
+                           TEMP = TEMP + A( K, I )*B( K, J )
+   90                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 100, K = 1, I - 1
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  100                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  110             CONTINUE
+  120          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 200, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 170, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  170             CONTINUE
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+  200          CONTINUE
+            ELSE
+               DO 240, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 210, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  210             CONTINUE
+                  DO 230, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 220, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  220                   CONTINUE
+                     END IF
+  230             CONTINUE
+  240          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ).
+*
+            IF( UPPER )THEN
+               DO 280, K = 1, N
+                  DO 260, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*CONJG( A( J, K ) )
+                        END IF
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*CONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            ELSE
+               DO 320, K = N, 1, -1
+                  DO 300, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*CONJG( A( J, K ) )
+                        END IF
+                        DO 290, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  290                   CONTINUE
+                     END IF
+  300             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*CONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 310, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  310                CONTINUE
+                  END IF
+  320          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRMM .
+*
+      END
+      SUBROUTINE CTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + A( I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 120, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 130, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + A( I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 180, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 190, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRMV .
+*
+      END
+      SUBROUTINE CTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     NROWA is the order of A: M when SIDE = 'L', otherwise N.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*     INFO is the 1-based position of the first invalid argument.
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero:  B is zeroed, A is not referenced.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*           Each column of B is scaled by alpha and solved in place.
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B
+*           or    B := alpha*inv( conjg( A' ) )*B.
+*           NOCONJ selects A' ( TRANSA = 'T' ) over conjg( A' ).
+            IF( UPPER )THEN
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 110, K = 1, I - 1
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  110                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 120, K = 1, I - 1
+                           TEMP = TEMP - CONJG( A( K, I ) )*B( K, J )
+  120                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/CONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  130             CONTINUE
+  140          CONTINUE
+            ELSE
+               DO 180, J = 1, N
+                  DO 170, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 150, K = I + 1, M
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  150                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 160, K = I + 1, M
+                           TEMP = TEMP - CONJG( A( K, I ) )*B( K, J )
+  160                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/CONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  170             CONTINUE
+  180          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 230, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 190, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  190                CONTINUE
+                  END IF
+                  DO 210, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 220, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  220                CONTINUE
+                  END IF
+  230          CONTINUE
+            ELSE
+               DO 280, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 240, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  240                CONTINUE
+                  END IF
+                  DO 260, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 270, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' )
+*           or    B := alpha*B*inv( conjg( A' ) ).
+*           NOCONJ selects A' ( TRANSA = 'T' ) over conjg( A' ).
+            IF( UPPER )THEN
+               DO 330, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/CONJG( A( K, K ) )
+                     END IF
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+                  DO 310, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = CONJG( A( J, K ) )
+                        END IF
+                        DO 300, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  300                   CONTINUE
+                     END IF
+  310             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 320, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  320                CONTINUE
+                  END IF
+  330          CONTINUE
+            ELSE
+               DO 380, K = 1, N
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/CONJG( A( K, K ) )
+                     END IF
+                     DO 340, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  340                CONTINUE
+                  END IF
+                  DO 360, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = CONJG( A( J, K ) )
+                        END IF
+                        DO 350, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 370, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  370                CONTINUE
+                  END IF
+  380          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRSM .
+*
+      END
+      SUBROUTINE CTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is the 1-based position of the first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ is true for TRANS = 'T' (transpose without conjugate).
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is left unset when INCX = 1; those paths index X directly.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*        NOCONJ selects A' ( TRANS = 'T' ) over conjg( A' ).
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 120, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 130, I = 1, J - 1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 180, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 190, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRSV .
+*
+      END
+      double precision function dasum(n,dx,incx)
+c
+c     returns the sum of the absolute values of the n elements of dx
+c     that are spaced incx apart, starting at dx(1).
+c     the result is zero when n .le. 0 or incx .le. 0.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      integer n,incx
+      double precision dx(*)
+      integer k,last,nrem
+      double precision acc
+c
+      acc = 0.0d0
+      dasum = acc
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.ne.1)then
+c
+c        general stride: visit dx(1), dx(1+incx), ...
+c
+         last = n*incx
+         do 10 k = 1,last,incx
+            acc = acc + dabs(dx(k))
+   10    continue
+      else
+c
+c        unit stride: the first mod(n,6) elements are added one at a
+c        time, the remainder six per pass.
+c
+         nrem = mod(n,6)
+         do 20 k = 1,nrem
+            acc = acc + dabs(dx(k))
+   20    continue
+         do 30 k = nrem + 1,n,6
+            acc = acc + dabs(dx(k)) + dabs(dx(k + 1))
+     *          + dabs(dx(k + 2)) + dabs(dx(k + 3))
+     *          + dabs(dx(k + 4)) + dabs(dx(k + 5))
+   30    continue
+      end if
+      dasum = acc
+      return
+      end
+      subroutine daxpy(n,da,dx,incx,dy,incy)
+c
+c     overwrites dy with da*dx + dy  (constant times a vector plus a
+c     vector).  nothing is done when n .le. 0 or da is zero.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      integer n,incx,incy
+      double precision da,dx(*),dy(*)
+      integer k,kx,ky,nrem
+c
+      if(n.le.0)return
+      if (da .eq. 0.0d0) return
+c
+      if(incx.ne.1.or.incy.ne.1)then
+c
+c        general increments: a negative increment walks its vector
+c        backwards, so start from the far end of the storage.
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            dy(ky) = dy(ky) + da*dx(kx)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments equal to 1: mod(n,4) leading elements are
+c        updated singly, the rest four per pass.
+c
+         nrem = mod(n,4)
+         do 20 k = 1,nrem
+            dy(k) = dy(k) + da*dx(k)
+   20    continue
+         do 30 k = nrem + 1,n,4
+            dy(k) = dy(k) + da*dx(k)
+            dy(k + 1) = dy(k + 1) + da*dx(k + 1)
+            dy(k + 2) = dy(k + 2) + da*dx(k + 2)
+            dy(k + 3) = dy(k + 3) + da*dx(k + 3)
+   30    continue
+      end if
+c
+      return
+      end
+      double precision function dcabs1(z)
+c     returns dabs(real part of z) + dabs(imaginary part of z),
+c     taking the two parts with the dble and dimag intrinsics.
+      double complex z
+      intrinsic dabs,dble,dimag
+      dcabs1 = dabs(dble(z)) + dabs(dimag(z))
+      return
+      end
+      subroutine  dcopy(n,dx,incx,dy,incy)
+c
+c     copies the n elements of the vector dx into the vector dy.
+c     nothing is done when n .le. 0.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      integer n,incx,incy
+      double precision dx(*),dy(*)
+      integer k,kx,ky,nrem
+c
+      if(n.le.0)return
+c
+      if(incx.ne.1.or.incy.ne.1)then
+c
+c        general increments: a negative increment walks its vector
+c        backwards, so start from the far end of the storage.
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            dy(ky) = dx(kx)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments equal to 1: mod(n,7) leading elements are
+c        copied singly, the rest seven per pass.
+c
+         nrem = mod(n,7)
+         do 20 k = 1,nrem
+            dy(k) = dx(k)
+   20    continue
+         do 30 k = nrem + 1,n,7
+            dy(k) = dx(k)
+            dy(k + 1) = dx(k + 1)
+            dy(k + 2) = dx(k + 2)
+            dy(k + 3) = dx(k + 3)
+            dy(k + 4) = dx(k + 4)
+            dy(k + 5) = dx(k + 5)
+            dy(k + 6) = dx(k + 6)
+   30    continue
+      end if
+c
+      return
+      end
+      double precision function ddot(n,dx,incx,dy,incy)
+c
+c     returns the dot product of the n-element vectors dx and dy.
+c     the result is zero when n .le. 0.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      integer n,incx,incy
+      double precision dx(*),dy(*)
+      integer k,kx,ky,nrem
+      double precision acc
+c
+      acc = 0.0d0
+      ddot = acc
+      if(n.le.0)return
+c
+      if(incx.ne.1.or.incy.ne.1)then
+c
+c        general increments: a negative increment walks its vector
+c        backwards, so start from the far end of the storage.
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            acc = acc + dx(kx)*dy(ky)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments equal to 1: mod(n,5) leading products are
+c        added singly, the rest five per pass.
+c
+         nrem = mod(n,5)
+         do 20 k = 1,nrem
+            acc = acc + dx(k)*dy(k)
+   20    continue
+         do 30 k = nrem + 1,n,5
+            acc = acc + dx(k)*dy(k) + dx(k + 1)*dy(k + 1) +
+     *         dx(k + 2)*dy(k + 2) + dx(k + 3)*dy(k + 3) +
+     *         dx(k + 4)*dy(k + 4)
+   30    continue
+      end if
+      ddot = acc
+      return
+      end
+      SUBROUTINE DGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y holds the updated y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the argument list and passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: an empty matrix, or alpha = 0 with
+*     beta = 1, leaves y unchanged.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.  When beta is zero Y is set to zero
+*     without being read.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  y := alpha*A*x + y.  K + I is the row of the array A
+*        holding matrix element ( I, J ).  For J > KU the band of the
+*        next column starts one row lower, hence the update of KY.
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y := alpha*A'*x + y.  TEMP accumulates the dot product
+*        of the band of column J with x.  For J > KU the band of the
+*        next column starts one row lower, hence the update of KX.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGBMV .
+*
+      END
+      SUBROUTINE DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X',
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = B'.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+*     and  columns of  A  and the  number of  rows  of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the argument list and passed to XERBLA.
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: C is empty, or the product term
+*     vanishes ( alpha = 0 or k = 0 ) while beta = 1.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And if  alpha.eq.zero,  C is only scaled by beta ( or zeroed
+*     when beta is zero ) and A and B are not referenced.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  Columns of C are formed one at a time;
+*     when beta is zero the input contents of C are not used.
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.  Column L of A is skipped
+*           when B( L, J ) is exactly zero.
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C.  TEMP holds the dot product
+*           of column I of A and column J of B.
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B' + beta*C.  Column L of A is skipped
+*           when B( J, L ) is exactly zero.
+            DO 170, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 130, I = 1, M
+                     C( I, J ) = ZERO
+  130             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 140, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  140             CONTINUE
+               END IF
+               DO 160, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 150, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+  170       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C.  TEMP holds the dot
+*           product of column I of A and row J of B.
+            DO 200, J = 1, N
+               DO 190, I = 1, M
+                  TEMP = ZERO
+                  DO 180, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  180             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGEMM .
+*
+      END
+      SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the argument list and passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: an empty matrix, or alpha = 0 with
+*     beta = 1, leaves y unchanged.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.  When beta is zero Y is set to zero
+*     without being read.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.  Columns of A whose element of x
+*        is exactly zero are skipped.
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.  TEMP accumulates the dot product
+*        of column J of A with x.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               DO 90, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               DO 110, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGEMV .
+*
+      END
+      SUBROUTINE DGER  ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGER applies the rank-one update
+*
+*     A := alpha*x*y' + A,
+*
+*  in which alpha is a scalar, x is a vector of m elements, y is a
+*  vector of n elements and A is a matrix with m rows and n columns.
+*  The update is carried out in place, one column at a time.
+*
+*  Arguments
+*  =========
+*
+*  M      - INTEGER.  (input)
+*           Number of rows of A.  A negative value is rejected
+*           through XERBLA with INFO = 1.
+*
+*  N      - INTEGER.  (input)
+*           Number of columns of A.  A negative value is rejected
+*           through XERBLA with INFO = 2.
+*
+*  ALPHA  - DOUBLE PRECISION.  (input)
+*           The scalar alpha.  If it is zero the routine returns
+*           immediately and A is left as it was.
+*
+*  X      - DOUBLE PRECISION array.  (input)
+*           Dimension at least ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Holds the m elements of x, spaced INCX locations apart.
+*
+*  INCX   - INTEGER.  (input)
+*           Spacing of the elements of x inside X.  Zero is rejected
+*           through XERBLA with INFO = 5.  With a negative spacing
+*           the traversal starts at X( 1 - ( m - 1 )*INCX ).
+*
+*  Y      - DOUBLE PRECISION array.  (input)
+*           Dimension at least ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Holds the n elements of y, spaced INCY locations apart.
+*
+*  INCY   - INTEGER.  (input)
+*           Spacing of the elements of y inside Y.  Zero is rejected
+*           through XERBLA with INFO = 7.  With a negative spacing
+*           the traversal starts at Y( 1 - ( n - 1 )*INCY ).
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           (input/output)
+*           On entry the leading m by n part holds the matrix to be
+*           updated; on exit that part holds alpha*x*y' + A.
+*           Columns whose element of y is exactly zero are skipped
+*           and therefore not rewritten.
+*
+*  LDA    - INTEGER.  (input)
+*           First dimension of A as declared by the caller.  A value
+*           below max( 1, m ) is rejected through XERBLA with
+*           INFO = 9.
+*
+*  None of the scalar arguments is modified by this routine, and
+*  neither is X or Y.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   AYJ
+      INTEGER            INFO, IROW, IXBEG, IXPOS, JCOL, JYPOS
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Validate the arguments.  The checks run from the last argument
+*     to the first so that, when several are bad, INFO ends up naming
+*     the earliest offending one.
+*
+      INFO = 0
+      IF( LDA.LT.MAX( 1, M ) ) INFO = 9
+      IF( INCY.EQ.0 ) INFO = 7
+      IF( INCX.EQ.0 ) INFO = 5
+      IF( N.LT.0 ) INFO = 2
+      IF( M.LT.0 ) INFO = 1
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGER  ', INFO )
+         RETURN
+      END IF
+*
+*     Nothing to do for an empty matrix or a zero scalar.
+*
+      IF( M.EQ.0 ) RETURN
+      IF( N.EQ.0 ) RETURN
+      IF( ALPHA.EQ.ZERO ) RETURN
+*
+*     Locate the first element of y (the far end of Y when the
+*     spacing is negative).
+*
+      JYPOS = 1
+      IF( INCY.LT.0 ) JYPOS = 1 - ( N - 1 )*INCY
+*
+*     Update A one column at a time so that its elements are visited
+*     in storage order, in a single pass.
+*
+      IF( INCX.EQ.1 )THEN
+*
+*        x is contiguous: index X directly with the row number.
+*
+         DO 20 JCOL = 1, N
+            IF( Y( JYPOS ).NE.ZERO )THEN
+               AYJ = ALPHA*Y( JYPOS )
+               DO 10 IROW = 1, M
+                  A( IROW, JCOL ) = A( IROW, JCOL ) + X( IROW )*AYJ
+   10          CONTINUE
+            END IF
+            JYPOS = JYPOS + INCY
+   20    CONTINUE
+      ELSE
+*
+*        x is strided: walk X with a running position that is
+*        reset to the start of the vector for every column.
+*
+         IXBEG = 1
+         IF( INCX.LT.0 ) IXBEG = 1 - ( M - 1 )*INCX
+         DO 40 JCOL = 1, N
+            IF( Y( JYPOS ).NE.ZERO )THEN
+               AYJ   = ALPHA*Y( JYPOS )
+               IXPOS = IXBEG
+               DO 30 IROW = 1, M
+                  A( IROW, JCOL ) = A( IROW, JCOL ) + X( IXPOS )*AYJ
+                  IXPOS = IXPOS + INCX
+   30          CONTINUE
+            END IF
+            JYPOS = JYPOS + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DGER  .
+*
+      END
+      DOUBLE PRECISION FUNCTION DNRM2 ( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      DOUBLE PRECISION                  X( * )
+*     ..
+*
+*  DNRM2 computes the euclidean norm of the N elements of X that
+*  lie INCX locations apart and returns it as the function value:
+*
+*     DNRM2 := sqrt( x'*x )
+*
+*  The value is zero when N or INCX is less than one.
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to DLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION      ONE         , ZERO
+      PARAMETER           ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      INTEGER               IPOS, K
+      DOUBLE PRECISION      BIG, MAGN, SUMSQ
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*     Degenerate cases are answered straight away.
+      DNRM2 = ZERO
+      IF( N.LT.1 .OR. INCX.LT.1 ) RETURN
+      DNRM2 = ABS( X( 1 ) )
+      IF( N.EQ.1 ) RETURN
+*
+*     General case.  The sum of squares is carried in scaled form,
+*     BIG**2 * SUMSQ, where BIG is the largest magnitude met so far,
+*     so every ratio that gets squared is at most one.  This is the
+*     inlined equivalent of the LAPACK auxiliary call
+*        CALL DLASSQ( N, X, INCX, SCALE, SSQ )
+      BIG   = ZERO
+      SUMSQ = ONE
+      IPOS  = 1
+      DO 10 K = 1, N
+         IF( X( IPOS ).NE.ZERO )THEN
+            MAGN = ABS( X( IPOS ) )
+            IF( BIG.LT.MAGN )THEN
+               SUMSQ = ONE + SUMSQ*( BIG/MAGN )**2
+               BIG   = MAGN
+            ELSE
+               SUMSQ = SUMSQ + ( MAGN/BIG )**2
+            END IF
+         END IF
+         IPOS = IPOS + INCX
+   10 CONTINUE
+      DNRM2 = BIG * SQRT( SUMSQ )
+      RETURN
+*
+*     End of DNRM2.
+*
+      END
+      subroutine  drot (n,dx,incx,dy,incy,c,s)
+c
+c     applies a plane rotation to n pairs of vector elements:
+c        dx(i) :=  c*dx(i) + s*dy(i)
+c        dy(i) :=  c*dy(i) - s*dx(i)   (with the old dx(i))
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),c,s,xrot
+      integer incx,incy,n,k,kx,ky
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)then
+c        both increments equal to 1: one index serves both vectors
+         do 10 k = 1,n
+            xrot = c*dx(k) + s*dy(k)
+            dy(k) = c*dy(k) - s*dx(k)
+            dx(k) = xrot
+   10    continue
+      else
+c        unequal increments or equal increments not equal to 1;
+c        a negative increment starts from the far end of its vector
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 20 k = 1,n
+            xrot = c*dx(kx) + s*dy(ky)
+            dy(ky) = c*dy(ky) - s*dx(kx)
+            dx(kx) = xrot
+            kx = kx + incx
+            ky = ky + incy
+   20    continue
+      endif
+c
+      return
+      end
+      subroutine drotg(da,db,c,s)
+c
+c     construct givens plane rotation.  c and s are set, da is
+c     overwritten by rho and db by z (both computed below).
+c     jack dongarra, linpack, 3/11/78.
+      double precision da,db,c,s,piv,asum,rho,z
+      piv = db
+      if( dabs(da) .gt. dabs(db) ) piv = da
+      asum = dabs(da) + dabs(db)
+      if( asum .eq. 0.0d0 ) then
+         c = 1.0d0
+         s = 0.0d0
+         rho = 0.0d0
+         z = 0.0d0
+      else
+         rho = asum*dsqrt((da/asum)**2 + (db/asum)**2)
+         rho = dsign(1.0d0,piv)*rho
+         c = da/rho
+         s = db/rho
+         z = 1.0d0
+         if( dabs(da) .gt. dabs(db) ) z = s
+         if( dabs(db) .ge. dabs(da) .and. c .ne. 0.0d0 ) z = 1.0d0/c
+      endif
+      da = rho
+      db = z
+      return
+      end
+      SUBROUTINE DROTM (N,DX,INCX,DY,INCY,DPARAM)
+C
+C     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
+C
+C     (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN
+C     (DY**T)
+C
+C     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
+C     LX = 1+(1-N)*INCX, AND SIMILARLY FOR DY USING LY AND INCY.
+C     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
+C
+C     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
+C
+C       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
+C     H=(          )    (          )    (          )    (          )
+C       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
+C     SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM.
+C
+C     N .LE. 0 OR DFLAG = -2 RETURNS AT ONCE.  OTHERWISE DFLAG .LT. 0
+C     SELECTS THE FULL FORM AND DFLAG .GT. 0 THE THIRD FORM.  DX AND DY
+C     ARE TRANSFORMED IN PLACE.  DPARAM IS NOT MODIFIED, AND ONLY THE
+C     ENTRIES NOT IMPLIED BY DFLAG ARE READ.
+C
+      INTEGER N,INCX,INCY,I,KX,KY,NSTEPS
+      DOUBLE PRECISION DX(*),DY(*),DPARAM(5)
+      DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,W,Z,ZERO,TWO
+      PARAMETER (ZERO=0.D0,TWO=2.D0)
+      DFLAG=DPARAM(1)
+      IF(N .LE. 0 .OR.(DFLAG+TWO.EQ.ZERO)) RETURN
+      IF(INCX.EQ.INCY.AND. INCX .GT.0) THEN
+C        EQUAL, POSITIVE INCREMENTS.. ONE INDEX SERVES BOTH VECTORS.
+         NSTEPS=N*INCX
+         IF(DFLAG .LT. ZERO) THEN
+            DH11=DPARAM(2)
+            DH12=DPARAM(4)
+            DH21=DPARAM(3)
+            DH22=DPARAM(5)
+            DO 10 I=1,NSTEPS,INCX
+               W=DX(I)
+               Z=DY(I)
+               DX(I)=W*DH11+Z*DH12
+               DY(I)=W*DH21+Z*DH22
+   10       CONTINUE
+         ELSE IF(DFLAG .EQ. ZERO) THEN
+            DH12=DPARAM(4)
+            DH21=DPARAM(3)
+            DO 20 I=1,NSTEPS,INCX
+               W=DX(I)
+               Z=DY(I)
+               DX(I)=W+Z*DH12
+               DY(I)=W*DH21+Z
+   20       CONTINUE
+         ELSE
+            DH11=DPARAM(2)
+            DH22=DPARAM(5)
+            DO 30 I=1,NSTEPS,INCX
+               W=DX(I)
+               Z=DY(I)
+               DX(I)=W*DH11+Z
+               DY(I)=-W+DH22*Z
+   30       CONTINUE
+         END IF
+      ELSE
+C        OTHER INCREMENTS.. SEPARATE INDICES.  A NEGATIVE INCREMENT
+C        STARTS FROM THE FAR END OF ITS VECTOR.
+         KX=1
+         KY=1
+         IF(INCX .LT. 0) KX=1+(1-N)*INCX
+         IF(INCY .LT. 0) KY=1+(1-N)*INCY
+         IF(DFLAG .LT. ZERO) THEN
+            DH11=DPARAM(2)
+            DH12=DPARAM(4)
+            DH21=DPARAM(3)
+            DH22=DPARAM(5)
+            DO 40 I=1,N
+               W=DX(KX)
+               Z=DY(KY)
+               DX(KX)=W*DH11+Z*DH12
+               DY(KY)=W*DH21+Z*DH22
+               KX=KX+INCX
+               KY=KY+INCY
+   40       CONTINUE
+         ELSE IF(DFLAG .EQ. ZERO) THEN
+            DH12=DPARAM(4)
+            DH21=DPARAM(3)
+            DO 50 I=1,N
+               W=DX(KX)
+               Z=DY(KY)
+               DX(KX)=W+Z*DH12
+               DY(KY)=W*DH21+Z
+               KX=KX+INCX
+               KY=KY+INCY
+   50       CONTINUE
+         ELSE
+            DH11=DPARAM(2)
+            DH22=DPARAM(5)
+            DO 60 I=1,N
+               W=DX(KX)
+               Z=DY(KY)
+               DX(KX)=W*DH11+Z
+               DY(KY)=-W+DH22*Z
+               KX=KX+INCX
+               KY=KY+INCY
+   60       CONTINUE
+         END IF
+      END IF
+      RETURN
+      END
+      SUBROUTINE DROTMG (DD1,DD2,DX1,DY1,DPARAM)
+C
+C     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
+C     THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)*
+C     DY1)**T.
+C     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
+C
+C     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
+C
+C       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
+C     H=(          )    (          )    (          )    (          )
+C       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
+C     LOCATIONS 2-5 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22
+C     RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE
+C     VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.)
+C
+C     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
+C     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
+C     OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
+C     DD1, DD2 AND DX1 ARE UPDATED IN PLACE.  DY1 IS NOT MODIFIED.
+      DOUBLE PRECISION GAM,ONE,RGAMSQ,DD2,DH11,DH21,DPARAM,DP2,
+     1 DQ2,DU,DY1,ZERO,GAMSQ,DD1,DFLAG,DH12,DH22,DP1,DQ1,
+     2 DTEMP,DX1,TWO
+      DIMENSION DPARAM(5)
+      INTEGER IGO
+      DATA ZERO,ONE,TWO /0.D0,1.D0,2.D0/
+      DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/
+      IF(.NOT. DD1 .LT. ZERO) GO TO 10
+C       GO ZERO-H-D-AND-DX1..
+          GO TO 60
+   10 CONTINUE
+C     CASE-DD1-NONNEGATIVE
+      DP2=DD2*DY1
+      IF(.NOT. DP2 .EQ. ZERO) GO TO 20
+          DFLAG=-TWO
+          GO TO 260
+C     REGULAR-CASE..
+   20 CONTINUE
+      DP1=DD1*DX1
+      DQ2=DP2*DY1
+      DQ1=DP1*DX1
+C
+      IF(.NOT. DABS(DQ1) .GT. DABS(DQ2)) GO TO 40
+          DH21=-DY1/DX1
+          DH12=DP2/DP1
+C
+          DU=ONE-DH12*DH21
+C
+          IF(.NOT. DU .LE. ZERO) GO TO 30
+C         GO ZERO-H-D-AND-DX1..
+               GO TO 60
+   30     CONTINUE
+               DFLAG=ZERO
+               DD1=DD1/DU
+               DD2=DD2/DU
+               DX1=DX1*DU
+C         GO SCALE-CHECK..
+               GO TO 100
+   40 CONTINUE
+          IF(.NOT. DQ2 .LT. ZERO) GO TO 50
+C         GO ZERO-H-D-AND-DX1..
+               GO TO 60
+   50     CONTINUE
+               DFLAG=ONE
+               DH11=DP1/DP2
+               DH22=DX1/DY1
+               DU=ONE+DH11*DH22
+               DTEMP=DD2/DU
+               DD2=DD1/DU
+               DD1=DTEMP
+               DX1=DY1*DU
+C         GO SCALE-CHECK
+               GO TO 100
+C     PROCEDURE..ZERO-H-D-AND-DX1..
+   60 CONTINUE
+          DFLAG=-ONE
+          DH11=ZERO
+          DH12=ZERO
+          DH21=ZERO
+          DH22=ZERO
+C
+          DD1=ZERO
+          DD2=ZERO
+          DX1=ZERO
+C         RETURN..
+          GO TO 220
+C     PROCEDURE..FIX-H..
+   70 CONTINUE
+      IF(.NOT. DFLAG .GE. ZERO) GO TO 90
+C         (IGO, SET BY THE CALLER, SELECTS THE RETURN LABEL AT 90.)
+          IF(.NOT. DFLAG .EQ. ZERO) GO TO 80
+          DH11=ONE
+          DH22=ONE
+          DFLAG=-ONE
+          GO TO 90
+   80     CONTINUE
+          DH21=-ONE
+          DH12=ONE
+          DFLAG=-ONE
+   90 CONTINUE
+      GO TO (120,150,180,210),IGO
+C     PROCEDURE..SCALE-CHECK
+  100 CONTINUE
+  110     CONTINUE
+          IF(.NOT. DD1 .LE. RGAMSQ) GO TO 130
+               IF(DD1 .EQ. ZERO) GO TO 160
+               IGO=1
+C              FIX-H..
+               GO TO 70
+  120          CONTINUE
+               DD1=DD1*GAM**2
+               DX1=DX1/GAM
+               DH11=DH11/GAM
+               DH12=DH12/GAM
+          GO TO 110
+  130 CONTINUE
+  140     CONTINUE
+          IF(.NOT. DD1 .GE. GAMSQ) GO TO 160
+               IGO=2
+C              FIX-H..
+               GO TO 70
+  150          CONTINUE
+               DD1=DD1/GAM**2
+               DX1=DX1*GAM
+               DH11=DH11*GAM
+               DH12=DH12*GAM
+          GO TO 140
+  160 CONTINUE
+  170     CONTINUE
+          IF(.NOT. DABS(DD2) .LE. RGAMSQ) GO TO 190
+               IF(DD2 .EQ. ZERO) GO TO 220
+               IGO=3
+C              FIX-H..
+               GO TO 70
+  180          CONTINUE
+               DD2=DD2*GAM**2
+               DH21=DH21/GAM
+               DH22=DH22/GAM
+          GO TO 170
+  190 CONTINUE
+  200     CONTINUE
+          IF(.NOT. DABS(DD2) .GE. GAMSQ) GO TO 220
+               IGO=4
+C              FIX-H..
+               GO TO 70
+  210          CONTINUE
+               DD2=DD2/GAM**2
+               DH21=DH21*GAM
+               DH22=DH22*GAM
+          GO TO 200
+  220 CONTINUE
+          IF(DFLAG)250,230,240
+  230     CONTINUE
+               DPARAM(3)=DH21
+               DPARAM(4)=DH12
+               GO TO 260
+  240     CONTINUE
+               DPARAM(2)=DH11
+               DPARAM(5)=DH22
+               GO TO 260
+  250     CONTINUE
+               DPARAM(2)=DH11
+               DPARAM(3)=DH21
+               DPARAM(4)=DH12
+               DPARAM(5)=DH22
+  260 CONTINUE
+          DPARAM(1)=DFLAG
+          RETURN
+      END
+      SUBROUTINE DSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.  In column J
+*        the entry a( i, j ) is held in A( L + I, J ), L = K + 1 - J.
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+*              For J > K the next column starts one row further down.
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.  In column J
+*        the entry a( i, j ) is held in A( L + I, J ), L = 1 - J.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( 1, J )
+               L      = 1            - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( 1, J )
+               L       = 1             - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSBMV .
+*
+      END
+      subroutine  dscal(n,da,dx,incx)
+c
+c     scales a vector by a constant:  dx(i) := da*dx(i)  for the
+c     n elements of dx that lie incx storage locations apart.
+c     returns without doing anything if n .le. 0 or incx .le. 0.
+c     uses an unrolled loop for increment equal to one.
+c     arguments n, da and incx are not modified.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision da,dx(*)
+      integer incx,n,k,nrem
+c
+      if( n.le.0 .or. incx.le.0 )return
+c
+      if(incx.ne.1)then
+c
+c        increment not equal to 1: visit every incx-th element
+c
+         do 10 k = 1,n*incx,incx
+            dx(k) = da*dx(k)
+   10    continue
+      else
+c
+c        increment equal to 1: the mod(n,5) leading elements are
+c        scaled one by one, the remainder five at a time
+c
+         nrem = mod(n,5)
+         do 20 k = 1,nrem
+            dx(k) = da*dx(k)
+   20    continue
+         do 30 k = nrem+1,n,5
+            dx(k) = da*dx(k)
+            dx(k + 1) = da*dx(k + 1)
+            dx(k + 2) = da*dx(k + 2)
+            dx(k + 3) = da*dx(k + 3)
+            dx(k + 4) = da*dx(k + 4)
+   30    continue
+      endif
+c
+      return
+      end
+*DECK DSDOT
+      DOUBLE PRECISION FUNCTION DSDOT (N, SX, INCX, SY, INCY)
+C***BEGIN PROLOGUE  DSDOT
+C***PURPOSE  Compute the inner product of two vectors with extended
+C            precision accumulation and result.
+C***LIBRARY   SLATEC (BLAS)
+C***CATEGORY  D1A4
+C***TYPE      DOUBLE PRECISION (DSDOT-D, DCDOT-C)
+C***KEYWORDS  BLAS, COMPLEX VECTORS, DOT PRODUCT, INNER PRODUCT,
+C             LINEAR ALGEBRA, VECTOR
+C***AUTHOR  Lawson, C. L., (JPL)
+C           Hanson, R. J., (SNLA)
+C           Kincaid, D. R., (U. of Texas)
+C           Krogh, F. T., (JPL)
+C***DESCRIPTION
+C
+C                B L A S  Subprogram
+C    Description of Parameters
+C
+C     --Input--
+C        N  number of element pairs entering the product
+C       SX  single precision vector holding N elements
+C     INCX  stride between successive elements of SX
+C       SY  single precision vector holding N elements
+C     INCY  stride between successive elements of SY
+C
+C     --Output--
+C    DSDOT  double precision dot product (zero if N.LE.0)
+C
+C     Each element is widened to double precision before it is
+C     multiplied, and the sum is kept in double precision, giving
+C     DSDOT = sum for I = 0 to N-1 of  SX(LX+I*INCX) * SY(LY+I*INCY),
+C     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
+C     defined in a similar way using INCY.
+C
+C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
+C                 Krogh, Basic linear algebra subprograms for Fortran
+C                 usage, Algorithm No. 539, Transactions on Mathematical
+C                 Software 5, 3 (September 1979), pp. 308-323.
+C***ROUTINES CALLED  (NONE)
+C***REVISION HISTORY  (YYMMDD)
+C   791001  DATE WRITTEN
+C   890831  Modified array declarations.  (WRB)
+C   890831  REVISION DATE from Version 3.2
+C   891214  Prologue converted to Version 4.0 format.  (BAB)
+C   920310  Corrected definition of LX in DESCRIPTION.  (WRB)
+C   920501  Reformatted the REFERENCES section.  (WRB)
+C***END PROLOGUE  DSDOT
+      INTEGER N, INCX, INCY
+      REAL SX(*),SY(*)
+      INTEGER J, JX, JY
+C***FIRST EXECUTABLE STATEMENT  DSDOT
+      DSDOT = 0.0D0
+      IF (N .LE. 0) RETURN
+      IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
+C        Equal, positive increments: one index serves both vectors.
+         DO 10 J = 1,N*INCX,INCX
+            DSDOT = DSDOT + DBLE(SX(J))*DBLE(SY(J))
+   10    CONTINUE
+      ELSE
+C        Unequal or nonpositive increments: separate indices; a
+C        negative increment starts from the far end of its vector.
+         JX = 1
+         JY = 1
+         IF (INCX .LT. 0) JX = 1+(1-N)*INCX
+         IF (INCY .LT. 0) JY = 1+(1-N)*INCY
+         DO 20 J = 1,N
+            DSDOT = DSDOT + DBLE(SX(JX))*DBLE(SY(JY))
+            JX = JX + INCX
+            JY = JY + INCY
+   20    CONTINUE
+      END IF
+      RETURN
+      END
+      SUBROUTINE DSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  On failure INFO is the position of the
+*     first bad argument in the call (UPLO=1, N=2, INCX=6, INCY=9).
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: N = 0 leaves nothing to do, and ALPHA = 0
+*     with BETA = 1 reduces the operation to y := y.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  For a negative increment
+*     the start is 1 - (N-1)*INC, the highest index that is accessed.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP; KK (set below)
+*     is the index in AP of the first stored element of column J.
+*
+*     First form  y := beta*y.  For BETA = 0 zeros are stored directly,
+*     so the input Y is never read and need not be set on entry.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  y  when AP contains the upper triangle.  Column J holds
+*        rows 1..J with the diagonal last, at AP( KK + J - 1 ).  Each
+*        off-diagonal entry updates y(I) and also feeds TEMP2 for y(J).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y  when AP contains the lower triangle.  Column J holds
+*        rows J..N with the diagonal first, at AP( KK ); the entries
+*        below it update y(I) and feed TEMP2 as in the upper case.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*AP( KK )
+               K      = KK           + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*AP( KK )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPMV .
+*
+      END
+      SUBROUTINE DSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  On failure INFO is the position of the
+*     first bad argument in the call (UPLO=1, N=2, INCX=5, INCY=7).
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: with N = 0 or ALPHA = 0 the routine
+*     returns before AP is referenced.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*     Set up the start points in X and Y if the increments are not both
+*     unity.  KX, KY, JX and JY stay unset when both increments are 1;
+*     the unit-stride loops below do not use them.  A negative
+*     increment starts from index 1 - (N-1)*INC.
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.  KK is the
+*     index in AP of the first stored element of column J.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when upper triangle is stored in AP.  Column J, rows
+*        1..J, gets x(I)*alpha*y(J) + y(I)*alpha*x(J) added; the column
+*        is skipped when x(J) and y(J) are both zero.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when lower triangle is stored in AP.  Column J holds
+*        rows J..N with the diagonal first, so the row index starts at
+*        J (IX, IY at JX, JY) instead of at 1 (KX, KY).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPR2 .
+*
+      END
+      SUBROUTINE DSPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  On failure INFO is the position of the
+*     first bad argument in the call (UPLO=1, N=2, INCX=5).
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: with N = 0 or ALPHA = 0 the routine
+*     returns before AP is referenced.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*     Set the start point in X if the increment is not unity.  INCX = 0
+*     was rejected above, so the LE branch is taken only for a negative
+*     INCX; KX stays unset for INCX = 1, where it is never used.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.  KK is the
+*     index in AP of the first stored element of column J.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when upper triangle is stored in AP.  Column J, rows
+*        1..J, gets x(I)*alpha*x(J) added; the column is skipped when
+*        x(J) is zero.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when lower triangle is stored in AP.  Column J holds
+*        rows J..N with the diagonal first, so the row index starts at
+*        J (IX at JX) instead of at 1 (KX).
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPR  .
+*
+      END
+      subroutine  dswap (n,dx,incx,dy,incy)
+c
+c     exchanges the contents of the vectors dx and dy in place.
+c
+c       n     number of elements to exchange; nothing is done if n.le.0
+c       dx    first vector, stepped through with increment incx
+c       dy    second vector, stepped through with increment incy
+c
+c     a negative increment walks its vector downwards, starting from
+c     index (1-n)*inc + 1.  when both increments are 1 the exchange
+c     loop is unrolled three elements at a time.
+c
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),hold
+      integer incx,incy,j,jx,jy,n,nlead
+c
+      if(n.le.0)return
+      if(incx.ne.1.or.incy.ne.1)then
+c       general increments: one element per pass
+        jx = 1
+        jy = 1
+        if(incx.lt.0)jx = (1-n)*incx + 1
+        if(incy.lt.0)jy = (1-n)*incy + 1
+        do 10 j = 1,n
+          hold = dx(jx)
+          dx(jx) = dy(jy)
+          dy(jy) = hold
+          jx = jx + incx
+          jy = jy + incy
+   10   continue
+      else
+c       unit increments: swap the mod(n,3) leading elements singly,
+c       then the remaining elements in groups of three
+        nlead = mod(n,3)
+        do 20 j = 1,nlead
+          hold = dx(j)
+          dx(j) = dy(j)
+          dy(j) = hold
+   20   continue
+        do 30 j = nlead+1,n,3
+          hold = dx(j)
+          dx(j) = dy(j)
+          dy(j) = hold
+          hold = dx(j+1)
+          dx(j+1) = dy(j+1)
+          dy(j+1) = hold
+          hold = dx(j+2)
+          dx(j+2) = dy(j+2)
+          dy(j+2) = hold
+   30   continue
+      endif
+      return
+      end
+c     end of dswap
+      SUBROUTINE DSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars,  A is a symmetric matrix and  B and
+*  C are  m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP1, TEMP2
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*     Set NROWA as the number of rows of A (M when A multiplies from the
+*     left, N otherwise).  It is used only to validate LDA below.  UPPER
+*     records which triangle of A is referenced.
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.  On failure INFO is the position of the
+*     first bad argument: SIDE=1, UPLO=2, M=3, N=4, LDA=7, LDB=9, LDC=12.
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: an empty C (M = 0 or N = 0), or ALPHA = 0
+*     with BETA = 1, which reduces the operation to C := C.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  the result is just beta*C; for BETA = 0
+*     zeros are stored directly, so C is never read.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*        Form  C := alpha*A*B + beta*C.  I ascends when UPPER and
+*        descends otherwise, so every C( K, J ) updated in the K loop
+*        was already scaled by BETA when I passed through row K.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*B*A + beta*C.  Column J of C is beta*C(:,J)
+*        plus the sum over K of alpha*a(K,J)*B(:,K), with a(K,J) read
+*        from the stored triangle ( A( K, J ) or its mirror A( J, K ) ).
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSYMM .
+*
+      END
+      SUBROUTINE DSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the call list and passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  y is unchanged when  n = 0,  or
+*     when  alpha = 0  and  beta = 1.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*     A negative increment starts at element  1 - ( n - 1 )*inc.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*     ( beta = 0  stores zeros, so  Y  is not read in that case. )
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*        Each stored  A( I, J ),  I < J,  also serves as  A( J, I ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*        Each stored  A( I, J ),  I > J,  also serves as  A( J, I ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( J, J )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( J, J )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYMV .
+*
+      END
+      SUBROUTINE DSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the call list and passed to XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  A is unchanged when  n = 0  or
+*     alpha = 0.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*     ( KX, KY, JX and JY are left unset when both increments are 1. )
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*        Column J is skipped when x( j ) and y( j ) are both zero.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*        Only rows J to N of each column J are updated.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR2 .
+*
+      END
+      SUBROUTINE DSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+*                                        beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrices  A and B.  K must be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   TEMP1, TEMP2
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     NROWA is the row count of A and B as stored:  n or k.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     INFO receives the position of the first invalid argument.
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  n = 0,  or  beta = 1  together with
+*     alpha = 0  or  k = 0,  leaves C unchanged.
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*     Only the UPLO triangle of C is scaled by beta ( or zeroed ).
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + beta*C.
+*        Column J of C is scaled by beta first, then updated.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + beta*C.
+*        TEMP1 and TEMP2 accumulate dot products over  L = 1, K.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR2K.
+*
+      END
+      SUBROUTINE DSYR  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR  .
+*
+      END
+      SUBROUTINE DSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrix  A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  When BETA is zero the input values of C are never read.
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*     Test the input parameters.  NROWA is the row count of A implied
+*     by TRANS and is only used to validate LDA.  INFO records the
+*     position of the first invalid argument.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYRK ', INFO )
+         RETURN
+      END IF
+*     Quick return if possible: C is empty, or the update term
+*     vanishes ( alpha = 0 or k = 0 ) while beta = 1, so C would be
+*     left unchanged.
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     And when  alpha.eq.zero  the result is just  C := beta*C  on the
+*     triangle selected by UPLO; A is not referenced.  beta = 0 is
+*     handled by storing zeros rather than multiplying.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations ( alpha is nonzero from here on ).
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  C := alpha*A*A' + beta*C,  one column of C at a time.
+*        Column J is scaled by beta, then updated with column L of A
+*        times  alpha*A( J, L )  for each L with  A( J, L )  nonzero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*A'*A + beta*C.  Each referenced C( I, J ) is
+*        built from TEMP, the dot product of columns I and J of A;
+*        C( I, J ) is not read when beta is zero.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYRK .
+*
+      END
+      SUBROUTINE DTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  If an argument is invalid, XERBLA is called and X is not modified.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument ( A and X themselves are not checked,
+*     so 6 and 8 never occur ).
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*     KX is left unset when INCX = 1; the unit-stride branches index X
+*     directly by I and J and never use it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  x := A*x,  one column of the band at a time.  For
+*        INCX.NE.1, KX holds the position in X of the band row at
+*        which the inner loop over the current column starts.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*        Form  x := A'*x.  X( J ) becomes the dot product of column J
+*        of the band with x; J runs so that the entries of x still
+*        needed have not yet been overwritten.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 90, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 110, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 130, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 150, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTBMV .
+*
+      END
+      SUBROUTINE DTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A')*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  DO 90, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  DO 110, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  DO 130, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  DO 150, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTBSV .
+*
+      END
+      SUBROUTINE DTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument and is reported through XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.  KX is
+*     left unset when INCX = 1; the unit-stride loops index X by I, J.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  x := A*x.   KK indexes the first stored element of
+*        column J of AP for UPLO = 'U', the last one for UPLO = 'L'.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK =1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*        Form  x := A'*x.   Here KK indexes the diagonal element
+*        a( j, j )  of column J within AP.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK - 1
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    - 1
+   90             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 110, K = KK - 1, KK - J + 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK + 1
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    + 1
+  130             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 150, K = KK + 1, KK + N - J
+                     IX   = IX   + INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTPMV .
+*
+      END
+      SUBROUTINE DTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument and is reported through XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.  KX is
+*     left unset when INCX = 1; the unit-stride loops index X by I, J.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  x := inv( A )*x.   Here KK indexes the diagonal
+*        element  a( j, j )  of column J within AP.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*        Form  x := inv( A' )*x.   KK indexes the first stored element
+*        of column J of AP for UPLO = 'U', the last one for UPLO = 'L'.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    + 1
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, K = KK, KK + J - 2
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  K = KK
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    - 1
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - (N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTPSV .
+*
+      END
+      SUBROUTINE DTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      DOUBLE PRECISION   ALPHA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B.
+*
+            IF( UPPER )THEN
+               DO 110, J = 1, N
+                  DO 100, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 90, K = 1, I - 1
+                        TEMP = TEMP + A( K, I )*B( K, J )
+   90                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  100             CONTINUE
+  110          CONTINUE
+            ELSE
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 120, K = I + 1, M
+                        TEMP = TEMP + A( K, I )*B( K, J )
+  120                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  130             CONTINUE
+  140          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 180, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  150             CONTINUE
+                  DO 170, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 160, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  160                   CONTINUE
+                     END IF
+  170             CONTINUE
+  180          CONTINUE
+            ELSE
+               DO 220, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 190, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  190             CONTINUE
+                  DO 210, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+  220          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'.
+*
+            IF( UPPER )THEN
+               DO 260, K = 1, N
+                  DO 240, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 250, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            ELSE
+               DO 300, K = N, 1, -1
+                  DO 280, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 270, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  270                   CONTINUE
+                     END IF
+  280             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+  300          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRMM .
+*
+      END
+      SUBROUTINE DTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*  Note: X is updated in place using only scalar temporaries.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO records the position of the first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOUNIT selects whether the diagonal of A is applied.
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is neither set nor read when INCX = 1.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*        Adds x( j ) times column j of A into x, column by column.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*        Each x( j ) becomes a dot product with column j of A.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 110, I = J - 1, 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + A( I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = J + 1, N
+                     IX   = IX   + INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRMV .
+*
+      END
+      SUBROUTINE DTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      DOUBLE PRECISION   ALPHA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*  B is overwritten in place; A is only read.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO records the position of the first invalid argument.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  B is empty when M or N is zero.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero:  B is zeroed, A is not referenced.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*           Column j is scaled by alpha, then solved in place.
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B.
+*           Each B( i, j ) is alpha*B( i, j ) less a dot product.
+            IF( UPPER )THEN
+               DO 130, J = 1, N
+                  DO 120, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     DO 110, K = 1, I - 1
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  110                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  120             CONTINUE
+  130          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     DO 140, K = I + 1, M
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  140                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 210, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 170, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  170                CONTINUE
+                  END IF
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 200, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+            ELSE
+               DO 260, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 220, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  220                CONTINUE
+                  END IF
+                  DO 240, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 250, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' ).
+*
+            IF( UPPER )THEN
+               DO 310, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+                  DO 290, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 280, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  280                   CONTINUE
+                     END IF
+  290             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 300, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  300                CONTINUE
+                  END IF
+  310          CONTINUE
+            ELSE
+               DO 360, K = 1, N
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 320, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  320                CONTINUE
+                  END IF
+                  DO 340, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 330, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  330                   CONTINUE
+                     END IF
+  340             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 350, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  350                CONTINUE
+                  END IF
+  360          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRSM .
+*
+      END
+      SUBROUTINE DTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRSV .
+*
+      END
+      double precision function dzasum(n,zx,incx)
+c
+c     returns the sum of dcabs1(zx(k)) over n elements of zx taken
+c     with stride incx.  dcabs1 is an external function, not defined
+c     here.  the result is 0 if n .le. 0 or incx .le. 0.
+c     jack dongarra, 3/11/78; modified 3/93 and 12/3/93.
+c
+      double complex zx(*)
+      double precision acc,dcabs1
+      integer incx,k,kz,n
+c
+      dzasum = 0.0d0
+      acc = 0.0d0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)then
+c
+c        contiguous storage: index the vector directly
+c
+         do 10 k = 1,n
+            acc = acc + dcabs1(zx(k))
+   10    continue
+      else
+c
+c        strided storage: kz walks zx in steps of incx
+c
+         kz = 1
+         do 20 k = 1,n
+            acc = acc + dcabs1(zx(kz))
+            kz = kz + incx
+   20    continue
+      end if
+      dzasum = acc
+      return
+      end
+      DOUBLE PRECISION FUNCTION DZNRM2( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      COMPLEX*16                        X( * )
+*     ..
+*
+*  DZNRM2 computes the euclidean norm of the complex vector x and
+*  returns it through the function name:
+*
+*     DZNRM2 := sqrt( conjg( x' )*x )
+*
+*  N elements of X are used, starting at X( 1 ) and stepping by INCX.
+*  Zero is returned when N < 1 or INCX < 1.
+*
+*  The sum of squares is carried as SCL**2 * SUMSQ, where SCL is the
+*  largest absolute real or imaginary part met so far; every ratio
+*  that gets squared is therefore at most one in magnitude.  This is
+*  the inlined form of the LAPACK auxiliary call
+*
+*     CALL ZLASSQ( N, X, INCX, SCALE, SSQ )
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to ZLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION      ONE         , ZERO
+      PARAMETER           ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      INTEGER               JX, KP
+      DOUBLE PRECISION      MAG, SCL, SUMSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION      PART( 2 )
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, DIMAG, DBLE, SQRT
+*     ..
+*     .. Executable Statements ..
+      DZNRM2 = ZERO
+      IF( N.LT.1 .OR. INCX.LT.1 )
+     $   RETURN
+*
+      SCL   = ZERO
+      SUMSQ = ONE
+      DO 20, JX = 1, 1 + ( N - 1 )*INCX, INCX
+*        Treat the real and imaginary parts as two real entries.
+         PART( 1 ) = DBLE( X( JX ) )
+         PART( 2 ) = DIMAG( X( JX ) )
+         DO 10, KP = 1, 2
+            IF( PART( KP ).NE.ZERO )THEN
+               MAG = ABS( PART( KP ) )
+               IF( SCL.LT.MAG )THEN
+*                 New largest entry: rescale the running sum.
+                  SUMSQ = ONE + SUMSQ*( SCL/MAG )**2
+                  SCL   = MAG
+               ELSE
+                  SUMSQ = SUMSQ + ( MAG/SCL )**2
+               END IF
+            END IF
+   10    CONTINUE
+   20 CONTINUE
+      DZNRM2 = SCL * SQRT( SUMSQ )
+      RETURN
+*
+*     End of DZNRM2.
+*
+      END
+      integer function icamax(n,cx,incx)
+c
+c     returns the 1-based position (counted in elements, not storage
+c     offset) of the first entry of cx with the largest value of
+c     abs(real part) + abs(imaginary part).
+c     returns 0 if n .lt. 1 or incx .le. 0.
+c     jack dongarra, linpack, 3/11/78; modified 3/93 and 12/3/93.
+c
+      complex cx(*)
+      real best
+      integer incx,k,kc,n
+      complex zarg
+      real cabs1
+      cabs1(zarg) = abs(real(zarg)) + abs(aimag(zarg))
+c
+      icamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      icamax = 1
+      if(n.eq.1)return
+c
+c     an element replaces the current best unless it is .le. the best
+      best = cabs1(cx(1))
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 2,n
+            if(.not.(cabs1(cx(k)).le.best))then
+               icamax = k
+               best = cabs1(cx(k))
+            end if
+   10    continue
+      else
+c        strided storage: kc is the storage index of element k
+         kc = 1 + incx
+         do 20 k = 2,n
+            if(.not.(cabs1(cx(kc)).le.best))then
+               icamax = k
+               best = cabs1(cx(kc))
+            end if
+            kc = kc + incx
+   20    continue
+      end if
+      return
+      end
+      integer function idamax(n,dx,incx)
+c
+c     returns the 1-based position (counted in elements, not in
+c     storage offset) of the first entry of dx whose absolute value
+c     is largest.  returns 0 if n .lt. 1 or incx .le. 0.
+c     jack dongarra, linpack, 3/11/78; modified 3/93 and 12/3/93.
+c
+      double precision dx(*),best
+      integer incx,k,kd,n
+c
+      idamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      idamax = 1
+      if(n.eq.1)return
+c
+c     an element replaces the current best unless it is .le. the best
+c
+      best = dabs(dx(1))
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 2,n
+            if(.not.(dabs(dx(k)).le.best))then
+               idamax = k
+               best = dabs(dx(k))
+            end if
+   10    continue
+      else
+c        strided storage: kd is the storage index of element k
+         kd = 1 + incx
+         do 20 k = 2,n
+            if(.not.(dabs(dx(kd)).le.best))then
+               idamax = k
+               best = dabs(dx(kd))
+            end if
+            kd = kd + incx
+   20    continue
+      end if
+      return
+      end
+      integer function isamax(n,sx,incx)
+c
+c     returns the 1-based position (counted in elements, not in
+c     storage offset) of the first entry of sx whose absolute value
+c     is largest.  returns 0 if n .lt. 1 or incx .le. 0.
+c     jack dongarra, linpack, 3/11/78; modified 3/93 and 12/3/93.
+c
+      real sx(*),best
+      integer incx,k,ks,n
+c
+      isamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      isamax = 1
+      if(n.eq.1)return
+c
+c     an element replaces the current best unless it is .le. the best
+c
+      best = abs(sx(1))
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 2,n
+            if(.not.(abs(sx(k)).le.best))then
+               isamax = k
+               best = abs(sx(k))
+            end if
+   10    continue
+      else
+c        strided storage: ks is the storage index of element k
+         ks = 1 + incx
+         do 20 k = 2,n
+            if(.not.(abs(sx(ks)).le.best))then
+               isamax = k
+               best = abs(sx(ks))
+            end if
+            ks = ks + incx
+   20    continue
+      end if
+      return
+      end
+      integer function izamax(n,zx,incx)
+c
+c     returns the 1-based position (counted in elements, not in
+c     storage offset) of the first entry of zx for which dcabs1 is
+c     largest.  dcabs1 is an external function, not defined here.
+c     returns 0 if n .lt. 1 or incx .le. 0.
+c     jack dongarra, 1/15/85; modified 3/93 and 12/3/93.
+c
+      double complex zx(*)
+      double precision best,dcabs1
+      integer incx,k,kz,n
+c
+      izamax = 0
+      if( n.lt.1 .or. incx.le.0 )return
+      izamax = 1
+      if(n.eq.1)return
+c
+c     an element replaces the current best unless it is .le. the best
+c
+      best = dcabs1(zx(1))
+      if(incx.eq.1)then
+c        contiguous storage
+         do 10 k = 2,n
+            if(.not.(dcabs1(zx(k)).le.best))then
+               izamax = k
+               best = dcabs1(zx(k))
+            end if
+   10    continue
+      else
+c        strided storage: kz is the storage index of element k
+         kz = 1 + incx
+         do 20 k = 2,n
+            if(.not.(dcabs1(zx(kz)).le.best))then
+               izamax = k
+               best = dcabs1(zx(kz))
+            end if
+            kz = kz + incx
+   20    continue
+      end if
+      return
+      end
+      LOGICAL          FUNCTION LSAME( CA, CB )
+*
+*  -- LAPACK auxiliary routine (version 2.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     January 31, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          CA, CB
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  LSAME returns .TRUE. when CA and CB are the same character, or
+*  when they are the same letter in different cases; otherwise it
+*  returns .FALSE..
+*
+*  Arguments
+*  =========
+*
+*  CA      (input) CHARACTER*1
+*  CB      (input) CHARACTER*1
+*          The two single characters to be compared.
+*
+*  Method
+*  ======
+*
+*  The character set is identified from ICHAR( 'Z' ):
+*
+*     90 or 122    ASCII
+*     233 or 169   EBCDIC
+*     218 or 250   ASCII with 128 added to each code (Prime machines)
+*
+*  'Z' is probed rather than 'A' because ICHAR( 'A' ) on a Prime
+*  machine is 193, the same value as on an EBCDIC machine.  For a
+*  recognised character set, lower case codes are mapped onto the
+*  corresponding upper case codes before the comparison; for any
+*  other value of ICHAR( 'Z' ) the codes are compared unchanged.
+*
+* =====================================================================
+*
+*     .. Intrinsic Functions ..
+      INTRINSIC          ICHAR
+*     ..
+*     .. Local Scalars ..
+      INTEGER            KA, KB, KZ
+*     ..
+*     .. Executable Statements ..
+*
+*     Identical characters need no further work.
+*
+      LSAME = CA.EQ.CB
+      IF( .NOT.LSAME ) THEN
+         KZ = ICHAR( 'Z' )
+         KA = ICHAR( CA )
+         KB = ICHAR( CB )
+         IF( KZ.EQ.90 .OR. KZ.EQ.122 ) THEN
+*
+*           ASCII: lower case occupies codes 97 to 122.
+*
+            IF( KA.GE.97 .AND. KA.LE.122 ) KA = KA - 32
+            IF( KB.GE.97 .AND. KB.LE.122 ) KB = KB - 32
+         ELSE IF( KZ.EQ.233 .OR. KZ.EQ.169 ) THEN
+*
+*           EBCDIC: lower case occupies three separate code ranges.
+*
+            IF( ( KA.GE.129 .AND. KA.LE.137 ) .OR.
+     $          ( KA.GE.145 .AND. KA.LE.153 ) .OR.
+     $          ( KA.GE.162 .AND. KA.LE.169 ) ) KA = KA + 64
+            IF( ( KB.GE.129 .AND. KB.LE.137 ) .OR.
+     $          ( KB.GE.145 .AND. KB.LE.153 ) .OR.
+     $          ( KB.GE.162 .AND. KB.LE.169 ) ) KB = KB + 64
+         ELSE IF( KZ.EQ.218 .OR. KZ.EQ.250 ) THEN
+*
+*           Prime ASCII: lower case occupies codes 225 to 250.
+*
+            IF( KA.GE.225 .AND. KA.LE.250 ) KA = KA - 32
+            IF( KB.GE.225 .AND. KB.LE.250 ) KB = KB - 32
+         END IF
+         LSAME = KA.EQ.KB
+      END IF
+      RETURN
+*
+*     End of LSAME
+*
+      END
+      real function sasum(n,sx,incx)
+c
+c     returns the sum of abs(sx(k)) over n elements of sx taken with
+c     stride incx.  the result is 0 if n .le. 0 or incx .le. 0.
+c     for unit stride the loop is unrolled six ways, with a short
+c     leading loop for the mod(n,6) elements that do not fill a group.
+c     jack dongarra, linpack, 3/11/78; modified 3/93 and 12/3/93.
+c
+      real sx(*),acc
+      integer incx,k,last,lead,n
+c
+      sasum = 0.0e0
+      acc = 0.0e0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.ne.1)then
+c        strided storage: visit storage indices 1, 1+incx, ...
+         last = n*incx
+         do 10 k = 1,last,incx
+            acc = acc + abs(sx(k))
+   10    continue
+      else
+c
+c        unit stride: leading remainder first
+c
+         lead = mod(n,6)
+         if(lead.ne.0)then
+            do 20 k = 1,lead
+               acc = acc + abs(sx(k))
+   20       continue
+         end if
+c
+c        then full groups of six
+c
+         if(n.ge.6)then
+            do 30 k = lead + 1,n,6
+               acc = acc + abs(sx(k)) + abs(sx(k + 1))
+     *             + abs(sx(k + 2)) + abs(sx(k + 3))
+     *             + abs(sx(k + 4)) + abs(sx(k + 5))
+   30       continue
+         end if
+      end if
+      sasum = acc
+      return
+      end
+      subroutine saxpy(n,sa,sx,incx,sy,incy)
+c
+c     overwrites sy with sy + sa*sx, element by element, for n
+c     elements.  nothing is done if n .le. 0 or sa is zero.
+c     a negative increment starts at the far end of its vector, so
+c     that the storage index still runs down to 1.
+c     the loop is unrolled four ways when both increments equal one.
+c     jack dongarra, linpack, 3/11/78; modified 12/3/93.
+c
+      real sx(*),sy(*),sa
+      integer incx,incy,k,kx,ky,lead,n
+c
+      if(n.le.0)return
+      if (sa .eq. 0.0) return
+      if(incx.ne.1 .or. incy.ne.1)then
+c
+c        general increments
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            sy(ky) = sy(ky) + sa*sx(kx)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments one: leading remainder, then groups of four
+c
+         lead = mod(n,4)
+         if(lead.ne.0)then
+            do 20 k = 1,lead
+               sy(k) = sy(k) + sa*sx(k)
+   20       continue
+         end if
+         if(n.ge.4)then
+            do 30 k = lead + 1,n,4
+               sy(k) = sy(k) + sa*sx(k)
+               sy(k + 1) = sy(k + 1) + sa*sx(k + 1)
+               sy(k + 2) = sy(k + 2) + sa*sx(k + 2)
+               sy(k + 3) = sy(k + 3) + sa*sx(k + 3)
+   30       continue
+         end if
+      end if
+      return
+      end
+      real function scasum(n,cx,incx)
+c
+c     returns, in single precision, the sum of
+c     abs(real part) + abs(imaginary part) over n elements of the
+c     complex vector cx taken with stride incx.
+c     the result is 0 if n .le. 0 or incx .le. 0.
+c     jack dongarra, linpack, 3/11/78; modified 3/93 and 12/3/93.
+c
+      complex cx(*)
+      real acc
+      integer incx,k,last,n
+c
+      scasum = 0.0e0
+      acc = 0.0e0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)then
+c
+c        contiguous storage
+c
+         do 10 k = 1,n
+            acc = acc + abs(real(cx(k))) + abs(aimag(cx(k)))
+   10    continue
+      else
+c
+c        strided storage: visit storage indices 1, 1+incx, ...
+c
+         last = n*incx
+         do 20 k = 1,last,incx
+            acc = acc + abs(real(cx(k))) + abs(aimag(cx(k)))
+   20    continue
+      end if
+      scasum = acc
+      return
+      end
+      REAL             FUNCTION SCNRM2( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      COMPLEX                           X( * )
+*     ..
+*
+*  SCNRM2 computes the euclidean norm of the complex vector x and
+*  returns it through the function name:
+*
+*     SCNRM2 := sqrt( conjg( x' )*x )
+*
+*  N elements of X are used, starting at X( 1 ) and stepping by INCX.
+*  Zero is returned when N < 1 or INCX < 1.
+*
+*  The sum of squares is carried as SCL**2 * SUMSQ, where SCL is the
+*  largest absolute real or imaginary part met so far; every ratio
+*  that gets squared is therefore at most one in magnitude.  This is
+*  the inlined form of the LAPACK auxiliary call
+*
+*     CALL CLASSQ( N, X, INCX, SCALE, SSQ )
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to CLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*     .. Parameters ..
+      REAL                  ONE         , ZERO
+      PARAMETER           ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      INTEGER               JX, KP
+      REAL                  MAG, SCL, SUMSQ
+*     .. Local Arrays ..
+      REAL                  PART( 2 )
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, AIMAG, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+      SCNRM2 = ZERO
+      IF( N.LT.1 .OR. INCX.LT.1 )
+     $   RETURN
+*
+      SCL   = ZERO
+      SUMSQ = ONE
+      DO 20, JX = 1, 1 + ( N - 1 )*INCX, INCX
+*        Treat the real and imaginary parts as two real entries.
+         PART( 1 ) = REAL( X( JX ) )
+         PART( 2 ) = AIMAG( X( JX ) )
+         DO 10, KP = 1, 2
+            IF( PART( KP ).NE.ZERO )THEN
+               MAG = ABS( PART( KP ) )
+               IF( SCL.LT.MAG )THEN
+*                 New largest entry: rescale the running sum.
+                  SUMSQ = ONE + SUMSQ*( SCL/MAG )**2
+                  SCL   = MAG
+               ELSE
+                  SUMSQ = SUMSQ + ( MAG/SCL )**2
+               END IF
+            END IF
+   10    CONTINUE
+   20 CONTINUE
+      SCNRM2 = SCL * SQRT( SUMSQ )
+      RETURN
+*
+*     End of SCNRM2.
+*
+      END
+      subroutine scopy(n,sx,incx,sy,incy)
+c
+c     copies n elements of sx into sy.  nothing is done if n .le. 0.
+c     a negative increment starts at the far end of its vector, so
+c     that the storage index still runs down to 1.
+c     the loop is unrolled seven ways when both increments equal one.
+c     jack dongarra, linpack, 3/11/78; modified 12/3/93.
+c
+      real sx(*),sy(*)
+      integer incx,incy,k,kx,ky,lead,n
+c
+      if(n.le.0)return
+c
+      if(incx.ne.1 .or. incy.ne.1)then
+c
+c        general increments
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            sy(ky) = sx(kx)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments one: leading remainder, then groups of seven
+c
+         lead = mod(n,7)
+         if(lead.ne.0)then
+            do 20 k = 1,lead
+               sy(k) = sx(k)
+   20       continue
+         end if
+         if(n.ge.7)then
+            do 30 k = lead + 1,n,7
+               sy(k) = sx(k)
+               sy(k + 1) = sx(k + 1)
+               sy(k + 2) = sx(k + 2)
+               sy(k + 3) = sx(k + 3)
+               sy(k + 4) = sx(k + 4)
+               sy(k + 5) = sx(k + 5)
+               sy(k + 6) = sx(k + 6)
+   30       continue
+         end if
+      end if
+      return
+      end
+      real function sdot(n,sx,incx,sy,incy)
+c
+c     returns the dot product of n elements of sx and sy.
+c     the result is 0 if n .le. 0.
+c     a negative increment starts at the far end of its vector, so
+c     that the storage index still runs down to 1.
+c     the loop is unrolled five ways when both increments equal one.
+c     jack dongarra, linpack, 3/11/78; modified 12/3/93.
+c
+      real sx(*),sy(*),acc
+      integer incx,incy,k,kx,ky,lead,n
+c
+      acc = 0.0e0
+      sdot = 0.0e0
+      if(n.le.0)return
+      if(incx.ne.1 .or. incy.ne.1)then
+c
+c        general increments
+c
+         kx = 1
+         ky = 1
+         if(incx.lt.0)kx = (-n+1)*incx + 1
+         if(incy.lt.0)ky = (-n+1)*incy + 1
+         do 10 k = 1,n
+            acc = acc + sx(kx)*sy(ky)
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        both increments one: leading remainder, then groups of five
+c
+         lead = mod(n,5)
+         if(lead.ne.0)then
+            do 20 k = 1,lead
+               acc = acc + sx(k)*sy(k)
+   20       continue
+         end if
+         if(n.ge.5)then
+            do 30 k = lead + 1,n,5
+               acc = acc + sx(k)*sy(k) + sx(k + 1)*sy(k + 1)
+     *             + sx(k + 2)*sy(k + 2) + sx(k + 3)*sy(k + 3)
+     *             + sx(k + 4)*sy(k + 4)
+   30       continue
+         end if
+      end if
+      sdot = acc
+      return
+      end
+*DECK SDSDOT
+      REAL FUNCTION SDSDOT (N, SB, SX, INCX, SY, INCY)
+C***BEGIN PROLOGUE  SDSDOT
+C***PURPOSE  Compute the inner product of two vectors with extended
+C            precision accumulation.
+C***LIBRARY   SLATEC (BLAS)
+C***CATEGORY  D1A4
+C***TYPE      SINGLE PRECISION (SDSDOT-S, CDCDOT-C)
+C***KEYWORDS  BLAS, DOT PRODUCT, INNER PRODUCT, LINEAR ALGEBRA, VECTOR
+C***AUTHOR  Lawson, C. L., (JPL)
+C           Hanson, R. J., (SNLA)
+C           Kincaid, D. R., (U. of Texas)
+C           Krogh, F. T., (JPL)
+C***DESCRIPTION
+C
+C                B L A S  Subprogram
+C    Description of Parameters
+C
+C     --Input--
+C        N  number of products to accumulate
+C       SB  single precision scalar added to the inner product
+C       SX  single precision vector, N elements used
+C     INCX  index step between successive elements of SX
+C       SY  single precision vector, N elements used
+C     INCY  index step between successive elements of SY
+C
+C     --Output--
+C   SDSDOT  single precision result (SB if N .LE. 0)
+C
+C     The products are formed and summed in double precision:
+C     SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
+C     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
+C     defined in the same way from INCY.
+C
+C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
+C                 Krogh, Basic linear algebra subprograms for Fortran
+C                 usage, Algorithm No. 539, Transactions on Mathematical
+C                 Software 5, 3 (September 1979), pp. 308-323.
+C***ROUTINES CALLED  (NONE)
+C***REVISION HISTORY  (YYMMDD)
+C   791001  DATE WRITTEN
+C   890531  Changed all specific intrinsics to generic.  (WRB)
+C   890831  Modified array declarations.  (WRB)
+C   890831  REVISION DATE from Version 3.2
+C   891214  Prologue converted to Version 4.0 format.  (BAB)
+C   920310  Corrected definition of LX in DESCRIPTION.  (WRB)
+C   920501  Reformatted the REFERENCES section.  (WRB)
+C***END PROLOGUE  SDSDOT
+      REAL SX(*), SY(*), SB
+      INTEGER N, INCX, INCY, JX, JY, K, LAST
+      DOUBLE PRECISION DACC
+C***FIRST EXECUTABLE STATEMENT  SDSDOT
+C     DACC is the double precision accumulator, seeded with SB.
+      DACC = SB
+      IF (N .GT. 0) THEN
+        IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
+C         Equal, positive increments: one index serves both vectors.
+          LAST = N*INCX
+          DO 10 K = 1,LAST,INCX
+            DACC = DACC + DBLE(SX(K))*DBLE(SY(K))
+   10     CONTINUE
+        ELSE
+C         Unequal or nonpositive increments: separate indices.
+          JX = 1
+          JY = 1
+          IF (INCX .LT. 0) JX = 1+(1-N)*INCX
+          IF (INCY .LT. 0) JY = 1+(1-N)*INCY
+          DO 20 K = 1,N
+            DACC = DACC + DBLE(SX(JX))*DBLE(SY(JY))
+            JX = JX + INCX
+            JY = JY + INCY
+   20     CONTINUE
+        END IF
+      END IF
+C     Convert the accumulated value to single precision on return.
+      SDSDOT = DACC
+      RETURN
+      END
+      SUBROUTINE SGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the argument list, and XERBLA is
+*     called with the routine name and INFO before returning.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: nothing to do for an empty matrix, or
+*     when alpha is zero and beta is one.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  y := alpha*A*x + y.  K + I is the band-storage row of
+*        matrix row I in column J.  For INCY.NE.1, KY tracks the start
+*        of the band in Y and moves on once J exceeds KU.
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y := alpha*A'*x + y.  Column J of A is dotted with the
+*        matching part of x.  For INCX.NE.1, KX tracks the start of
+*        the band in X and moves on once J exceeds KU.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGBMV .
+*
+      END
+      SUBROUTINE SGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X',
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = B'.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+*     and  columns of  A  and the  number of  rows  of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*     INFO records the position of the first invalid argument.
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     ( C is empty, or the product term is zero and beta is one. )
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And if  alpha.eq.zero.
+*     ( C is zeroed or scaled by beta; A and B are not referenced. )
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*           ( column-wise update; zero elements of B are skipped )
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*           ( dot-product form; C is not read when beta is zero )
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B' + beta*C
+*           ( column-wise update; zero elements of B are skipped )
+            DO 170, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 130, I = 1, M
+                     C( I, J ) = ZERO
+  130             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 140, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  140             CONTINUE
+               END IF
+               DO 160, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 150, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+  170       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*           ( dot-product form; C is not read when beta is zero )
+            DO 200, J = 1, N
+               DO 190, I = 1, M
+                  TEMP = ZERO
+                  DO 180, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  180             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGEMM .
+*
+      END
+      SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO records the position of the first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*     ( A is empty, or alpha is zero and beta is one: y is unchanged )
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*     A negative increment places the first element at the far end.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*     ( Y is assigned zero, not multiplied, when beta is zero. )
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*        ( column J of A is skipped when its element of x is zero )
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.
+*        ( each element of y receives alpha times a dot product )
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               DO 90, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               DO 110, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGEMV .
+*
+      END
+      SUBROUTINE SGER  ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGER   adds a scaled outer product of two vectors to a general
+*  matrix, that is, it performs the rank 1 update
+*
+*     A := alpha*x*y' + A,
+*
+*  in which alpha is a scalar, x has m elements, y has n elements and
+*  A is m by n.  Column j of A is left untouched when element j of y
+*  is zero.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           Row count of A and length of x.  Must not be negative.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           Column count of A and length of y.  Must not be negative.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           The scalar alpha.  When it is zero the routine returns
+*           without referencing X, Y or A.
+*           Unchanged on exit.
+*
+*  X      - REAL             array holding the vector x.
+*           It needs at least ( 1 + ( m - 1 )*abs( INCX ) ) elements.
+*           Successive elements of x are INCX apart; with a negative
+*           INCX the first element of x sits at the far end of X.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           Stride between elements of x in X.  Must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array holding the vector y.
+*           It needs at least ( 1 + ( n - 1 )*abs( INCY ) ) elements.
+*           Successive elements of y are INCY apart; with a negative
+*           INCY the first element of y sits at the far end of Y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           Stride between elements of y in Y.  Must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           On entry its leading m by n part holds the matrix to be
+*           updated; on exit that part holds the updated matrix.
+*
+*  LDA    - INTEGER.
+*           First dimension of A as declared by the caller.  Must be
+*           at least max( 1, m ).
+*           Unchanged on exit.
+*
+*  An invalid argument is reported by calling XERBLA with the
+*  position of the first offending argument, after which the routine
+*  returns without updating A.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               YJ
+      INTEGER            BAD, COL, PX, PX0, PY, ROW
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Validate the arguments.  The checks run from the last argument
+*     to the first so that the earliest bad argument is the one
+*     finally recorded.
+*
+      BAD = 0
+      IF( LDA.LT.MAX( 1, M ) ) BAD = 9
+      IF( INCY.EQ.0 ) BAD = 7
+      IF( INCX.EQ.0 ) BAD = 5
+      IF( N.LT.0 ) BAD = 2
+      IF( M.LT.0 ) BAD = 1
+      IF( BAD.NE.0 )THEN
+         CALL XERBLA( 'SGER  ', BAD )
+         RETURN
+      END IF
+*
+*     Quick return: nothing to do for an empty matrix or a zero
+*     scale factor.
+*
+      IF( M.EQ.0 ) RETURN
+      IF( N.EQ.0 ) RETURN
+      IF( ALPHA.EQ.ZERO ) RETURN
+*
+*     Locate the first element of y, then update A one column at a
+*     time.
+*
+      PY = 1
+      IF( INCY.LE.0 ) PY = 1 - ( N - 1 )*INCY
+      IF( INCX.NE.1 )THEN
+*
+*        Strided x: PX0 is where the first element of x lives and
+*        PX walks on from there for every column.
+*
+         PX0 = 1
+         IF( INCX.LE.0 ) PX0 = 1 - ( M - 1 )*INCX
+         DO 20, COL = 1, N
+            IF( Y( PY ).NE.ZERO )THEN
+               YJ = ALPHA*Y( PY )
+               PX = PX0
+               DO 10, ROW = 1, M
+                  A( ROW, COL ) = A( ROW, COL ) + X( PX )*YJ
+                  PX            = PX            + INCX
+   10          CONTINUE
+            END IF
+            PY = PY + INCY
+   20    CONTINUE
+      ELSE
+*
+*        Contiguous x: index it with the row counter directly.
+*
+         DO 40, COL = 1, N
+            IF( Y( PY ).NE.ZERO )THEN
+               YJ = ALPHA*Y( PY )
+               DO 30, ROW = 1, M
+                  A( ROW, COL ) = A( ROW, COL ) + X( ROW )*YJ
+   30          CONTINUE
+            END IF
+            PY = PY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SGER  .
+*
+      END
+      REAL             FUNCTION SNRM2 ( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      REAL                              X( * )
+*     ..
+*
+*  SNRM2 returns, via the function name, the euclidean norm
+*
+*     sqrt( x'*x )
+*
+*  of X( 1 ), X( 1+INCX ), ..., X( 1+(N-1)*INCX ).  The result is
+*  zero if N.LT.1 or INCX.LT.1, and abs( X( 1 ) ) if N.EQ.1.  Else
+*  a running maximum magnitude BIG and a scaled sum SUMSQ are kept,
+*  squaring only ratios, and the result is  BIG*sqrt( SUMSQ ).
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to SLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*     .. Parameters ..
+      REAL                  ONE         , ZERO
+      PARAMETER           ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      INTEGER               K, POS
+      REAL                  BIG, MAGN, SUMSQ
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*     Degenerate cases: nothing to sum, or a single element.
+      SNRM2 = ZERO
+      IF( N.LT.1 .OR. INCX.LT.1 ) RETURN
+      IF( N.EQ.1 )THEN
+         SNRM2 = ABS( X( 1 ) )
+         RETURN
+      END IF
+*     General case.  The loop is the inlined equivalent of the LAPACK
+*     auxiliary call  CALL SLASSQ( N, X, INCX, BIG, SUMSQ ).
+      BIG   = ZERO
+      SUMSQ = ONE
+      POS   = 1
+      DO 10, K = 1, N
+         MAGN = ABS( X( POS ) )
+         IF( MAGN.NE.ZERO )THEN
+            IF( BIG.LT.MAGN )THEN
+               SUMSQ = ONE   + SUMSQ*( BIG/MAGN )**2
+               BIG   = MAGN
+            ELSE
+               SUMSQ = SUMSQ +        ( MAGN/BIG )**2
+            END IF
+         END IF
+         POS = POS + INCX
+   10 CONTINUE
+      SNRM2 = BIG*SQRT( SUMSQ )
+      RETURN
+*
+*     End of SNRM2.
+*
+      END
+      subroutine srot (n,sx,incx,sy,incy,c,s)
+c
+c     plane rotation applied in place to n pairs ( sx(k), sy(k) ):
+c        sx(k) <-  c*sx(k) + s*sy(k)
+c        sy(k) <-  c*sy(k) - s*sx(k)
+c     (old values on the right).  sx is stepped by incx, sy by incy;
+c     a negative step starts at element 1 + (1-n)*inc.  no-op if n.le.0
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      integer n,incx,incy,k,jx,jy
+      real sx(*),sy(*),c,s,xnew
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)then
+c        both strides are 1: index the two vectors directly
+         do 10 k = 1,n
+           xnew = c*sx(k) + s*sy(k)
+           sy(k) = c*sy(k) - s*sx(k)
+           sx(k) = xnew
+   10    continue
+      else
+c        general strides: keep a running position in each vector
+         jx = 1
+         jy = 1
+         if(incx.lt.0)jx = (-n+1)*incx + 1
+         if(incy.lt.0)jy = (-n+1)*incy + 1
+         do 20 k = 1,n
+           xnew = c*sx(jx) + s*sy(jy)
+           sy(jy) = c*sy(jy) - s*sx(jx)
+           sx(jx) = xnew
+           jx = jx + incx
+           jy = jy + incy
+   20    continue
+      end if
+      return
+      end
+      subroutine srotg(sa,sb,c,s)
+c
+c     construct givens plane rotation; on return sa = r and sb = z.
+c     r takes the sign of sa if abs(sa).gt.abs(sb), else that of sb.
+c     jack dongarra, linpack, 3/11/78.
+      real sa,sb,c,s,big,tot,rad,zed
+      tot = abs(sa) + abs(sb)
+      if( tot .eq. 0.0 ) then
+         c = 1.0
+         s = 0.0
+         rad = 0.0
+         zed = 0.0
+      else
+         big = sb
+         if( abs(sa) .gt. abs(sb) ) big = sa
+         rad = tot*sqrt((sa/tot)**2 + (sb/tot)**2)
+         rad = sign(1.0,big)*rad
+         c = sa/rad
+         s = sb/rad
+         zed = 1.0
+         if( abs(sa) .gt. abs(sb) ) zed = s
+         if( abs(sb) .ge. abs(sa) .and. c .ne. 0.0 ) zed = 1.0/c
+      end if
+      sa = rad
+      sb = zed
+      return
+      end
+      SUBROUTINE SROTM (N,SX,INCX,SY,INCY,SPARAM)
+C
+C     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
+C
+C     (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN
+C     (SY**T)
+C
+C     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
+C     LX = 1+(1-N)*INCX, AND SIMILARLY FOR SY USING LY AND INCY.
+C     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
+C
+C     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
+C
+C       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
+C     H=(          )    (          )    (          )    (          )
+C       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
+C     SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM.
+C
+C     SX AND SY ARE ASSUMED-SIZE, (*) NOT (1), SO THAT ELEMENTS PAST
+C     THE FIRST ARE WITHIN THE DECLARED BOUNDS, AS IN SROT.
+      INTEGER N,INCX,INCY,I,KX,KY,NSTEPS
+      REAL SX(*),SY(*),SPARAM(5)
+      REAL SFLAG,SH11,SH12,SH21,SH22,W,Z,ZERO,TWO
+      PARAMETER (ZERO=0.E0,TWO=2.E0)
+C
+      SFLAG=SPARAM(1)
+C     NOTHING TO DO IF N .LE. 0 OR IF H IS THE IDENTITY (SFLAG=-2).
+      IF(N .LE. 0 .OR.(SFLAG+TWO.EQ.ZERO)) RETURN
+      IF(INCX.EQ.INCY.AND. INCX .GT.0) THEN
+C        EQUAL POSITIVE STRIDES.. ONE INDEX SERVES BOTH VECTORS.
+         NSTEPS=N*INCX
+         IF(SFLAG .LT. ZERO) THEN
+            SH11=SPARAM(2)
+            SH12=SPARAM(4)
+            SH21=SPARAM(3)
+            SH22=SPARAM(5)
+            DO 10 I=1,NSTEPS,INCX
+               W=SX(I)
+               Z=SY(I)
+               SX(I)=W*SH11+Z*SH12
+               SY(I)=W*SH21+Z*SH22
+   10       CONTINUE
+         ELSE IF(SFLAG .EQ. ZERO) THEN
+            SH12=SPARAM(4)
+            SH21=SPARAM(3)
+            DO 20 I=1,NSTEPS,INCX
+               W=SX(I)
+               Z=SY(I)
+               SX(I)=W+Z*SH12
+               SY(I)=W*SH21+Z
+   20       CONTINUE
+         ELSE
+            SH11=SPARAM(2)
+            SH22=SPARAM(5)
+            DO 30 I=1,NSTEPS,INCX
+               W=SX(I)
+               Z=SY(I)
+               SX(I)=W*SH11+Z
+               SY(I)=-W+SH22*Z
+   30       CONTINUE
+         END IF
+      ELSE
+C        GENERAL STRIDES.. SEPARATE RUNNING INDICES KX AND KY.
+         KX=1
+         KY=1
+         IF(INCX .LT. 0) KX=1+(1-N)*INCX
+         IF(INCY .LT. 0) KY=1+(1-N)*INCY
+         IF(SFLAG .LT. ZERO) THEN
+            SH11=SPARAM(2)
+            SH12=SPARAM(4)
+            SH21=SPARAM(3)
+            SH22=SPARAM(5)
+            DO 40 I=1,N
+               W=SX(KX)
+               Z=SY(KY)
+               SX(KX)=W*SH11+Z*SH12
+               SY(KY)=W*SH21+Z*SH22
+               KX=KX+INCX
+               KY=KY+INCY
+   40       CONTINUE
+         ELSE IF(SFLAG .EQ. ZERO) THEN
+            SH12=SPARAM(4)
+            SH21=SPARAM(3)
+            DO 50 I=1,N
+               W=SX(KX)
+               Z=SY(KY)
+               SX(KX)=W+Z*SH12
+               SY(KY)=W*SH21+Z
+               KX=KX+INCX
+               KY=KY+INCY
+   50       CONTINUE
+         ELSE
+            SH11=SPARAM(2)
+            SH22=SPARAM(5)
+            DO 60 I=1,N
+               W=SX(KX)
+               Z=SY(KY)
+               SX(KX)=W*SH11+Z
+               SY(KY)=-W+SH22*Z
+               KX=KX+INCX
+               KY=KY+INCY
+   60       CONTINUE
+         END IF
+      END IF
+      RETURN
+      END
+      SUBROUTINE SROTMG (SD1,SD2,SX1,SY1,SPARAM)
+C
+C     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
+C     THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)*
+C     SY1)**T.
+C     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
+C
+C     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
+C
+C       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
+C     H=(          )    (          )    (          )    (          )
+C       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
+C     LOCATIONS 2-5 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22
+C     RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE
+C     VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.)
+C
+C     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
+C     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
+C     OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
+C     GAM IS 4096; GAMSQ IS ABOUT GAM**2 AND RGAMSQ ABOUT 1/GAM**2.
+      DIMENSION SPARAM(5)
+C
+      DATA ZERO,ONE,TWO /0.E0,1.E0,2.E0/
+      DATA GAM,GAMSQ,RGAMSQ/4096.E0,1.67772E7,5.96046E-8/
+      IF(.NOT. SD1 .LT. ZERO) GO TO 10
+C       GO ZERO-H-D-AND-SX1..
+          GO TO 60
+   10 CONTINUE
+C     CASE-SD1-NONNEGATIVE
+      SP2=SD2*SY1
+      IF(.NOT. SP2 .EQ. ZERO) GO TO 20
+          SFLAG=-TWO
+          GO TO 260
+C     REGULAR-CASE..
+   20 CONTINUE
+      SP1=SD1*SX1
+      SQ2=SP2*SY1
+      SQ1=SP1*SX1
+C     THE LARGER OF ABS(SQ1) AND ABS(SQ2) SELECTS THE FORM OF H.
+      IF(.NOT. ABS(SQ1) .GT. ABS(SQ2)) GO TO 40
+          SH21=-SY1/SX1
+          SH12=SP2/SP1
+C
+          SU=ONE-SH12*SH21
+C
+          IF(.NOT. SU .LE. ZERO) GO TO 30
+C         GO ZERO-H-D-AND-SX1..
+               GO TO 60
+   30     CONTINUE
+               SFLAG=ZERO
+               SD1=SD1/SU
+               SD2=SD2/SU
+               SX1=SX1*SU
+C         GO SCALE-CHECK..
+               GO TO 100
+   40 CONTINUE
+          IF(.NOT. SQ2 .LT. ZERO) GO TO 50
+C         GO ZERO-H-D-AND-SX1..
+               GO TO 60
+   50     CONTINUE
+               SFLAG=ONE
+               SH11=SP1/SP2
+               SH22=SX1/SY1
+               SU=ONE+SH11*SH22
+               STEMP=SD2/SU
+               SD2=SD1/SU
+               SD1=STEMP
+               SX1=SY1*SU
+C         GO SCALE-CHECK
+               GO TO 100
+C     PROCEDURE..ZERO-H-D-AND-SX1..
+   60 CONTINUE
+          SFLAG=-ONE
+          SH11=ZERO
+          SH12=ZERO
+          SH21=ZERO
+          SH22=ZERO
+C         SD1, SD2 AND SX1 ARE RETURNED AS ZERO TOGETHER WITH H.
+          SD1=ZERO
+          SD2=ZERO
+          SX1=ZERO
+C         RETURN..
+          GO TO 220
+C     PROCEDURE..FIX-H..
+   70 CONTINUE
+      IF(.NOT. SFLAG .GE. ZERO) GO TO 90
+C         MAKE THE IMPLIED 1.E0 / -1.E0 ENTRIES EXPLICIT, SFLAG=-1.
+          IF(.NOT. SFLAG .EQ. ZERO) GO TO 80
+          SH11=ONE
+          SH22=ONE
+          SFLAG=-ONE
+          GO TO 90
+   80     CONTINUE
+          SH21=-ONE
+          SH12=ONE
+          SFLAG=-ONE
+   90 CONTINUE
+      GO TO IGO,(120,150,180,210)
+C     PROCEDURE..SCALE-CHECK (RESCALE SD1, SD2 BY POWERS OF GAM**2)
+  100 CONTINUE
+  110     CONTINUE
+          IF(.NOT. SD1 .LE. RGAMSQ) GO TO 130
+               IF(SD1 .EQ. ZERO) GO TO 160
+               ASSIGN 120 TO IGO
+C              FIX-H, WHICH COMES BACK TO 120 THROUGH IGO..
+               GO TO 70
+  120          CONTINUE
+               SD1=SD1*GAM**2
+               SX1=SX1/GAM
+               SH11=SH11/GAM
+               SH12=SH12/GAM
+          GO TO 110
+  130 CONTINUE
+  140     CONTINUE
+          IF(.NOT. SD1 .GE. GAMSQ) GO TO 160
+               ASSIGN 150 TO IGO
+C              FIX-H, WHICH COMES BACK TO 150 THROUGH IGO..
+               GO TO 70
+  150          CONTINUE
+               SD1=SD1/GAM**2
+               SX1=SX1*GAM
+               SH11=SH11*GAM
+               SH12=SH12*GAM
+          GO TO 140
+  160 CONTINUE
+  170     CONTINUE
+          IF(.NOT. ABS(SD2) .LE. RGAMSQ) GO TO 190
+               IF(SD2 .EQ. ZERO) GO TO 220
+               ASSIGN 180 TO IGO
+C              FIX-H, WHICH COMES BACK TO 180 THROUGH IGO..
+               GO TO 70
+  180          CONTINUE
+               SD2=SD2*GAM**2
+               SH21=SH21/GAM
+               SH22=SH22/GAM
+          GO TO 170
+  190 CONTINUE
+  200     CONTINUE
+          IF(.NOT. ABS(SD2) .GE. GAMSQ) GO TO 220
+               ASSIGN 210 TO IGO
+C              FIX-H, WHICH COMES BACK TO 210 THROUGH IGO..
+               GO TO 70
+  210          CONTINUE
+               SD2=SD2/GAM**2
+               SH21=SH21*GAM
+               SH22=SH22*GAM
+          GO TO 200
+  220 CONTINUE
+          IF(SFLAG)250,230,240
+  230     CONTINUE
+               SPARAM(3)=SH21
+               SPARAM(4)=SH12
+               GO TO 260
+  240     CONTINUE
+               SPARAM(2)=SH11
+               SPARAM(5)=SH22
+               GO TO 260
+  250     CONTINUE
+               SPARAM(2)=SH11
+               SPARAM(3)=SH21
+               SPARAM(4)=SH12
+               SPARAM(5)=SH22
+  260 CONTINUE
+          SPARAM(1)=SFLAG
+          RETURN
+      END
+      SUBROUTINE SSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. If BETA is zero
+*           Y need not be set on input. Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; XERBLA is called with it and Y is untouched.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  N = 0, or ALPHA = 0 with BETA = 1,
+*     means there is nothing to do and Y is returned as supplied.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  With a negative increment
+*     the first element is at index 1 - ( N - 1 )*INC, the last at 1.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.  A zero BETA stores zeros without reading
+*     Y; if ALPHA is zero the routine returns once this is done.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  y  when upper triangle of A is stored;  a( I, J ) is held
+*        in A( K + 1 - J + I, J ).  In the strided loop KX, KY advance
+*        once J exceeds K so IX, IY start at row MAX( 1, J - K ).
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y  when lower triangle of A is stored;  a( I, J ) is held
+*        in A( 1 - J + I, J ), so the diagonal is row 1 and column J
+*        reaches down to row MIN( N, J + K ) of the matrix.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( 1, J )
+               L      = 1            - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( 1, J )
+               L       = 1             - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSBMV .
+*
+      END
+      subroutine sscal(n,sa,sx,incx)
+c
+c     scale a strided single precision vector in place: sx := sa*sx.
+c
+c     n     number of elements to scale
+c     sa    scalar multiplier
+c     sx    vector; elements 1, 1+incx, ..., 1+(n-1)*incx are scaled
+c     incx  stride between successive elements of sx
+c
+c     no-op when n.le.0 or incx.le.0.  based on the linpack routine
+c     by jack dongarra, 3/11/78 (revised 3/93 and 12/3/93).
+      real sa,sx(*)
+      integer incx,n
+      integer k,kbeg,klast,nrem
+      if( n.le.0 .or. incx.le.0 )return
+      if( incx.ne.1 )then
+c
+c        general stride: visit every incx-th element up to n*incx.
+c
+         klast = n*incx
+         do 10 k = 1,klast,incx
+            sx(k) = sa*sx(k)
+   10    continue
+      else
+c
+c        unit stride: scale the mod(n,5) leading elements one at a
+c        time, then the remainder in groups of five.
+c
+         nrem = mod(n,5)
+         do 20 k = 1,nrem
+            sx(k) = sa*sx(k)
+   20    continue
+         kbeg = nrem + 1
+         do 30 k = kbeg,n,5
+            sx(k) = sa*sx(k)
+            sx(k + 1) = sa*sx(k + 1)
+            sx(k + 2) = sa*sx(k + 2)
+            sx(k + 3) = sa*sx(k + 3)
+            sx(k + 4) = sa*sx(k + 4)
+   30    continue
+      end if
+      return
+      end
+      SUBROUTINE SSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; XERBLA is called with it and Y is untouched.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  N = 0, or ALPHA = 0 with BETA = 1,
+*     means there is nothing to do and Y is returned as supplied.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  With a negative increment
+*     the first element is at index 1 - ( N - 1 )*INC, the last at 1.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.  A zero BETA stores zeros without reading
+*     Y; if ALPHA is zero the routine returns once this is done.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  y  when AP contains the upper triangle.  KK indexes
+*        a( 1, J ), the diagonal a( J, J ) is AP( KK + J - 1 ), and each
+*        off-diagonal entry updates both y( I ) and y( J ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y  when AP contains the lower triangle.  KK indexes the
+*        diagonal a( J, J ), followed by the N - J entries below it; each
+*        off-diagonal entry updates both y( I ) and y( J ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*AP( KK )
+               K      = KK           + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*AP( KK )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPMV .
+*
+      END
+      SUBROUTINE SSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; XERBLA is called with it and AP is untouched.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  with N = 0 or ALPHA = 0 the update is
+*     zero and AP is left exactly as supplied.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.  KX, KY, JX and JY are read only by the strided loops; for
+*     a negative increment the first element is at 1 - ( N - 1 )*INC.
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP;  KK is the
+*     position in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when upper triangle is stored in AP.  Column J holds
+*        rows 1 to J; it is skipped when x( J ) and y( J ) are both zero
+*        because its update alpha*( x*y( J ) + y*x( J ) ) vanishes.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when lower triangle is stored in AP.  Column J holds
+*        rows J to N; it is skipped when x( J ) and y( J ) are both zero
+*        because its update alpha*( x*y( J ) + y*x( J ) ) vanishes.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPR2 .
+*
+      END
+      SUBROUTINE SSPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPR  .
+*
+      END
+      subroutine sswap (n,sx,incx,sy,incy)
+c
+c     exchanges the contents of two real vectors, sx and sy.
+c     n is the element count (n .le. 0 does nothing); incx, incy
+c     are the strides, a negative one starting from the far end.
+c     unit strides in both vectors take a path unrolled by three.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*),tmp
+      integer i,incx,incy,kx,ky,n,nrem
+c
+      if (n.le.0) return
+      if (incx.ne.1 .or. incy.ne.1) then
+c
+c        general strides: step a running index through each vector
+c
+         kx = 1
+         ky = 1
+         if (incx.lt.0) kx = 1 - (n - 1)*incx
+         if (incy.lt.0) ky = 1 - (n - 1)*incy
+         do 10 i = 1,n
+            tmp = sx(kx)
+            sx(kx) = sy(ky)
+            sy(ky) = tmp
+            kx = kx + incx
+            ky = ky + incy
+   10    continue
+      else
+c
+c        unit strides: the first mod(n,3) elements are swapped one
+c        at a time (this loop runs zero times when 3 divides n)
+c
+         nrem = mod(n,3)
+         do 20 i = 1,nrem
+            tmp = sx(i)
+            sx(i) = sy(i)
+            sy(i) = tmp
+   20    continue
+c
+c        three swaps per pass over whatever is left (none if n < 3)
+c
+         do 30 i = nrem + 1,n,3
+            tmp = sx(i)
+            sx(i) = sy(i)
+            sy(i) = tmp
+            tmp = sx(i + 1)
+            sx(i + 1) = sy(i + 1)
+            sy(i + 1) = tmp
+            tmp = sx(i + 2)
+            sx(i + 2) = sy(i + 2)
+            sy(i + 2) = tmp
+   30    continue
+      end if
+      return
+      end
+      SUBROUTINE SSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars,  A is a symmetric matrix and  B and
+*  C are  m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*     A is m by m when SIDE = 'L' or 'l' and n by n otherwise.
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*     INFO records the position of the first offending argument.
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*     C := beta*C, without reading C when beta is zero.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.  The I loop direction ensures
+*        each C( K, J ) has had beta applied before it is added to.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*        Per column J: the diagonal term, then K < J, then K > J.
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSYMM .
+*
+      END
+      SUBROUTINE SSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO records the position of the first offending argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*     A negative increment starts at the far end of the vector.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*     Y is overwritten, not read, when beta is zero.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*        Each A( I, J ) read serves as a( i, j ) and as a( j, i ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*        IX, IY start at JX, JY because loop 110 bumps them first.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( J, J )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( J, J )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYMV .
+*
+      END
+      SUBROUTINE SSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO records the position of the first offending argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*     KX, KY, JX, JY are neither set nor used for unit increments.
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*        A column is skipped when x( j ) and y( j ) are both zero.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*        IX, IY start at the diagonal ( JX, JY ) rather than KX, KY.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR2 .
+*
+      END
+      SUBROUTINE SSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+*                                        beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrices  A and B.  K must be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  Level 3 Blas routine.  On entry C need not be set when BETA is
+*  zero, and A and B are not referenced when ALPHA is zero.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument;  NROWA is the row count of A and B.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  C is unchanged when N = 0, or when
+*     beta = 1 and either alpha = 0 or K = 0.
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only the scaling  C := beta*C  of the
+*     referenced triangle is required.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + beta*C  by columns.
+*        Step L is skipped when A( J, L ) and B( J, L ) are both zero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + beta*C  element by
+*        element, from two dot products of length K.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR2K.
+*
+      END
+      SUBROUTINE SSYR  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  A is unchanged when N = 0 or
+*     alpha = 0.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.  KX is
+*     left unset for INCX = 1, where X is indexed directly.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.  Columns with
+*        a zero  x( J )  are skipped.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.  Columns with
+*        a zero  x( J )  are skipped.
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR  .
+*
+      END
+      SUBROUTINE SSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrix  A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  Level 3 Blas routine.  On entry C need not be set when BETA is
+*  zero, and A is not referenced when ALPHA is zero.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument;  NROWA is the row count of A.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  C is unchanged when N = 0, or when
+*     beta = 1 and either alpha = 0 or K = 0.
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  only the scaling  C := beta*C  of the
+*     referenced triangle is required.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*A' + beta*C  by columns.  Step L is
+*        skipped when A( J, L ) is zero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C  element by element, from a
+*        dot product of length K.
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYRK .
+*
+      END
+      SUBROUTINE STBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 90, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 110, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 130, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 150, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STBMV .
+*
+      END
+      SUBROUTINE STBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is not set when INCX = 1; unit-stride loops never read it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*     For TRANS = 'N', column J is skipped when x( j ) is zero.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*        Upper: J = N down to 1. Lower: J = 1 up to N.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A')*x.
+*        KX locates x(MAX(1,J-K)) if upper, x(MIN(N,J+K)) if lower.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  DO 90, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  DO 110, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  DO 130, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  DO 150, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STBSV .
+*
+      END
+      SUBROUTINE STPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*     KX is not set when INCX = 1; unit-stride loops never read it.
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*     KK is the first or last AP index of column J, per traversal.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*        Column J is skipped when x( j ) is zero.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK =1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*        Each x( j ) becomes the dot product of column J of A with x.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK - 1
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    - 1
+   90             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 110, K = KK - 1, KK - J + 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK + 1
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    + 1
+  130             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 150, K = KK + 1, KK + N - J
+                     IX   = IX   + INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STPMV .
+*
+      END
+      SUBROUTINE STPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    + 1
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, K = KK, KK + J - 2
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  K = KK
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    - 1
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - (N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STPSV .
+*
+      END
+      SUBROUTINE STRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      REAL               ALPHA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.  If M is zero the routine returns at once.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.  If N is zero the routine returns at once.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; it is passed to XERBLA before returning.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  B is empty when M or N is zero.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero  ( B is set to zero, A is not read ).
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.  B is overwritten in place; each branch
+*     orders its loops so no entry is overwritten while still needed.
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B.
+*
+            IF( UPPER )THEN
+               DO 110, J = 1, N
+                  DO 100, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 90, K = 1, I - 1
+                        TEMP = TEMP + A( K, I )*B( K, J )
+   90                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  100             CONTINUE
+  110          CONTINUE
+            ELSE
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 120, K = I + 1, M
+                        TEMP = TEMP + A( K, I )*B( K, J )
+  120                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  130             CONTINUE
+  140          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 180, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  150             CONTINUE
+                  DO 170, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 160, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  160                   CONTINUE
+                     END IF
+  170             CONTINUE
+  180          CONTINUE
+            ELSE
+               DO 220, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 190, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  190             CONTINUE
+                  DO 210, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+  220          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'.
+*
+            IF( UPPER )THEN
+               DO 260, K = 1, N
+                  DO 240, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 250, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            ELSE
+               DO 300, K = N, 1, -1
+                  DO 280, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 270, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  270                   CONTINUE
+                     END IF
+  280             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+  300          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRMM .
+*
+      END
+      SUBROUTINE STRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero; N = 0 is a quick return.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; it is passed to XERBLA before returning.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is left unset when INCX = 1; those branches index X by J.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x,  column by column.  J ascends for upper A and
+*        descends for lower A, so X( J ) is read before it is changed.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x,  one dot product per J.  J descends for upper
+*        A and ascends for lower A, so the x elements read are unmodified.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 110, I = J - 1, 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + A( I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = J + 1, N
+                     IX   = IX   + INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRMV .
+*
+      END
+      SUBROUTINE STRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      REAL               ALPHA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 130, J = 1, N
+                  DO 120, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     DO 110, K = 1, I - 1
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  110                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  120             CONTINUE
+  130          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     DO 140, K = I + 1, M
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  140                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 210, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 170, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  170                CONTINUE
+                  END IF
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 200, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+            ELSE
+               DO 260, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 220, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  220                CONTINUE
+                  END IF
+                  DO 240, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 250, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' ).
+*
+            IF( UPPER )THEN
+               DO 310, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+                  DO 290, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 280, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  280                   CONTINUE
+                     END IF
+  290             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 300, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  300                CONTINUE
+                  END IF
+  310          CONTINUE
+            ELSE
+               DO 360, K = 1, N
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 320, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  320                CONTINUE
+                  END IF
+                  DO 340, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 330, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  330                   CONTINUE
+                     END IF
+  340             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 350, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  350                CONTINUE
+                  END IF
+  360          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRSM .
+*
+      END
+      SUBROUTINE STRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument; XERBLA is called with it.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOUNIT is true when the diagonal of A is used (DIAG = 'N').
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX stays unset for INCX = 1; only the INCX.NE.1 branches read it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.  Each solved X( J ) times column J
+*        of A is subtracted from the entries still to be solved.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.  TEMP is X( J ) less the dot product
+*        of column J of A with the entries of x already solved.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRSV .
+*
+      END
+      SUBROUTINE XERBLA( SRNAME, INFO )
+*
+*  -- LAPACK auxiliary routine (preliminary version) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*  Purpose
+*  =======
+*
+*  XERBLA  is the error handler that a BLAS or LAPACK routine calls
+*  when one of its input arguments is invalid.  It writes a one-line
+*  message naming the routine and the argument position to unit *,
+*  then executes STOP, so control never returns to the caller.
+*
+*  Installers may consider modifying the STOP statement in order to
+*  call system-specific exception-handling facilities.
+*
+*  Arguments
+*  =========
+*
+*  SRNAME  (input) CHARACTER*6
+*          Name of the calling routine; printed with format A6.
+*
+*  INFO    (input) INTEGER
+*          Position of the offending argument in the caller's
+*          argument list; printed with format I2.
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO
+      CHARACTER*6        SRNAME
+*     ..
+*     .. Executable Statements ..
+*
+      WRITE( *, 9999 ) SRNAME, INFO
+      STOP
+*
+ 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2,
+     $        ' had ', 'an illegal value' )
+*
+*     End of XERBLA
+*
+      END
+      subroutine zaxpy(n,za,zx,incx,zy,incy)
+c
+c     computes zy := zy + za*zx over n double complex elements.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),za
+      integer i,incx,incy,ix,iy,n
+      double precision dcabs1
+      if (n.le.0) return
+c     skip all work when dcabs1(za) is zero
+      if (dcabs1(za) .eq. 0.0d0) return
+      if (incx.eq.1 .and. incy.eq.1) then
+c        unit strides: index both vectors directly
+         do 10 i = 1,n
+            zy(i) = zy(i) + za*zx(i)
+   10    continue
+      else
+c
+c        general strides; a negative stride starts at the far
+c        end of its vector
+c
+         ix = 1
+         iy = 1
+         if (incx.lt.0) ix = 1 - (n-1)*incx
+         if (incy.lt.0) iy = 1 - (n-1)*incy
+         do 20 i = 1,n
+            zy(iy) = zy(iy) + za*zx(ix)
+            ix = ix + incx
+            iy = iy + incy
+   20    continue
+      end if
+      return
+      end
+      subroutine  zcopy(n,zx,incx,zy,incy)
+c
+c     zy := zx, element by element, for n double complex entries.
+c     jack dongarra, linpack, 4/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*)
+      integer i,incx,incy,ix,iy,n
+c
+      if (n.le.0) return
+      if (incx.eq.1 .and. incy.eq.1) then
+c
+c        unit strides: copy with a single running index
+         do 10 i = 1,n
+            zy(i) = zx(i)
+   10    continue
+      else
+c
+c        general strides: ix and iy step by incx and incy; a
+c        negative stride begins at the far end of its vector
+c
+         ix = 1
+         iy = 1
+         if (incx.lt.0) ix = 1 - (n-1)*incx
+         if (incy.lt.0) iy = 1 - (n-1)*incy
+         do 20 i = 1,n
+            zy(iy) = zx(ix)
+            ix = ix + incx
+            iy = iy + incy
+   20    continue
+      end if
+      return
+      end
+      double complex function zdotc(n,zx,incx,zy,incy)
+c
+c     conjugated dot product: returns the sum of dconjg(zx)*zy
+c     over n strided elements; the result is zero when n.le.0.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),ztemp
+      integer i,incx,incy,ix,iy,n
+      ztemp = (0.0d0,0.0d0)
+      zdotc = (0.0d0,0.0d0)
+      if (n.le.0) return
+      if (incx.eq.1 .and. incy.eq.1) then
+c
+c        unit strides
+         do 10 i = 1,n
+            ztemp = ztemp + dconjg(zx(i))*zy(i)
+   10    continue
+      else
+c
+c        general strides; a negative stride starts at the far
+c        end of its vector
+c
+         ix = 1
+         iy = 1
+         if (incx.lt.0) ix = 1 - (n-1)*incx
+         if (incy.lt.0) iy = 1 - (n-1)*incy
+         do 20 i = 1,n
+            ztemp = ztemp + dconjg(zx(ix))*zy(iy)
+            ix = ix + incx
+            iy = iy + incy
+   20    continue
+      end if
+      zdotc = ztemp
+      return
+      end
+      double complex function zdotu(n,zx,incx,zy,incy)
+c
+c     unconjugated dot product: returns the sum of zx*zy over
+c     n strided elements; the result is zero when n.le.0.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),ztemp
+      integer i,incx,incy,ix,iy,n
+      ztemp = (0.0d0,0.0d0)
+      zdotu = (0.0d0,0.0d0)
+      if (n.le.0) return
+      if (incx.eq.1 .and. incy.eq.1) then
+c
+c        unit strides
+         do 10 i = 1,n
+            ztemp = ztemp + zx(i)*zy(i)
+   10    continue
+      else
+c
+c        general strides; a negative stride starts at the far
+c        end of its vector
+c
+         ix = 1
+         iy = 1
+         if (incx.lt.0) ix = 1 - (n-1)*incx
+         if (incy.lt.0) iy = 1 - (n-1)*incy
+         do 20 i = 1,n
+            ztemp = ztemp + zx(ix)*zy(iy)
+            ix = ix + incx
+            iy = iy + incy
+   20    continue
+      end if
+      zdotu = ztemp
+      return
+      end
+      subroutine  zdrot (n,zx,incx,zy,incy,c,s)
+c
+c     applies a plane rotation to the double complex vectors zx
+c     and zy, with double precision cos and sin (c and s):
+c        zx := c*zx + s*zy,   zy := c*zy - s*zx  (old zx used).
+c     jack dongarra, linpack, 3/11/78.
+c     array(1) declarations changed to array(*).
+c
+      double complex zx(*),zy(*),ztemp
+      double precision c,s
+      integer i,incx,incy,ix,iy,n
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for increments not both equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ztemp = c*zx(ix) + s*zy(iy)
+        zy(iy) = c*zy(iy) - s*zx(ix)
+        zx(ix) = ztemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ztemp = c*zx(i) + s*zy(i)
+        zy(i) = c*zy(i) - s*zx(i)
+        zx(i) = ztemp
+   30 continue
+      return
+      end
+      subroutine  zdscal(n,da,zx,incx)
+c
+c     multiplies n elements of the double complex vector zx by
+c     the real scalar da.  jack dongarra, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*)
+      double precision da
+      integer i,incx,ix,n
+      if (n.le.0) return
+      if (incx.le.0) return
+      if (incx.eq.1) then
+c
+c        unit stride
+         do 10 i = 1,n
+            zx(i) = dcmplx(da,0.0d0)*zx(i)
+   10    continue
+      else
+c
+c        stride greater than 1: ix walks zx in steps of incx
+c
+         ix = 1
+         do 20 i = 1,n
+            zx(ix) = dcmplx(da,0.0d0)*zx(ix)
+            ix = ix + incx
+   20    continue
+      end if
+      return
+      end
+      SUBROUTINE ZGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO records the position of the
+*     first invalid argument; XERBLA is called with it.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: M or N is zero, or ALPHA is zero
+*     and BETA is one.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     NOCONJ is true for TRANS = 'T': A is used without conjugation.
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*     A negative increment starts at the far end of its vector.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  y := alpha*A*x + y.  Once J > KU each following column
+*        of the band starts one row lower, so when INCY is not 1 the
+*        start point KY in Y is advanced by INCY.
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*        TEMP is the dot product of band column J of A with x.
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + DCONJG( A( K + I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + DCONJG( A( K + I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGBMV .
+*
+      END
+      SUBROUTINE ZGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            CONJA, CONJB, NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+*     B  respectively are to be  conjugate transposed  ( 'C' )  and set
+*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+*     and the number of rows of  B  respectively.
+*     ( NCOLA is assigned here but is not referenced again below. )
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      CONJA = LSAME( TRANSA, 'C' )
+      CONJB = LSAME( TRANSB, 'C' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.  INFO is the position in the argument
+*     list of the first invalid argument; it is reported via XERBLA.
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.CONJA                ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.CONJB                ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: C is empty, or the product term is
+*     absent ( alpha = 0 or k = 0 ) and beta = 1 leaves C unchanged.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero:  C := beta*C.  For beta = 0, C is set to
+*     zero outright, so C need not be defined on entry in that case.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.  When A is not transposed, columns of C are
+*     scaled by beta then updated; otherwise C( I, J ) is a dot product.
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE IF( CONJA )THEN
+*
+*           Form  C := alpha*conjg( A' )*B + beta*C.
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 150, J = 1, N
+               DO 140, I = 1, M
+                  TEMP = ZERO
+                  DO 130, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  130             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  140          CONTINUE
+  150       CONTINUE
+         END IF
+      ELSE IF( NOTA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A*conjg( B' ) + beta*C.
+*
+            DO 200, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 160, I = 1, M
+                     C( I, J ) = ZERO
+  160             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 170, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  170             CONTINUE
+               END IF
+               DO 190, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*DCONJG( B( J, L ) )
+                     DO 180, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  180                CONTINUE
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A*B'          + beta*C
+*
+            DO 250, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 210, I = 1, M
+                     C( I, J ) = ZERO
+  210             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 220, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  220             CONTINUE
+               END IF
+               DO 240, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 230, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+  250       CONTINUE
+         END IF
+      ELSE IF( CONJA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C.
+*
+            DO 280, J = 1, N
+               DO 270, I = 1, M
+                  TEMP = ZERO
+                  DO 260, L = 1, K
+                     TEMP = TEMP +
+     $                      DCONJG( A( L, I ) )*DCONJG( B( J, L ) )
+  260             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  270          CONTINUE
+  280       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*conjg( A' )*B' + beta*C
+*
+            DO 310, J = 1, N
+               DO 300, I = 1, M
+                  TEMP = ZERO
+                  DO 290, L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*B( J, L )
+  290             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  300          CONTINUE
+  310       CONTINUE
+         END IF
+      ELSE
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A'*conjg( B' ) + beta*C
+*
+            DO 340, J = 1, N
+               DO 330, I = 1, M
+                  TEMP = ZERO
+                  DO 320, L = 1, K
+                     TEMP = TEMP + A( L, I )*DCONJG( B( J, L ) )
+  320             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  330          CONTINUE
+  340       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 370, J = 1, N
+               DO 360, I = 1, M
+                  TEMP = ZERO
+                  DO 350, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  350             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  360          CONTINUE
+  370       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGEMM .
+*
+      END
+      SUBROUTINE ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position in the argument
+*     list of the first invalid argument; it is reported via XERBLA.
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: A is empty, or alpha = 0 with beta = 1
+*     so that y would be left unchanged.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     NOCONJ is true for TRANS = 'T':  A' is used without conjugation.
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*     A negative increment starts at the highest index referenced.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               IF( NOCONJ )THEN
+                  DO 90, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = 1, M
+                     TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               IF( NOCONJ )THEN
+                  DO 120, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = 1, M
+                     TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGEMV .
+*
+      END
+      SUBROUTINE ZGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERC  performs the rank 1 operation
+*
+*     A := alpha*x*conjg( y' ) + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position in the argument
+*     list of the first invalid argument; it is reported via XERBLA.
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGERC ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: A is empty or alpha is zero, so there
+*     is no update to apply.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*     JY ( and KX ) start at the highest index for negative increments.
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*DCONJG( Y( JY ) )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*DCONJG( Y( JY ) )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZGERC .
+*
+      END
+      SUBROUTINE ZGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERU  performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position in the argument
+*     list of the first invalid argument; it is reported via XERBLA.
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGERU ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: A is empty or alpha is zero, so there
+*     is no update to apply.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*     JY ( and KX ) start at the highest index for negative increments.
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZGERU .
+*
+      END
+      SUBROUTINE ZHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals (and so
+*           of sub-diagonals) of the matrix A. K must satisfy 0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. If BETA is zero
+*           Y need not be set on input. Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the calling sequence.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  y is unchanged when n is zero, or when
+*     alpha is zero and beta is one.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  With a negative increment
+*     the vector is traversed from the far end of its array backwards.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.  When beta is zero Y is set to zero
+*     without being read, so it need not be defined on entry.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  y  when upper triangle of A is stored.  Element ( I, J )
+*        is held in A( L + I, J ) with L = KPLUS1 - J.  For general
+*        increments KX, KY locate row MAX( 1, J - K ) in X and Y.
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( L + I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( KPLUS1, J ) )
+     $                         + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( L + I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( KPLUS1, J ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y  when lower triangle of A is stored.  Element ( I, J )
+*        is held in A( L + I, J ) with L = 1 - J, so the diagonal is
+*        row 1 of the band array.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( 1, J ) )
+               L      = 1      - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( L + I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( 1, J ) )
+               L       = 1       - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( L + I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHBMV .
+*
+      END
+      SUBROUTINE ZHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars, A is an hermitian matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  hermitian matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  hermitian  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Note that the imaginary parts  of the diagonal elements need
+*           not be set, they are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the calling sequence.
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  C is unchanged when m or n is zero,
+*     or when alpha is zero and beta is one.
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero,  C := beta*C  and neither  A  nor  B  is
+*     referenced.
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*        Form  C := alpha*A*B + beta*C.  I runs so that each C( K, J )
+*        updated in the inner loop already includes beta*C( K, J ):
+*        upwards for the upper triangle, downwards for the lower.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*DCONJG( A( K, I ) )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*DCONJG( A( K, I ) )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*B*A + beta*C.  An element of  A  outside the
+*        stored triangle is taken as the conjugate of its transpose
+*        element, and only the real part of the diagonal is used.
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*DBLE( A( J, J ) )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*DCONJG( A( J, K ) )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*DCONJG( A( J, K ) )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHEMM .
+*
+      END
+      SUBROUTINE ZHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position of the
+*     first invalid argument in the calling sequence.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  y is unchanged when n is zero, or when
+*     alpha is zero and beta is one.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  With a negative increment
+*     the vector is traversed from the far end of its array backwards.
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.  When beta is zero Y is set to zero
+*     without being read, so it need not be defined on entry.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  y  when A is stored in upper triangle.  Each A( I, J )
+*        above the diagonal is used twice: directly in the update of
+*        y( i ) and, conjugated, in the sum TEMP2 that feeds y( j ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*        Form  y  when A is stored in lower triangle.  Each A( I, J )
+*        below the diagonal is used twice: directly in the update of
+*        y( i ) and, conjugated, in the sum TEMP2 that feeds y( j ).
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHEMV .
+*
+      END
+      SUBROUTINE ZHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHER2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( J ) )
+                  TEMP2 = DCONJG( ALPHA*X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2 = DCONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*DCONJG( Y( J ) )
+                  TEMP2     = DCONJG( ALPHA*X( J ) )
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2     = DCONJG( ALPHA*X( JX ) )
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX        = JX
+                  IY        = JY
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER2 .
+*
+      END
+      SUBROUTINE ZHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA,
+     $                   C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS, UPLO
+      INTEGER            K, LDA, LDB, LDC, N
+      DOUBLE PRECISION   BETA
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER2K  performs one of the hermitian rank 2k operations
+*
+*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+*
+*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+*  hermitian matrix and  A and B  are  n by k matrices in the first case
+*  and  k by n  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+*                                         conjg( alpha )*B*conjg( A' ) +
+*                                         beta*C.
+*
+*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+*                                         conjg( alpha )*conjg( B' )*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           The imaginary parts of the diagonal elements need not be
+*           set,  they are taken as zero,  and they are stored as zero
+*           whenever C is written  ( not on the quick return path ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCONJG, MAX
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     ..
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  NROWA is the row count of the
+*     stored A and B  ( N for TRANS = 'N',  otherwise K )  and is
+*     the lower bound checked against LDA and LDB.
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     INFO is the argument-list position of the first bad argument.
+      INFO = 0
+      IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = 2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N ) ) THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHER2K', INFO )
+         RETURN
+      END IF
+*     Quick return if possible:  N = 0,  or the rank 2k term vanishes
+*     ( alpha = 0 or K = 0 )  while  beta = 1.   C is not written on
+*     this path.
+      IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND.
+     $    ( BETA.EQ.ONE ) ) )RETURN
+*
+*     And when  alpha.eq.zero,  C := beta*C  on the UPLO triangle
+*     only,  with the diagonal taken as real.
+      IF( ALPHA.EQ.ZERO ) THEN
+         IF( UPPER ) THEN
+            IF( BETA.EQ.DBLE( ZERO ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.DBLE( ZERO ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 70 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+*                   beta*C,  scaling column J of C by beta first.
+*
+         IF( UPPER ) THEN
+            DO 130 J = 1, N
+               IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                  DO 90 I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 100 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 120 L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) )
+     $                 THEN
+                     TEMP1 = ALPHA*DCONJG( B( J, L ) )
+                     TEMP2 = DCONJG( ALPHA*A( J, L ) )
+                     DO 110 I = 1, J - 1
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                              B( I, L )*TEMP2
+  110                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180 J = 1, N
+               IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                  DO 140 I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 150 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 170 L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) )
+     $                 THEN
+                     TEMP1 = ALPHA*DCONJG( B( J, L ) )
+                     TEMP2 = DCONJG( ALPHA*A( J, L ) )
+                     DO 160 I = J + 1, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                              B( I, L )*TEMP2
+  160                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 )
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+*                   beta*C,  one element of C at a time.
+*
+         IF( UPPER ) THEN
+            DO 210 J = 1, N
+               DO 200 I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190 L = 1, K
+                     TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( I.EQ.J ) THEN
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     ELSE
+                        C( J, J ) = BETA*DBLE( C( J, J ) ) +
+     $                              DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 +
+     $                              DCONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240 J = 1, N
+               DO 230 I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220 L = 1, K
+                     TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J )
+  220             CONTINUE
+                  IF( I.EQ.J ) THEN
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     ELSE
+                        C( J, J ) = BETA*DBLE( C( J, J ) ) +
+     $                              DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 +
+     $                              DCONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER2K.
+*
+      END
+      SUBROUTINE ZHER  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER   performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           The imaginary parts of the diagonal elements need not be
+*           set, they are taken as zero, and they are stored as zero
+*           whenever A is written ( not on the quick return path ).
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the argument-list position
+*     of the first bad argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  N = 0  or  alpha = 0.   A is not
+*     written on this path.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) )
+     $   RETURN
+*     Set the start point in X if the increment is not unity.  KX is
+*     1 for INCX > 1 and 1 - ( N - 1 )*INCX for INCX < 0 ( INCX = 0
+*     was rejected above );  it is left unset when INCX = 1.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when A is stored in upper triangle.  Column J takes
+*        x( 1:J )*TEMP  with  TEMP = alpha*conjg( x( J ) );  the
+*        diagonal entry keeps only its real part.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( J )*TEMP )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( JX )*TEMP )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when A is stored in lower triangle.  Column J takes
+*        x( J:N )*TEMP  with  TEMP = alpha*conjg( x( J ) );  the
+*        diagonal entry keeps only its real part.
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP      = ALPHA*DCONJG( X( J ) )
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( J ) )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP      = ALPHA*DCONJG( X( JX ) )
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( JX ) )
+                  IX        = JX
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER  .
+*
+      END
+      SUBROUTINE ZHERK( UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS, UPLO
+      INTEGER            K, LDA, LDC, N
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHERK  performs one of the hermitian rank k operations
+*
+*     C := alpha*A*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*A + beta*C,
+*
+*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+*  matrix in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DCONJG, MAX
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   RTEMP
+      COMPLEX*16         TEMP
+*     ..
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = 2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N ) ) THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHERK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND.
+     $    ( BETA.EQ.ONE ) ) )RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO ) THEN
+         IF( UPPER ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 70 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*        Form  C := alpha*A*conjg( A' ) + beta*C.
+*
+         IF( UPPER ) THEN
+            DO 130 J = 1, N
+               IF( BETA.EQ.ZERO ) THEN
+                  DO 90 I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 100 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 120 L = 1, K
+                  IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN
+                     TEMP = ALPHA*DCONJG( A( J, L ) )
+                     DO 110 I = 1, J - 1
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( TEMP*A( I, L ) )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180 J = 1, N
+               IF( BETA.EQ.ZERO ) THEN
+                  DO 140 I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 150 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 170 L = 1, K
+                  IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN
+                     TEMP = ALPHA*DCONJG( A( J, L ) )
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( TEMP*A( J, L ) )
+                     DO 160 I = J + 1, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*A + beta*C.
+*
+         IF( UPPER ) THEN
+            DO 220 J = 1, N
+               DO 200 I = 1, J - 1
+                  TEMP = ZERO
+                  DO 190 L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO ) THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+               RTEMP = ZERO
+               DO 210 L = 1, K
+                  RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J )
+  210          CONTINUE
+               IF( BETA.EQ.ZERO ) THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) )
+               END IF
+  220       CONTINUE
+         ELSE
+            DO 260 J = 1, N
+               RTEMP = ZERO
+               DO 230 L = 1, K
+                  RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J )
+  230          CONTINUE
+               IF( BETA.EQ.ZERO ) THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) )
+               END IF
+               DO 250 I = J + 1, N
+                  TEMP = ZERO
+                  DO 240 L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J )
+  240             CONTINUE
+                  IF( BETA.EQ.ZERO ) THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  250          CONTINUE
+  260       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHERK .
+*
+      END
+      SUBROUTINE ZHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position in the
+*     argument list of the first invalid argument found.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  y is left unchanged when N = 0, or
+*     when ALPHA = 0 and BETA = 1.
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.  For a negative increment
+*     the start point is the far end, 1 + ( N - 1 )*abs( increment ).
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.  Nothing is done when BETA = 1, and for
+*     BETA = 0 zeros are stored without reading Y.
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+*     KK is the index in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.  TEMP2 sums
+*        conjg( a( i, J ) )*x( i ), i < J, the unstored part of row J.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + DCONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+*              Only the real part of the diagonal element is used.
+               Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK + J - 1 ) )
+     $                         + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + DCONJG( AP( K ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK + J - 1 ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.  AP( KK ) is the
+*        diagonal; TEMP2 sums conjg( a( i, J ) )*x( i ) for i > J.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK ) )
+               K      = KK     + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + DCONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK ) )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + DCONJG( AP( K ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPMV .
+*
+      END
+      SUBROUTINE ZHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPR2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position in the
+*     argument list of the first invalid argument found.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  AP is neither read nor written when
+*     N = 0 or ALPHA = 0.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*     When both are 1, KX, KY, JX and JY are unset and unused.
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*     KK is the index in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when upper triangle is stored in AP.  Column J gains
+*        x*TEMP1 + y*TEMP2, where TEMP1 = alpha*conjg( y( J ) ) and
+*        TEMP2 = conjg( alpha*x( J ) ); the diagonal is kept real.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( J ) )
+                  TEMP2 = DCONJG( ALPHA*X( J ) )
+                  K     = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) +
+     $                               DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+*                 Column J is unchanged; only make its diagonal real.
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+*           General increments: JX, JY locate element J of x and y.
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2 = DCONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) +
+     $                               DBLE( X( JX )*TEMP1 +
+     $                                     Y( JY )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when lower triangle is stored in AP.  Same update,
+*        but the diagonal AP( KK ) is the first element of column J
+*        and is followed by rows J + 1 to N.
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1   = ALPHA*DCONJG( Y( J ) )
+                  TEMP2   = DCONJG( ALPHA*X( J ) )
+                  AP( KK ) = DBLE( AP( KK ) ) +
+     $                       DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1    = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2    = DCONJG( ALPHA*X( JX ) )
+                  AP( KK ) = DBLE( AP( KK ) ) +
+     $                       DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX       = JX
+                  IY       = JY
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPR2 .
+*
+      END
+      SUBROUTINE ZHPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPR    performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is set to the position in the
+*     argument list of the first invalid argument found.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible:  AP is neither read nor written when
+*     N = 0 or ALPHA = 0.
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.  KX is
+*     left unset when INCX = 1, where X is indexed directly by I and J.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*     KK is the index in AP of the first stored element of column J.
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*        Form  A  when upper triangle is stored in AP.  Column J gains
+*        x*TEMP, where TEMP = alpha*conjg( x( J ) ); the diagonal
+*        element AP( KK + J - 1 ) is kept real.
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( J ) )
+                  K    = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+     $                               + DBLE( X( J )*TEMP )
+               ELSE
+*                 Column J is unchanged; only make its diagonal real.
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+*           General increment: JX locates element J of x.
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+     $                               + DBLE( X( JX )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*        Form  A  when lower triangle is stored in AP.  Same update,
+*        but the diagonal AP( KK ) is the first element of column J
+*        and is followed by rows J + 1 to N.
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP     = ALPHA*DCONJG( X( J ) )
+                  AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( J ) )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP    = ALPHA*DCONJG( X( JX ) )
+                  AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( JX ) )
+                  IX      = JX
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPR  .
+*
+      END
+      subroutine zrotg(ca,cb,c,s)
+c     builds a complex plane rotation (c real, s complex) from ca, cb;
+c     ca is overwritten: by cb if ca = 0, else by (ca/|ca|)*rnorm.
+      double complex ca,cb,s,phase
+      double precision c,absa,sumabs,rnorm
+      absa = cdabs(ca)
+      if (absa .eq. 0.0d0) then
+         c = 0.0d0
+         s = (1.0d0,0.0d0)
+         ca = cb
+      else
+c        ca and cb are scaled by sumabs before being squared
+         sumabs = absa + cdabs(cb)
+         rnorm = sumabs*dsqrt((cdabs(ca/dcmplx(sumabs,0.0d0)))**2 +
+     *                        (cdabs(cb/dcmplx(sumabs,0.0d0)))**2)
+         phase = ca /absa
+         c = absa / rnorm
+         s = phase * dconjg(cb) / rnorm
+         ca = phase * rnorm
+      end if
+      end
+      subroutine  zscal(n,za,zx,incx)
+c
+c     multiplies the n elements zx(1), zx(1+incx), ... by za in place.
+c     returns without touching zx when n .le. 0 or incx .le. 0.
+c
+c     jack dongarra, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+c     n     number of elements to scale.
+c     za    complex scale factor.
+c     zx    vector whose selected elements are overwritten.
+c     incx  stride between successive elements of zx.
+c
+      double complex za,zx(*)
+      integer incx,n
+      integer k,klast
+c
+      if( n.ge.1 .and. incx.ge.1 )then
+c
+c        one strided pass; incx = 1 is just the contiguous case
+c
+         klast = 1 + (n-1)*incx
+         do 10 k = 1,klast,incx
+            zx(k) = za*zx(k)
+   10    continue
+      end if
+      return
+      end
+      subroutine  zswap (n,zx,incx,zy,incy)
+c
+c     exchanges the n elements of zx (stride incx) with those of zy
+c     (stride incy).  does nothing when n .le. 0.
+c
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+c     n     number of elements to exchange.
+c     zx    first vector, receives the elements of zy.
+c     incx  stride through zx; a negative stride starts at element
+c           1 + (1-n)*incx and walks back towards element 1.
+c     zy    second vector, receives the elements of zx.
+c     incy  stride through zy, handled like incx.
+c
+      double complex zx(*),zy(*),zhold
+      integer incx,incy,n
+      integer kx,ky,m
+      if(n.le.0)return
+c
+c     starting subscripts: 1 unless the stride is negative.  unit
+c     strides need no separate loop, they are the case kx = ky = m.
+c
+      kx = 1
+      ky = 1
+      if(incx.lt.0)kx = 1 + (1-n)*incx
+      if(incy.lt.0)ky = 1 + (1-n)*incy
+      do 10 m = 1,n
+        zhold = zy(ky)
+        zy(ky) = zx(kx)
+        zx(kx) = zhold
+        kx = kx + incx
+        ky = ky + incy
+   10 continue
+      return
+      end
+      SUBROUTINE ZSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where  alpha and beta are scalars, A is a symmetric matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*     Set NROWA as the number of rows of A  ( M when SIDE = 'L',  N
+*     otherwise ).  It is used only in the check on LDA below.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*     Test the input parameters.  INFO is set to the position, in the
+*     argument list, of the first invalid argument found;  XERBLA is
+*     then called with it and the routine returns.
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYMM ', INFO )
+         RETURN
+      END IF
+*     Quick return if possible:  C has no elements, or the operation
+*     reduces to  C := C  ( alpha.eq.zero and beta.eq.one ).
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     And when  alpha.eq.zero  only  C := beta*C  remains.  For
+*     beta.eq.zero  C is assigned zero without being read.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*        Form  C := alpha*A*B + beta*C.  Each stored off-diagonal
+*        A( K, I ) is used twice:  for C( K, J ) and, through TEMP2,
+*        for C( I, J ).  The I order ensures C( K, J ) is set first.
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*B*A + beta*C,  a column J at a time:  the
+*        diagonal term A( J, J ) first, then each off-diagonal term,
+*        read from whichever of A( K, J ), A( J, K ) is stored.
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSYMM .
+*
+      END
+      SUBROUTINE ZSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' +
+*                                         beta*C.
+*
+*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*     Test the input parameters.  NROWA is the number of rows of A
+*     and of B.  INFO is set to the position, in the argument list,
+*     of the first invalid argument found and is passed to XERBLA.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYR2K', INFO )
+         RETURN
+      END IF
+*     Quick return if possible:  C has no elements, or beta.eq.one
+*     and the rank 2k term vanishes ( alpha.eq.zero or K.eq.0 ).
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     And when  alpha.eq.zero  only the referenced triangle of C is
+*     scaled by beta,  or set to zero unread when  beta.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  C := alpha*A*B' + alpha*B*A' + beta*C.  Column J of C
+*        is scaled by beta first;  an index L is skipped when both
+*        A( J, L ) and B( J, L ) are zero.
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*A'*B + alpha*B'*A + beta*C.  TEMP1 and TEMP2
+*        hold the two dot products over L for each C( I, J ).
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYR2K.
+*
+      END
+      SUBROUTINE ZSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*     Test the input parameters.  NROWA is the number of rows of A.
+*     INFO is set to the position, in the argument list, of the
+*     first invalid argument found and is passed to XERBLA.
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYRK ', INFO )
+         RETURN
+      END IF
+*     Quick return if possible:  C has no elements, or beta.eq.one
+*     and the rank k term vanishes ( alpha.eq.zero or K.eq.0 ).
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*     And when  alpha.eq.zero  only the referenced triangle of C is
+*     scaled by beta,  or set to zero unread when  beta.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  C := alpha*A*A' + beta*C.  Column J of C is scaled by
+*        beta first;  an index L is skipped when A( J, L ) is zero.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*        Form  C := alpha*A'*A + beta*C.  TEMP holds the dot product
+*        over L of columns I and J of A for each C( I, J ).
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYRK .
+*
+      END
+      SUBROUTINE ZTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters. INFO records the position of the
+*     first invalid argument and is passed to XERBLA before returning.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ selects A' over conjg( A' ); NOUNIT uses A's diagonal.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point KX in X if the increment is not unity.
+*     Descending loops add ( N - 1 )*INCX to reach element N first.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. A is accessed sequentially in one pass.
+*     When INCX is not unity, KX tracks the X position of the first
+*     off-diagonal element visited, so inner loops start at IX = KX.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 90, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( KPLUS1, J ) )
+                     DO 100, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 120, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( KPLUS1, J ) )
+                     DO 130, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 150, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( 1, J ) )
+                     DO 160, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 180, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( 1, J ) )
+                     DO 190, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTBMV .
+*
+      END
+      SUBROUTINE ZTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters. INFO records the position of the
+*     first invalid argument and is passed to XERBLA before returning.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ selects A' over conjg( A' ); NOUNIT divides by A's diagonal.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point KX in X if the increment is not unity.
+*     Descending loops add ( N - 1 )*INCX to reach element N first.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. A is accessed sequentially in one pass.
+*     When INCX is not unity, KX tracks the X position of the first
+*     off-diagonal element visited, so inner loops start at IX = KX.
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     DO 90, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 100, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     DO 120, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 130, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     DO 150, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 160, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( 1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     DO 180, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 190, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( 1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTBSV .
+*
+      END
+      SUBROUTINE ZTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK     - 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    - 1
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 120, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 130, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  K    = KK     + 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    + 1
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 180, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 190, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTPMV .
+*
+      END
+      SUBROUTINE ZTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; XERBLA reports it and X is left unchanged.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ is only consulted when TRANS is not 'N' ('T' versus 'C').
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*     KX stays unset when INCX = 1; the unit-stride loops never use it.
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*        Form  x := inv( A )*x  by column-oriented substitution:
+*        each solved X( J ) is eliminated from the unsolved entries.
+*        KK is the index in AP of the diagonal element a( J, J ).
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*        TEMP accumulates X( J ) minus the already solved terms.
+*        KK indexes the first ('U') or last ('L') entry of column J.
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    + 1
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 120, K = KK, KK + J - 2
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 130, K = KK, KK + J - 2
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    - 1
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK - N + J ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK - N + J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTPSV .
+*
+      END
+      SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX*16         ALPHA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.  INFO is the position of the first
+*     invalid argument; XERBLA reports it and B is left unchanged.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: B is empty when M or N is zero.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*           K is ordered so B( K, J ) is still unmodified when read.
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B.
+*           I is ordered so the B( K, J ) read are still unmodified.
+            IF( UPPER )THEN
+               DO 120, J = 1, N
+                  DO 110, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 90, K = 1, I - 1
+                           TEMP = TEMP + A( K, I )*B( K, J )
+   90                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*DCONJG( A( I, I ) )
+                        DO 100, K = 1, I - 1
+                           TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J )
+  100                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  110             CONTINUE
+  120          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*DCONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*           Columns K added into column J are not yet updated.
+            IF( UPPER )THEN
+               DO 200, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 170, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  170             CONTINUE
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+  200          CONTINUE
+            ELSE
+               DO 240, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 210, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  210             CONTINUE
+                  DO 230, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 220, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  220                   CONTINUE
+                     END IF
+  230             CONTINUE
+  240          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ).
+*           Column K updates columns J before it is scaled itself.
+            IF( UPPER )THEN
+               DO 280, K = 1, N
+                  DO 260, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*DCONJG( A( J, K ) )
+                        END IF
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*DCONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            ELSE
+               DO 320, K = N, 1, -1
+                  DO 300, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*DCONJG( A( J, K ) )
+                        END IF
+                        DO 290, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  290                   CONTINUE
+                     END IF
+  300             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*DCONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 310, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  310                CONTINUE
+                  END IF
+  320          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRMM .
+*
+      END
+      SUBROUTINE ZTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           transformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ: TRANS matches 'T' (no conjugation). NOUNIT: DIAG is 'N'.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is not set when INCX = 1; the unit-stride loops never use it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + A( I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 120, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 130, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + A( I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 180, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 190, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRMV .
+*
+      END
+      SUBROUTINE ZTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX*16         ALPHA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument.
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*     NOCONJ: TRANSA matches 'T', so A is used without conjugation.
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible: B has no elements when M or N is 0.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B
+*           or    B := alpha*inv( conjg( A' ) )*B.
+*
+            IF( UPPER )THEN
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 110, K = 1, I - 1
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  110                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 120, K = 1, I - 1
+                           TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J )
+  120                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/DCONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  130             CONTINUE
+  140          CONTINUE
+            ELSE
+               DO 180, J = 1, N
+                  DO 170, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 150, K = I + 1, M
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  150                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 160, K = I + 1, M
+                           TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J )
+  160                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/DCONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  170             CONTINUE
+  180          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 230, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 190, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  190                CONTINUE
+                  END IF
+                  DO 210, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 220, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  220                CONTINUE
+                  END IF
+  230          CONTINUE
+            ELSE
+               DO 280, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 240, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  240                CONTINUE
+                  END IF
+                  DO 260, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 270, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' )
+*           or    B := alpha*B*inv( conjg( A' ) ).
+*
+            IF( UPPER )THEN
+               DO 330, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/DCONJG( A( K, K ) )
+                     END IF
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+                  DO 310, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = DCONJG( A( J, K ) )
+                        END IF
+                        DO 300, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  300                   CONTINUE
+                     END IF
+  310             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 320, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  320                CONTINUE
+                  END IF
+  330          CONTINUE
+            ELSE
+               DO 380, K = 1, N
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/DCONJG( A( K, K ) )
+                     END IF
+                     DO 340, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  340                CONTINUE
+                  END IF
+                  DO 360, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = DCONJG( A( J, K ) )
+                        END IF
+                        DO 350, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 370, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  370                CONTINUE
+                  END IF
+  380          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRSM .
+*
+      END
+      SUBROUTINE ZTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*     INFO is set to the position of the first invalid argument.
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*     NOCONJ: TRANS matches 'T' (no conjugation). NOUNIT: DIAG is 'N'.
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*     KX is not set when INCX = 1; the unit-stride loops never use it.
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 120, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 130, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 180, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 190, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRSV .
+*
+      END
diff --git a/src/byte_mpi.f b/src/byte_mpi.f
new file mode 100644
index 0000000..5815646
--- /dev/null
+++ b/src/byte_mpi.f
@@ -0,0 +1,209 @@
+      subroutine byte_sync_mpi(mpi_fh)
+c     Call MPI_file_sync on mpi_fh; does nothing unless MPIIO is defined.
+#ifdef MPIIO
+      include 'mpif.h'
+      call MPI_file_sync(mpi_fh,ierr)
+#endif
+c     The ierr returned by the sync call is not checked.
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine byte_open_mpi(fname,mpi_fh)
+c     Open file fname read/write (created if absent) on nekcomm_io.
+      include 'SIZE'
+      include 'RESTART'
+c     Only ranks with nid equal to pid0 or pid0r make the open call.
+#ifdef MPIIO
+      include 'mpif.h'
+c     A nonzero ierr from the open ends the run through exitt.
+      character*132 fname
+
+      if(nid.eq.pid0 .or. nid.eq.pid0r) then
+c        write(*,*) nid, 'call MPI_file_open',fname
+        call MPI_file_open(nekcomm_io,fname,
+     &                     MPI_MODE_RDWR+MPI_MODE_CREATE,
+     &                     MPI_INFO_NULL,mpi_fh,ierr)
+        if(ierr.ne.0) then
+          write(6,*) 'ABORT: Error in byte_open_mpi ', ierr
+          call exitt
+        endif
+      endif
+#else
+      write(6,*) 'byte_open_mpi: No MPI-IO support!'
+      call exitt
+#endif
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine byte_read_mpi(buf,icount,iorank,mpi_fh)
+c     Read icount 4-byte words from mpi_fh into buf on ranks pid0/pid0r.
+      include 'SIZE'
+      include 'RESTART'
+c     If iorank is non-negative, ranks other than iorank request 0 bytes.
+#ifdef MPIIO
+      include 'mpif.h'
+c     MPIIO_NOCOL selects MPI_file_read, otherwise MPI_file_read_all.
+      real*4 buf(1)          ! buffer
+
+      if(nid.eq.pid0 .or. nid.eq.pid0r) then
+        iout = 4*icount ! icount is in 4-byte words
+        if(iorank.ge.0 .and. nid.ne.iorank) iout = 0
+c        write(*,*) 'byte_read_mpi', nid, iout/4
+#ifdef MPIIO_NOCOL
+        call MPI_file_read(mpi_fh,buf,iout,MPI_BYTE,
+     &                     MPI_STATUS_IGNORE,ierr)
+#else
+        call MPI_file_read_all(mpi_fh,buf,iout,MPI_BYTE,
+     &                         MPI_STATUS_IGNORE,ierr)
+#endif
+        if(ierr.ne.0) then
+          write(6,*) 'ABORT: Error in byte_read_mpi ', ierr
+          call exitt
+        endif
+      endif
+#else
+      write(6,*) 'byte_read_mpi: No MPI-IO support!'
+      call exitt
+#endif
+
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine byte_write_mpi(buf,icount,iorank,mpi_fh)
+c     Write icount 4-byte words from buf to mpi_fh on ranks pid0/pid0r.
+      include 'SIZE'
+      include 'RESTART'
+c     If iorank is non-negative, ranks other than iorank write 0 bytes.
+#ifdef MPIIO
+      include 'mpif.h'
+c     MPIIO_NOCOL selects MPI_file_write, otherwise MPI_file_write_all.
+      real*4 buf(1)          ! buffer
+
+      if(nid.eq.pid0 .or. nid.eq.pid0r) then
+        iout = 4*icount ! icount is in 4-byte words
+        if(iorank.ge.0 .and. nid.ne.iorank) iout = 0
+c        write(*,*) 'byte_write', nid, iout/4
+#ifdef MPIIO_NOCOL
+        call MPI_file_write(mpi_fh,buf,iout,MPI_BYTE,
+     &                      MPI_STATUS_IGNORE,ierr)
+#else
+        call MPI_file_write_all(mpi_fh,buf,iout,MPI_BYTE,
+     &                          MPI_STATUS_IGNORE,ierr)
+#endif
+        if(ierr.ne.0) then
+          write(6,*) 'ABORT: Error in byte_write_mpi ', ierr
+          call exitt
+        endif
+      endif
+#else
+      write(6,*) 'byte_write_mpi: No MPI-IO support!'
+      call exitt
+#endif
+
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine byte_close_mpi(mpi_fh)
+c     Close mpi_fh on ranks pid0 and pid0r; ierr is not checked.
+      include 'SIZE'
+      include 'RESTART'
+c     Without MPIIO: rank 0 prints a message, then every rank calls exitt.
+#ifdef MPIIO
+      include 'mpif.h'
+      if(nid.eq.pid0 .or. nid.eq.pid0r) then
+        call MPI_file_close(mpi_fh,ierr)
+      endif
+#else
+      if(nid.eq.0) write(6,*) 'byte_close_mpi: No MPI-IO support!'
+      call exitt
+#endif
+
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine byte_set_view(ioff_in,mpi_fh)
+c     Set the file view of mpi_fh to displacement ioff_in, in MPI_BYTE units.
+      include 'SIZE'
+      include 'RESTART'
+c     A negative offset or a nonzero ierr ends the run through exitt.
+#ifdef MPIIO
+      include 'mpif.h'
+      integer*8 ioff_in
+    
+      if(nid.eq.pid0 .or. nid.eq.pid0r) then
+         if(ioff_in.lt.0) then
+           write(6,*) 'byte_set_view: offset<0!'
+           call exitt
+         endif
+c         write(*,*) 'dataoffset', nid, ioff_in
+         call MPI_file_set_view(mpi_fh,ioff_in,MPI_BYTE,MPI_BYTE,
+     &                          'native',MPI_INFO_NULL,ierr)
+         if(ierr.ne.0) then
+           write(6,*) 'ABORT: Error in byte_set_view ', ierr
+           call exitt
+         endif
+      endif
+#endif
+c     Without MPIIO the routine body is empty and it simply returns.
+      return
+      end
+C--------------------------------------------------------------------------
+      subroutine nek_comm_io(nn)
+c     Define nekcomm_io, the communicator that byte_open_mpi opens files on.
+      include 'SIZE'
+      include 'RESTART'
+      include 'PARALLEL'
+c     Without MPIIO nothing is set; without MPIIO_NOCOL it equals nekcomm.
+#ifdef MPIIO
+      include 'mpif.h'
+      common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
+      common /scrns/  irank_io(0:lp-1)
+c     MPIIO_NOCOL: build it from the nn I/O ranks gathered by rank 0.
+#ifdef MPIIO_NOCOL
+      if(nid.eq.0) then
+        j = 0
+        if(nid.eq.pid0 .or. nid.eq.pid0r) then
+          irank_io(j) = nid
+          j = j + 1
+        endif
+        do ir = 1,np-1
+          call csend(ir,idum,4,ir,0)           ! handshake
+          call crecv(ir,ibuf,4)
+          if(ibuf.gt.0) then 
+            irank_io(j) = ibuf
+            j = j + 1
+          endif 
+        enddo
+      else
+         mtype = nid
+         ibuf = -1
+         if(nid.eq.pid0) then
+           ibuf = nid
+         endif
+         call crecv(mtype,idum,4)                ! hand-shake
+         call csend(mtype,ibuf,4,0,0)            ! u4 :=: u8
+      endif
+c     NOTE(review): rank 0 tests pid0 or pid0r, other ranks only pid0 - confirm.
+      call bcast(irank_io,isize*nn)
+
+c      write(6,*) 'nid', nid, (irank_io(i),i=0,nn-1)
+
+      call mpi_comm_group (nekcomm,nekgroup,ierr)
+      if(ierr.gt.0) call exitt
+      call mpi_group_incl (nekgroup,nn,irank_io,nekgroup_io,ierr)
+      if(ierr.gt.0) call exitt
+      call mpi_comm_create(nekcomm,nekgroup_io,nekcomm_io,ierr)
+      if(ierr.gt.0) call exitt
+      call mpi_group_free (nekgroup_io,ierr)
+      if(ierr.gt.0) call exitt
+      call mpi_group_free (nekgroup,ierr)
+      if(ierr.gt.0) call exitt
+#else
+      nekcomm_io = nekcomm
+      return    
+#endif
+
+#endif
+
+      return
+      end
diff --git a/src/cg.f b/src/cg.f
new file mode 100644
index 0000000..b4214dd
--- /dev/null
+++ b/src/cg.f
@@ -0,0 +1,335 @@
+#ifdef TIMERS
+#define NBTIMER(a) a = dnekclock()
+#define STIMER(a) a = dnekclock_sync()
+#define ACCUMTIMER(b,a) b = b + (dnekclock()- a )
+#else
+#define NBTIMER(a)
+#define STIMER(a)
+#define ACCUMTIMER(a,b)
+#endif
+c     The three timer macros above expand to nothing unless TIMERS is set.
+
+c-----------------------------------------------------------------------
+      subroutine cg(x,f,g,c,r,w,p,z,n,niter,flop_cg)
+
+#if defined(XSMM_DISPATCH)
+      USE :: LIBXSMM
+#endif
+
+      include 'SIZE'
+      include 'TIMER'
+
+
+c     Solve Ax=f where A is SPD and is applied through axi()
+c
+c     Output:  x - vector of length n
+c
+c     Input:   f - vector of length n
+c     Input:   g - geometric factors for SEM operator
+c     Input:   c - inverse of the counting matrix
+c     Input:   niter - iteration count; the loop always runs niter times
+c     Work arrays:   r,w,p,z  - vectors of length n
+c     In/out:  flop_cg - running flop count, incremented before return
+c     The operator is applied by axi(w,p,g,...), giving  w := Ap
+c
+c     The preconditioner is applied by solveMi(z,r,...), giving z := M^-1 r
+c
+      parameter (lt=lx1*ly1*lz1*lelt)
+c     real ur(lt),us(lt),ut(lt)
+
+c     parameter (lxyz=lx1*ly1*lz1)
+c     real ur(lxyz),us(lxyz),ut(lxyz),wk(lxyz)
+
+      real x(n),f(n),r(n),w(n),p(n),z(n),g(1),c(n)
+      real rnorminit, fbeta, fpap, falpha, frnorm
+
+      integer*8 flop_cg
+      integer thread, numth, find, lind, fel, lel
+      integer omp_get_thread_num, omp_get_num_threads
+      integer fiter, tmt
+
+      pap = 0.0
+
+c     set machine tolerances
+      one = 1.
+      eps = 1.e-20
+      if (one+eps .eq. one) eps = 1.e-14
+      if (one+eps .eq. one) eps = 1.e-7
+
+      rtz1=1.0
+      miter = niter
+
+c$OMP PARALLEL DEFAULT(shared) PRIVATE(thread,numth,find,lind,iter,
+c$OMP&  fel,lel,rtz2,beta,alpha,alphm,rlim2,rtr0,tmt,ttemp1)
+
+      thread = 0
+      numth = 1
+#ifdef _OPENMP
+      thread = omp_get_thread_num()
+      numth = omp_get_num_threads()
+#endif
+      tmt = thread + 1
+c     Per-thread element range fel..lel; empty (fel>lel) if thread >= nelt.
+      if (numth < nelt) then
+        fel = (thread*nelt)/numth + 1
+        lel = ((thread+1)*nelt)/numth
+      else
+        if (thread < nelt) then
+          fel = thread + 1
+          lel = fel
+        else
+          fel = nelt+1
+          lel = nelt
+        end if
+      end if
+c     find..lind is the matching index range into the length-n vectors.
+      find = (fel-1) *(nx1*ny1*nz1)+1
+      lind = lel * (nx1*ny1*nz1)
+
+      NBTIMER(ttemp1)
+      call rzeroi(x,n,find,lind)
+      ACCUMTIMER(trzero(tmt), ttemp1)
+
+      NBTIMER(ttemp1)
+      call copyi(r,f,n,find,lind)
+      ACCUMTIMER(tcopy(tmt), ttemp1)
+
+      if (thread == 0) call mask (r)   ! Zero out Dirichlet conditions
+
+      gopi(tmt)=1
+      NBTIMER(ttemp1)
+      call glsc3i(rnorminit,r,c,r,n,find,lind)
+      ACCUMTIMER(tglsc3a(tmt), ttemp1)
+
+c     Main loop: always runs miter iterations; there is no early exit.
+      do iter=1,miter
+#ifdef LOG
+         if ((nid.eq.0) .and. (thread.eq.0)) write(*,*) "iter = ", iter
+#endif
+         NBTIMER(ttemp1)
+         call solveMi(z,r,n,find,lind)    ! preconditioner here
+         ACCUMTIMER(tsolvem(tmt), ttemp1)
+
+         rtz2=rtz1                                                       ! OPS
+         gopi(tmt)=2
+         NBTIMER(ttemp1)
+         call glsc3i(rtz1,r,c,z,n,find,lind)
+         ACCUMTIMER(tglsc3b(tmt), ttemp1)
+
+         beta = rtz1/rtz2
+         if (iter.eq.1) beta=0.0
+
+         NBTIMER(ttemp1)
+         call add2s1i(p,z,beta,n,find,lind)                              ! 2n
+         ACCUMTIMER(tadd2s1(tmt), ttemp1)
+
+         call axi(w,p,g,n,fel,lel,find,lind)                             ! flopa
+
+         gopi(tmt)=3
+         NBTIMER(ttemp1)
+         call glsc3i(pap, w,c,p,n,find,lind)                             ! 3n
+         ACCUMTIMER(tglsc3c(tmt), ttemp1)
+
+         alpha=rtz1/pap
+         alphm=-alpha
+
+         NBTIMER(ttemp1)
+         call add2s2i(x,p,alpha,n,find,lind)                             ! 2n
+         ACCUMTIMER(tadd2s2b(tmt), ttemp1)
+
+         NBTIMER(ttemp1)
+         call add2s2i(r,w,alphm,n,find,lind)                             ! 2n
+         ACCUMTIMER(tadd2s2c(tmt), ttemp1)
+
+         gopi(tmt)=4
+         NBTIMER(ttemp1)
+         call  glsc3i(rtr, r,c,r,n,find,lind)                            ! 3n
+         ACCUMTIMER(tglsc3d(tmt), ttemp1)
+
+         if (iter.eq.1) rlim2 = rtr*eps**2
+         if (iter.eq.1) rtr0  = rtr
+         rnorm = sqrt(rtr)
+c     rlim2 and rtr0 are assigned but not used afterwards in this routine.
+      enddo
+c     Thread 0 copies its final values to shared variables for printing.
+      if (thread == 0) then
+        fiter = iter
+        fbeta = beta
+        falpha= alpha
+        fpap  = pap
+        frnorm = rnorm
+      end if
+
+c$OMP END PARALLEL
+
+    6    format('cg:',i4,1p4e12.4)
+
+      if (nid.eq.0) then
+        write(6,6) 0,sqrt(rnorminit)
+        write(6,6) fiter,frnorm,falpha,fbeta,fpap
+      end if
+c     After the loop iter is miter+1, hence (fiter-1) in the flop count.
+      flop_cg = flop_cg + (fiter-1)*15_8*n + 3_8*n
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine solveM(z,r,n)
+      real z(n),r(n)
+c     No preconditioning is applied here: the only work is copy(z,r,n).
+      call copy(z,r,n)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine axi(w,u,gxyz,n,fel,lel,find,lind) ! Matrix-vector product: w=A*u
+c     Apply ax_e to elements fel..lel, run gs_op on w, then add2s2i with 0.1.
+      include 'SIZE'
+      include 'TOTAL'
+      include 'TIMER'
+
+      parameter (lxyz=lx1*ly1*lz1)
+      real w(nx1*ny1*nz1,nelt),u(nx1*ny1*nz1,nelt)
+      real gxyz(2*ldim,nx1*ny1*nz1,nelt)
+      parameter (lt=lx1*ly1*lz1*lelt)
+
+      integer fel, lel, find, lind
+      integer e,thread, tmt, omp_get_thread_num
+c     tmt (thread number + 1) indexes the per-thread timer arrays.
+      thread = 0
+#ifdef _OPENMP
+      thread = omp_get_thread_num()
+#endif
+      tmt = thread + 1
+
+      do e= fel, lel
+         call ax_e( w(1,e),u(1,e),gxyz(1,1,e))
+      enddo
+
+      NBTIMER(ttemp2)
+      call gs_op(gsh,w,1,1,0)  ! Gather-scatter operation  ! w   = QQ  w
+      ACCUMTIMER(tgsop(tmt),ttemp2)
+                                                           !            L
+      NBTIMER(ttemp2)
+      call add2s2i(w,u,.1,n,find,lind)
+      ACCUMTIMER(tadd2s2a(tmt),ttemp2)
+c     Only the caller whose range starts at index 1 masks and counts flops.
+      if (find == 1) then
+        call mask(w)             ! Zero out Dirichlet conditions
+        nxyz=nx1*ny1*nz1
+        flop_a = flop_a + (19_8*nxyz+12_8*nx1*nxyz)*nelt
+      end if
+
+      return
+      end
+c-------------------------------------------------------------------------
+      subroutine ax1(w,u,n)
+c     1D three-point stencil w = h2i*(2u(i)-u(i-1)-u(i+1)); needs n >= 2.
+      include 'SIZE'
+      real w(n),u(n),h2i
+c     Form h2i in real arithmetic: integer (n+1)*(n+1) overflows for big n.
+      h2i = real(n+1)*real(n+1)
+      do i = 2,n-1
+         w(i)=h2i*(2*u(i)-u(i-1)-u(i+1))
+      enddo
+      w(1)  = h2i*(2*u(1)-u(2  ))
+      w(n)  = h2i*(2*u(n)-u(n-1))
+
+      return
+      end
+c-------------------------------------------------------------------------
+      subroutine ax_e(w,u,g) ! Local matrix-vector product
+c     One element: local_grad3 of u, scale by g, then local_grad3_t into w.
+      include 'SIZE'
+      include 'TOTAL'
+      include 'TIMER'
+
+      parameter (lxyz=lx1*ly1*lz1)
+      real w(lxyz),u(lxyz),g(2*ldim,lxyz)
+      real ur(nx1*ny1*nz1),us(nx1*ny1*nz1),ut(nx1*ny1*nz1)
+      integer thread, tmt, omp_get_thread_num
+
+      thread = 0
+#ifdef _OPENMP
+      thread = omp_get_thread_num()
+#endif
+      tmt = thread + 1
+c     n = nx1-1 is the upper array bound passed to local_grad3.
+      nxyz = nx1*ny1*nz1
+      n    = nx1-1
+
+      NBTIMER(ttemp3)
+      call local_grad3(ur,us,ut,u,n,dxm1,dxtm1)
+      ACCUMTIMER(tlocalgrad3(tmt),ttemp3)
+c     g(1:6,i) is used as the 6 unique entries of a symmetric 3x3 matrix.
+      NBTIMER(ttemp3)
+      do i=1,nxyz
+         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
+         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
+         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
+         ur(i) = wr
+         us(i) = ws
+         ut(i) = wt
+      enddo
+      ACCUMTIMER(twrwswt(tmt),ttemp3)
+
+      NBTIMER(ttemp3)
+      call local_grad3_t(w,ur,us,ut,n,dxm1,dxtm1)
+      ACCUMTIMER(tlocalgrad3t(tmt),ttemp3)
+
+      return
+      end
+c-------------------------------------------------------------------------
+      subroutine local_grad3(ur,us,ut,u,n,D,Dt)
+c     Output: ur,us,ut         Input:u,n,D,Dt
+      real ur(0:n,0:n,0:n),us(0:n,0:n,0:n),ut(0:n,0:n,0:n)
+      real u (0:n,0:n,0:n)
+      real D (0:n,0:n),Dt(0:n,0:n)
+      integer e
+c     e is declared but unused. m1 = points per direction, m2 = m1 squared.
+      m1 = n+1
+      m2 = m1*m1
+c     mxm is called once for ur, once per k-slab for us, and once for ut.
+      call mxm(D ,m1,u,m1,ur,m2)
+      do k=0,n
+         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
+      enddo
+      call mxm(u,m2,Dt,m1,ut,m1)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine local_grad3_t(u,ur,us,ut,N,D,Dt)
+c     Output: u                Input: ur,us,ut,N,D,Dt
+      real u (0:N,0:N,0:N)
+      real ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
+      real D (0:N,0:N),Dt(0:N,0:N)
+      real w (0:N,0:N,0:N)
+      integer e
+c     e is declared but unused. w is local scratch space.
+      m1 = N+1
+      m2 = m1*m1
+      m3 = m1*m1*m1
+c     u receives the ur product; the us and ut products go through w.
+      call mxm(Dt,m1,ur,m1,u,m2)
+
+      do k=0,N
+         call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
+      enddo
+      call add2(u,w,m3)
+
+      call mxm(ut,m2,D ,m1,w,m1)
+      call add2(u,w,m3)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mask(w)   ! Zero out Dirichlet conditions
+      include 'SIZE'
+      real w(1)
+c     Rank 0 zeroes the first entry of w; other ranks leave w unchanged.
+      if (nid.eq.0) w(1) = 0.  ! suitable for solvability
+
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/comm_mpi.f b/src/comm_mpi.f
new file mode 100644
index 0000000..26f1bc0
--- /dev/null
+++ b/src/comm_mpi.f
@@ -0,0 +1,1212 @@
+c-----------------------------------------------------------------------
+      subroutine iniproc(intracomm)
+      include 'SIZE'
+      include 'PARALLEL'
+      include 'mpif.h'
+c     Start MPI if needed, build nekcomm from intracomm, set word sizes.
+      common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal
+c     Calls exitt if np exceeds lp or if 1.0+1.0e-12 equals 1.0 in REAL.
+      logical flag
+      integer provided
+c     NOTE(review): MPI_Initialized expects a LOGICAL flag - confirm type.
+      call mpi_initialized(mpi_is_initialized, ierr) !  Initialize MPI
+      if ( mpi_is_initialized .eq. 0 ) then
+#ifdef MPITHREADS
+        call mpi_init_thread (MPI_THREAD_MULTIPLE,provided,ierr)
+#else 
+        call mpi_init (ierr)
+#endif
+      endif
+
+      ! create communicator
+      call init_nek_comm(intracomm)
+      np  = np_
+      nid = nid_
+
+      if(nid.eq.0) call printHeader
+
+      ! check upper tag size limit
+      call mpi_attr_get(MPI_COMM_WORLD,MPI_TAG_UB,nval,flag,ierr)
+      if (nval.lt.(10000+max(lp,lelg))) then
+         if(nid.eq.0) write(6,*) 'WARNING: MPI_TAG_UB too small!', nval
+c        call exitt
+      endif
+
+      IF (NP.GT.LP) THEN
+         WRITE(6,*) 
+     $   'ERROR: Code compiled for a max of',LP,' processors.'
+         WRITE(6,*) 
+     $   'Recompile with LP =',NP,' or run with fewer processors.'
+         WRITE(6,*) 
+     $   'Aborting in routine INIPROC.'
+         call exitt
+      endif
+
+      ! set word size for REAL
+      wdsize=4
+      eps=1.0e-12
+      oneeps = 1.0+eps
+      if (oneeps.ne.1.0) then
+         wdsize=8
+      else
+         if(nid.eq.0) 
+     &     write(6,*) 'ABORT: single precision mode not supported!'
+         call exitt
+      endif
+      nekreal = mpi_real
+      if (wdsize.eq.8) nekreal = mpi_double_precision
+
+      ifdblas = .false.
+      if (wdsize.eq.8) ifdblas = .true.
+
+      ! set word size for INTEGER
+      ! HARDCODED since there is no secure way to detect an int overflow
+      isize = 4
+
+      ! set word size for LOGICAL
+      lsize = 4
+
+      ! set word size for CHARACTER
+      csize = 1
+c
+      PID = 0
+      NULLPID=0
+      NODE0=0
+      NODE= NID+1
+
+      if (nid.eq.0) then 
+         write(6,*) 'Number of processors:',np
+         WRITE(6,*) 'REAL    wdsize      :',WDSIZE
+         WRITE(6,*) 'INTEGER wdsize      :',ISIZE
+      endif
+
+      call crystal_setup(cr_h,nekcomm,np)  ! set cr handle to new instance
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine init_nek_comm(intracomm)
+      include 'mpif.h'
+      common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal
+c     Create nekcomm, then store mynode() and numnodes() in nid_ and np_.
+      call create_comm(intracomm) ! set up nekton specific communicator
+c
+      nid_  = mynode()
+      np_   = numnodes()
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine gop( x, w, op, n)
+c     Global vector commutative operation: allreduce of real x over nekcomm.
+c     op selects '+  ' sum, 'M  ' max, 'm  ' min or '*  ' product.
+c     w is workspace of length n; the result is copied back into x.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+c
+      real x(n), w(n)
+      character*3 op
+c
+      if (op.eq.'+  ') then
+         call mpi_allreduce (x,w,n,nekreal,mpi_sum ,nekcomm,ierr)
+      elseif (op.EQ.'M  ') then
+         call mpi_allreduce (x,w,n,nekreal,mpi_max ,nekcomm,ierr)
+      elseif (op.EQ.'m  ') then
+         call mpi_allreduce (x,w,n,nekreal,mpi_min ,nekcomm,ierr)
+      elseif (op.EQ.'*  ') then
+         call mpi_allreduce (x,w,n,nekreal,mpi_prod,nekcomm,ierr)
+      else
+         write(6,*) nid,' OP ',op,' not supported.  ABORT in GOP.'
+         call exitt
+      endif
+
+      call copy(x,w,n)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine igop( x, w, op, n)
+c     Global vector commutative operation: allreduce of integer x on nekcomm.
+c     op selects '+  ' sum, 'M  ' max, 'm  ' min or '*  ' product.
+c     w is workspace of length n; the result is copied back into x.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+
+      integer x(n), w(n)
+      character*3 op
+
+      if     (op.eq.'+  ') then
+        call mpi_allreduce (x,w,n,mpi_integer,mpi_sum ,nekcomm,ierr)
+      elseif (op.EQ.'M  ') then
+        call mpi_allreduce (x,w,n,mpi_integer,mpi_max ,nekcomm,ierr)
+      elseif (op.EQ.'m  ') then
+        call mpi_allreduce (x,w,n,mpi_integer,mpi_min ,nekcomm,ierr)
+      elseif (op.EQ.'*  ') then
+        call mpi_allreduce (x,w,n,mpi_integer,mpi_prod,nekcomm,ierr)
+      else
+        write(6,*) nid,' OP ',op,' not supported.  ABORT in igop.'
+        call exitt
+      endif
+
+      call icopy(x,w,n)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine i8gop( x, w, op, n)
+c     Global vector commutative operation: allreduce of integer*8 x.
+c     op selects '+  ' sum, 'M  ' max, 'm  ' min or '*  ' product.
+c     w is workspace of length n; the result is copied back into x.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+
+      integer*8 x(n), w(n)
+      character*3 op
+
+      if     (op.eq.'+  ') then
+        call mpi_allreduce (x,w,n,mpi_integer8,mpi_sum ,nekcomm,ierr)
+      elseif (op.EQ.'M  ') then
+        call mpi_allreduce (x,w,n,mpi_integer8,mpi_max ,nekcomm,ierr)
+      elseif (op.EQ.'m  ') then
+        call mpi_allreduce (x,w,n,mpi_integer8,mpi_min ,nekcomm,ierr)
+      elseif (op.EQ.'*  ') then
+        call mpi_allreduce (x,w,n,mpi_integer8,mpi_prod,nekcomm,ierr)
+      else
+        write(6,*) nid,' OP ',op,' not supported.  ABORT in i8gop.'
+        call exitt
+      endif
+
+      call i8copy(x,w,n)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine csend(mtype,buf,len,jnid,jpid)
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      real*4 buf(1)
+c     Blocking send of len bytes from buf to rank jnid with tag mtype.
+      call mpi_send (buf,len,mpi_byte,jnid,mtype,nekcomm,ierr)
+c     jpid is unused and ierr is not checked.
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine crecv(mtype,buf,lenm)
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      integer status(mpi_status_size)
+c     Blocking receive of up to lenm bytes, tag mtype, from any source.
+      real*4 buf(1)
+      len = lenm
+      jnid = mpi_any_source
+
+      call mpi_recv (buf,len,mpi_byte
+     $              ,jnid,mtype,nekcomm,status,ierr)
+c     NOTE(review): len should still equal lenm here, so this cannot fire.
+      if (len.gt.lenm) then 
+          write(6,*) nid,'long message in mpi_crecv:',len,lenm
+          call exitt
+      endif
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine crecv3(mtype,buf,len,lenm)
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      integer status(mpi_status_size)
+c     Blocking receive of up to lenm bytes; len returns the byte count.
+      real*4 buf(1)
+      len = lenm
+      jnid = mpi_any_source
+
+      call mpi_recv (buf,len,mpi_byte
+     $            ,jnid,mtype,nekcomm,status,ierr)
+      call mpi_get_count (status,mpi_byte,len,ierr)
+c     len now holds the count reported by mpi_get_count.
+      if (len.gt.lenm) then 
+          write(6,*) nid,'long message in mpi_crecv:',len,lenm
+          call exitt
+      endif
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function numnodes()
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+c     Returns the size of nekcomm as reported by mpi_comm_size.
+      call mpi_comm_size (nekcomm, numnodes , ierr)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function mynode()
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      integer myid
+c     Returns the rank of the calling process in nekcomm.
+      call mpi_comm_rank (nekcomm, myid, ierr)
+      mynode = myid
+
+      return
+      end
+c-----------------------------------------------------------------------
+      real*8 function dnekclock()
+      implicit none
+c     Clock reading from the timer chosen at compile time (see branches).
+#if defined (MPITIMER)
+      include 'mpif.h'
+      dnekclock=mpi_wtime()
+#elif defined (BGQTIMER)
+      double precision readtimebase_double
+      external readtimebase_double
+      dnekclock = 0.625D-9*ReadTimeBase_Double()
+#elif defined (CGTTIMER)
+      double precision fclock_gettime
+      external fclock_gettime
+      dnekclock = fclock_gettime()
+#else 
+      integer*8 countval, countrate, countmax
+      double precision countd
+      call system_clock(countval, countrate, countmax)
+      countd = countval
+      dnekclock = countd/countrate
+#endif
+c     The default branch returns system_clock counts divided by the rate.
+      return
+      end
+c-----------------------------------------------------------------------
+      real*8 function dnekclock_sync()
+      real*8 dnekclock
+      external dnekclock
+c     Call nekgsync first, then return the dnekclock reading.
+      call nekgsync()
+      dnekclock_sync=dnekclock()
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine lbcast(ifif)
+C
+C     Broadcast logical variable to all processors.
+C     The flag travels as integer item (1 = true) through bcast.
+      include 'SIZE'
+      include 'PARALLEL'
+      include 'mpif.h'
+
+      logical ifif
+c     Nothing to do on a single rank.
+      if (np.eq.1) return
+
+      item=0
+      if (ifif) item=1
+      call bcast(item,isize)
+      ifif=.false.
+      if (item.eq.1) ifif=.true.
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine bcast(buf,len)
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      real*4 buf(1)
+c     Broadcast len bytes of buf from rank 0 to all ranks of nekcomm.
+      call mpi_bcast (buf,len,mpi_byte,0,nekcomm,ierr)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine create_comm(intracomm)
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+c     nekcomm becomes a duplicate of intracomm; ierr is not checked.
+c      call mpi_comm_group (mpi_comm_world,itmp,ierr)
+c      call mpi_comm_create (mpi_comm_world,itmp,icomm,ierr)
+c      call mpi_group_free (itmp,ierr)
+
+      call mpi_comm_dup(intracomm,nekcomm,ierr)
+
+c     write(6,*) 'nekcomm:',nekcomm
+
+      return
+      end
+c-----------------------------------------------------------------------
+      function isend(msgtag,x,len,jnid,jpid)
+c     Nonblocking send of x to rank jnid with tag msgtag on nekcomm.
+c     Note: len in bytes
+c     Returns the request handle from mpi_isend; jpid is unused.
+      integer x(1)
+C
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+C
+      call mpi_isend (x,len,mpi_byte,jnid,msgtag
+     $       ,nekcomm,imsg,ierr)
+      isend = imsg
+c     write(6,*) nid,' isend:',imsg,msgtag,len,jnid,(x(k),k=1,len/4)
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      function irecv(msgtag,x,len)
+c     Nonblocking receive into x from any source with tag msgtag.
+c     Note: len in bytes
+c     Returns the request handle from mpi_irecv.
+      integer x(1)
+C
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+C
+      call mpi_irecv (x,len,mpi_byte,mpi_any_source,msgtag
+     $       ,nekcomm,imsg,ierr)
+      irecv = imsg
+c     write(6,*) nid,' irecv:',imsg,msgtag,len
+c
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine msgwait(imsg)
+c     Wait (mpi_wait) for the request imsg; status and ierr are discarded.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      integer status(mpi_status_size)
+c
+c     write(6,*) nid,' msgwait:',imsg
+c
+      call mpi_wait (imsg,status,ierr)
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine nekgsync()
+c     Barrier across all ranks of nekcomm.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+
+      call mpi_barrier(nekcomm,ierr)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine exittr(stringi,rdata,idata)
+      character*1 stringi(132)
+      character*1 stringo(132)
+      character*25 s25
+      include 'SIZE'
+      include 'TOTAL'
+c     Rank 0 prints EXIT: plus stringi with rdata and idata, then exitt.
+      call blank(stringo,132)
+      call chcopy(stringo,stringi,132)
+      len = indx1(stringo,'$',1)
+      write(s25,25) rdata,idata
+   25 format(1x,1p1e14.6,i10)
+      call chcopy(stringo(len),s25,25)
+c     The 25 formatted characters are copied in starting at position len.
+      if (nid.eq.0) write(6,1) (stringo(k),k=1,len+24)
+    1 format('EXIT: ',132a1)
+
+      call exitt
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine exitti(stringi,idata)
+      character*1 stringi(132)
+      character*1 stringo(132)
+      character*11 s11
+      include 'SIZE'
+      include 'TOTAL'
+c     Rank 0 prints EXIT: plus stringi with idata appended, then exitt.
+      call blank(stringo,132)
+      call chcopy(stringo,stringi,132)
+      len = indx1(stringo,'$',1)
+      write(s11,11) idata
+   11 format(1x,i10)
+      call chcopy(stringo(len),s11,11)
+c     The 11 formatted characters are copied in starting at position len.
+      if (nid.eq.0) write(6,1) (stringo(k),k=1,len+10)
+    1 format('EXIT: ',132a1)
+
+      call exitt
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine err_chk(ierr,string)
+      character*1 string(132)
+      character*1 ostring(132)
+      character*10 s10
+      include 'SIZE'
+      include 'TOTAL'
+c     ierr = iglsum(ierr,1), presumably a global sum; nonzero ends the run.
+      ierr = iglsum(ierr,1)
+      if(ierr.eq.0) return 
+c     Note: the ierr argument is overwritten with the reduced value.
+      len = indx1(string,'$',1)
+      call blank(ostring,132)
+      write(s10,11) ierr
+   11 format(1x,' ierr=',i3)
+
+      call chcopy(ostring,string,len-1)
+      call chcopy(ostring(len),s10,10)
+
+      if (nid.eq.0) write(6,1) (ostring(k),k=1,len+10)
+    1 format('ERROR: ',132a1)
+
+      call exitt
+
+      return
+      end
+c
+c-----------------------------------------------------------------------
+      subroutine exitt0
+      include 'SIZE'
+      include 'TOTAL'
+      include 'mpif.h'
+c     Rank 0 prints one line, then mpi_finalize and exit with status 0.
+#ifdef BGQ
+#define M_EXIT(X) exit_((X))
+#else
+#define M_EXIT(X) exit((X))
+#endif
+c     papi_mflops and papi_flops are declared but unused in this routine.
+      real*4 papi_mflops
+      integer*8 papi_flops
+
+      if(nid.eq.0) write(6,*) 'Exitting....'
+
+c     call print_stack()
+
+      call mpi_finalize (ierr)
+      call M_EXIT(0)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine exitt
+      include 'SIZE'
+      include 'TOTAL'
+      include 'mpif.h'
+
+      real*4 papi_mflops
+      integer*8 papi_flops
+c     Sync all ranks, print a timing summary on rank 0, then shut down MPI.
+      call nekgsync()
+
+#ifdef PAPI
+      call nek_flops(papi_flops,papi_mflops)
+#endif
+c     NOTE(review): papi_flops is read below even when PAPI is undefined.
+      tstop  = dnekclock()
+      ttotal = tstop-etimes
+      nxyz   = nx1*ny1*nz1
+
+      if (nid.eq.0) then 
+         dtmp1 = 0
+         dtmp2 = 0
+         dtmp3 = 0
+         if(istep.gt.0) then
+           dgp   = nvtot
+           dgp   = max(dgp,1.)
+           dtmp1 = np*ttime/(dgp*max(istep,1))
+           dtmp2 = ttime/max(istep,1)
+           dtmp3 = 1.*papi_flops/1e6
+         endif 
+         write(6,*) ' '
+         write(6,'(A)') 'call exitt: dying ...'
+         write(6,*) ' '
+c        call print_stack()
+         write(6,*) ' '
+         write(6,'(4(A,1p1e13.5,A,/))') 
+     &       'total elapsed time             : ',ttotal, ' sec'
+     &      ,'total solver time incl. I/O    : ',ttime , ' sec'
+     &      ,'time/timestep                  : ',dtmp2 , ' sec'
+     &      ,'CPU seconds/timestep/gridpt    : ',dtmp1 , ' sec'
+#ifdef PAPI
+         write(6,'(2(A,1g13.5,/))') 
+     &       'Gflops                         : ',dtmp3/1000.
+     &      ,'Gflops/s                       : ',papi_mflops/1000.
+#endif
+      endif 
+c     NOTE(review): divides by zero if nx1 equals ny1; deliberate? confirm.
+      nz1 = 1/(nx1-ny1)
+
+      call mpi_finalize (ierr)
+      call exit(0)
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine printHeader
+c     Empty stub: nothing is printed.
+
+      return
+      end
+c-----------------------------------------------------------------------
+      function igl_running_sum(in)
+c     Inclusive prefix sum of in over the ranks of nekcomm, via mpi_scan.
+      include 'mpif.h'
+      common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
+      integer status(mpi_status_size)
+      integer x,w,r
+c     w and status are set up or declared but not used by the scan.
+      x = in  ! running sum
+      w = in  ! working buff
+      r = 0   ! recv buff
+
+      call mpi_scan(x,r,1,mpi_integer,mpi_sum,nekcomm,ierr)
+      igl_running_sum = r
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine platform_timer(ivb) ! mxm, ping-pong, and all_reduce timer
+
+      include 'SIZE'
+      include 'TOTAL'
+c     Runs mxm_test_all and then comm_test, passing ivb to both.
+
+      call mxm_test_all(nid,ivb)  ! measure mxm times
+c     call exitti('done mxm_test_all$',ivb)
+
+      call comm_test(ivb)         ! measure message-passing and all-reduce times
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine comm_test(ivb) ! measure message-passing and all-reduce times
+                                ! ivb = 0 --> minimal verbosity
+                                ! ivb = 1 --> fully verbose
+                                ! ivb = 2 --> smaller sample set(shorter)
+c     Runs gop_test, optionally gp2_test, then pingpong from rank 0.
+      include 'SIZE'
+      include 'PARALLEL'
+
+      call gop_test(ivb)   ! added, Jan. 8, 2008
+c     gp2_test runs only when 2**log2(np) reproduces np.
+      log_np=log2(np)
+      np2 = 2**log_np
+      if (np2.eq.np) call gp2_test(ivb)   ! added, Jan. 8, 2008
+
+      io = 6
+      n512 = min(512,np-1)
+c     At most 512 partner ranks; past 256 they are spread out up to np-1.
+      do nodec=1,n512
+         nodeb=nodec
+         if (nodec.gt.256.and.np.gt.512)
+     $      nodeb = 256+(np-256)*(nodec-256)/(n512-256) - 1
+         nodeb = min(nodeb,np-1)
+
+         call pingpong(alphas,betas,0,nodeb,.0005,io,ivb)
+         if (nid.eq.0) write(6,1) nodeb,np,alphas,betas
+    1    format(2i10,1p2e15.7,' alpha beta')
+      enddo
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine pingpong(alphas,betas,nodea,nodeb,dt,io,ivb)
+c     Ping-pong timing between ranks nodea and nodeb for growing nwds.
+      include 'SIZE'
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+c     alphas = one-way message time at nwds=1 (set on rank nodea only).
+      parameter  (lt=lx1*ly1*lz1*lelt)
+      parameter (mwd1 = 3*lt,mwd2=100 000, mwd = max(mwd1,mwd2))
+      common /scrns/ x(mwd),y(mwd)
+c     betas = running mean of time per word over sizes nwds > 10000.
+      include 'mpif.h'
+      integer status(mpi_status_size)
+
+      character*10 fname
+
+      if (nid.eq.nodea) then
+         write(fname,3) np,nodeb
+    3    format('t',i4.4,'.',i4.4)
+         if (io.ne.6) open (unit=io,file=fname)
+      endif
+
+      call nekgsync
+      call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
+
+      nwds = 0
+      if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
+
+      betas = 0  ! Reported inverse bandwidth
+      count = 0
+c     Up to 500 message sizes; stops early once nwds exceeds mwd.
+      do itest = 1,500
+
+         nloop = msg_vol/(nwds+2)
+         nloop = min(nloop,1000)
+         nloop = max(nloop,1)
+
+         len   = 8*nwds
+     
+         call ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x,y,x,y)
+
+         if (nid.eq.nodea) then
+            tmsg = (t1-t0)/(2*nloop)   ! 2*nloop--> Double Buffer
+            tmsg = tmsg / 2.           ! one-way cost = 1/2 round-trip
+            tpwd = tmsg                ! time-per-word
+            if (nwds.gt.0) tpwd = tmsg/nwds
+            if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd
+    1       format(3i6,i12,1p2e16.8,' pg')
+
+            if (nwds.eq.1) then
+               alphas = tmsg
+            elseif (nwds.gt.10000) then   ! "average" beta
+               betas = (betas*count + tpwd)/(count+1)
+               count = count + 1
+            endif
+         endif
+c     Grow the message size geometrically (faster when ivb is 2).
+         if (ivb.eq.2) then
+            nwds = (nwds+1)*1.25
+         else
+            nwds = (nwds+1)*1.016
+         endif
+         if (nwds.gt.mwd) then
+c        if (nwds.gt.1024) then
+            if (nid.eq.nodea.and.io.ne.6) close(unit=io)
+            call nekgsync
+            return
+         endif
+
+      enddo
+
+      if (nid.eq.nodea.and.io.ne.6) close(unit=io)
+      call nekgsync
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine pingpongo(alphas,betas,nodea,nodeb,dt,io,ivb)
+c     Variant of pingpong with the send/receive loops written inline
+      include 'SIZE'
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+c     instead of calling ping_loop; same argument list as pingpong.
+      parameter  (lt=lx1*ly1*lz1*lelt)
+      parameter (mwd1 = 3*lt,mwd2=100 000, mwd = max(mwd1,mwd2))
+      common /scrns/ x(mwd),y(mwd)
+
+      include 'mpif.h'
+      integer status(mpi_status_size)
+
+      character*10 fname
+c     On nodea: build file name t<np>.<nodeb>; open it unless io is 6
+      if (nid.eq.nodea) then
+         write(fname,3) np,nodeb
+    3    format('t',i4.4,'.',i4.4)
+         if (io.ne.6) open (unit=io,file=fname)
+      endif
+
+      call nekgsync
+      call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
+
+      nwds = 0
+      if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
+
+      betas = 0  ! Reported inverse bandwidth
+      count = 0
+c     At most 500 sizes; stops early once nwds exceeds mwd
+      do itest = 1,500
+         call nekgsync
+         nloop = msg_vol/(nwds+2)
+         nloop = min(nloop,1000)
+         nloop = max(nloop,1)
+c        Byte count; assumes 8-byte words - TODO confirm
+         len   = 8*nwds
+         jnid = mpi_any_source
+c        nodea drives and times the exchange; nodeb echoes it
+         if (nid.eq.nodea) then
+
+            msg  = irecv(itest,y,1)
+            call csend(itest,x,1,nodeb,0)   ! Initiate send, to synch.
+            call msgwait(msg)
+
+            t0 = mpi_wtime ()
+            do i=1,nloop
+               call mpi_irecv(y,len,mpi_byte,mpi_any_source,i
+     $                        ,nekcomm,msg,ierr)
+               call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)
+               call mpi_wait (msg,status,ierr)
+            enddo
+            t1 = mpi_wtime ()
+            tmsg = (t1-t0)/nloop
+            tmsg = tmsg / 2.       ! Round-trip message time = twice one-way
+            tpwd = tmsg
+            if (nwds.gt.0) tpwd = tmsg/nwds
+            if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd
+    1       format(3i6,i12,1p2e16.8,' pg')
+c           alphas: time at 1 word; betas: running mean of time/word
+            if (nwds.eq.1) then
+               alphas = tmsg
+            elseif (nwds.gt.10000) then
+               betas = (betas*count + tpwd)/(count+1)
+               count = count + 1
+            endif
+
+         elseif (nid.eq.nodeb) then
+
+            call crecv(itest,y,1)           ! Initiate send, to synch.
+            call csend(itest,x,1,nodea,0)
+c           tmsg computed on nodeb below is not used afterwards here
+            t0 = dnekclock()
+            do i=1,nloop
+               call mpi_recv (y,len,mpi_byte
+     $               ,jnid,i,nekcomm,status,ierr)
+               call mpi_send (x,len,mpi_byte,nodea,i,nekcomm,ierr)
+            enddo
+            t1 = dnekclock()
+            tmsg = (t1-t0)/nloop
+
+         endif
+c        Next size: factor 1.016 per test
+         nwds = (nwds+1)*1.016
+         if (nwds.gt.mwd) then
+            if (nid.eq.nodea.and.io.ne.6) close(unit=io)
+            call nekgsync
+            return
+         endif
+
+      enddo
+
+      if (nid.eq.nodea.and.io.ne.6) close(unit=io)
+      call nekgsync
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine get_msg_vol(msg_vol,dt,nodea,nodeb)
+      include 'SIZE'
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+      parameter (lt=lx1*ly1*lz1*lelt)
+      common /scrns/ x(3*lt),y(3*lt)
+!     Estimates msg_vol = nwds*(dt/tmsg), where tmsg is the time
+!     nodea measures per exchange of nwds words with nodeb.
+!     msg_vol keeps its default of 1000 if tmsg is not positive.
+      msg_vol = 1000
+
+      nwds  = min(1000,lt)
+      nloop = 50
+ 
+      tmsg = 0.
+      call gop(tmsg,t1,'+  ',1)
+c     Byte count; assumes 8-byte words - TODO confirm
+      len = 8*nwds
+      if (nid.eq.nodea) then
+
+         msg  = irecv(1,y,1)
+         call csend(1,x,1,nodeb,0)   ! Initiate send, to synch.
+         call msgwait(msg)
+
+         t0 = dnekclock()
+         do i=1,nloop
+            msg  = irecv(i,y,len)
+            call csend(i,x,len,nodeb,0)
+            call msgwait(msg)
+         enddo
+         t1   = dnekclock()
+         tmsg = (t1-t0)/nloop
+         tpwd = tmsg/nwds
+
+      elseif (nid.eq.nodeb) then
+
+         call crecv(1,y,1)           ! Initiate send, to synch.
+         call csend(1,x,1,nodea,0)
+
+         t0 = dnekclock()
+         do i=1,nloop
+            call crecv(i,y,len)
+            call csend(i,x,len,nodea,0)
+         enddo
+         t1   = dnekclock()
+         tmsg = (t1-t0)/nloop
+         tmsg = 0.                   ! nodeb adds nothing to the sum
+
+      endif
+c     Sum over ranks: only nodea holds a nonzero tmsg at this point
+      call gop(tmsg,t1,'+  ',1)
+      if (tmsg.gt.0) msg_vol = nwds*(dt/tmsg)  ! no divide by zero
+c     if (nid.eq.nodea) write(6,*) nid,msg_vol,nwds,dt,tmsg,' msgvol'
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine gop_test(ivb)
+      include 'SIZE'
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+      include 'mpif.h'
+      integer status(mpi_status_size)
+c     Times gop (+) over a ladder of vector lengths; rank 0 reports.
+      parameter  (lt=lx1*ly1*lz1*lelt)
+      parameter (mwd = 3*lt)
+      common /scrns/ x(mwd),y(mwd)
+      common /scruz/ times(2,500)
+      common/scrcg/nwd(500)
+c     ivb > 0 prints every sample, otherwise only the first one.
+c     Build the ladder of sizes nwd(1..mtest), each at most mwd
+      nwds = 1
+      mtest = 0
+      do itest = 1,500
+         nwds = (nwds+1)*1.016
+         if (nwds.gt.mwd) goto 100
+         mtest = mtest+1
+         nwd(mtest) = nwds
+      enddo
+  100 continue
+c     Measure from the largest size down, six gop calls per sample
+      nwds = 1
+      do itest = mtest,1,-1
+
+         tiny = 1.e-27
+         call cfill(x,tiny,mwd)
+         nwds = nwd(itest)
+         call nekgsync
+
+
+         t0 = mpi_wtime ()
+         call gop(x,y,'+  ',nwds)
+         call gop(x,y,'+  ',nwds)
+         call gop(x,y,'+  ',nwds)
+         call gop(x,y,'+  ',nwds)
+         call gop(x,y,'+  ',nwds)
+         call gop(x,y,'+  ',nwds)
+         t1 = mpi_wtime ()
+
+         tmsg = (t1-t0)/6 ! six calls
+         tpwd = tmsg
+         if (nwds.gt.0) tpwd = tmsg/nwds
+         times(1,itest) = tmsg
+         times(2,itest) = tpwd
+
+      enddo
+  101 continue
+
+
+      if (nid.eq.0) then
+c        Label each sample with the size it was measured at
+         do itest=1,mtest
+            if (ivb.gt.0.or.itest.eq.1) 
+     $         write(6,1) np,nwd(itest),(times(k,itest),k=1,2)
+    1       format(i9,i12,1p2e16.8,' gop')
+c           Sizes come from nwd, so the label matches the sample
+c           and no entry of times beyond mtest is ever read.
+         enddo
+  102    continue
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine gp2_test(ivb)
+c     Times gp2 (+) for growing vector lengths; rank 0 reports.
+      include 'SIZE'
+      include 'mpif.h'
+c     ivb > 0 prints every sample, otherwise only the first one.
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+      integer status(mpi_status_size)
+
+      parameter  (lt=lx1*ly1*lz1*lelt)
+      parameter (mwd = 3*lt)
+      common /scrns/ x(mwd),y(mwd)
+      common /scruz/ times(2,500)
+
+      call rzero(x,mwd)
+c     Sizes start at 1 word and grow by a factor 1.016 up to mwd
+      nwds = 1
+      do itest = 1,500
+         call gp2(x,y,'+  ',1,nid,np)   ! untimed 1-word call first
+
+         t0 = mpi_wtime ()
+         call gp2(x,y,'+  ',nwds,nid,np)
+         call gp2(x,y,'+  ',nwds,nid,np)
+         call gp2(x,y,'+  ',nwds,nid,np)
+         call gp2(x,y,'+  ',nwds,nid,np)
+         t1 = mpi_wtime ()
+
+         tmsg = (t1-t0)/4 ! four calls
+         tpwd = tmsg
+         if (nwds.gt.0) tpwd = tmsg/nwds
+         times(1,itest) = tmsg
+         times(2,itest) = tpwd
+
+         nwds = (nwds+1)*1.016
+         if (nwds.gt.mwd) goto 101
+      enddo
+  101 continue
+
+c     Replay the same size sequence to label the stored timings
+      if (nid.eq.0) then
+         nwds = 1
+         do itest=1,500
+            if (ivb.gt.0.or.itest.eq.1) 
+     $         write(6,1) np,nwds,(times(k,itest),k=1,2)
+    1       format(i9,i12,1p2e16.8,' gp2')
+            nwds = (nwds+1)*1.016
+            if (nwds.gt.mwd) goto 102
+         enddo
+  102    continue
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function xor(m,n)
+c  Bitwise exclusive-or of the integers m and n.
+c  If NOT running on a parallel processor, it is sufficient to
+c  have this routine return a value of XOR=1.
+c
+c  Alternatives kept for reference (the active one is ieor):
+c
+c  UNIX 4.2, f77 (OR and AND are non-standard extensions):
+c      XOR = OR(M,N)-AND(M,N)
+c
+c  Intel FTN286:
+c     XOR = M.NEQV.N
+c
+c  Standard intrinsic (MIL-STD-1753, Fortran 90 and later):
+      xor = ieor(m,n)
+c
+c     XOR = 0
+c     IF(M.EQ.1 .OR.  N.EQ.1) XOR=1
+c     IF(M.EQ.0 .AND. N.EQ.0) XOR=0
+c     IF(M.EQ.1 .AND. N.EQ.1) XOR=0
+c     IF(M.GT.1 .OR.N.GT.1 .OR.M.LT.0.OR.N.LT.0) THEN
+c        PRINT*,'ERROR IN XOR'
+c        STOP
+c     ENDIF
+C
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine gp2( x, w, op, n, nid, np)
+c
+c     Global vector commutative operation using spanning tree.
+c
+c     Std. fan-in/fan-out
+c     x(n): in/out data, w(n): receive buffer, op: +, *, M or m
+      real x(n), w(n)
+      character*3 op
+
+      integer bit, bytes, cnt, diff, spsize, i, 
+     *   parent, troot, xor, root, lnp, log2
+      logical ifgot
+c     typer is a message tag, so it must be integer like type
+      integer type, typer
+      save    type
+      data    type  /998/
+      if (np.le.1) return   ! single rank: nobody to exchange with
+      type  = type+100
+      if (type.gt.9992) type=type-998
+      typer = type-1
+      bytes = 8*n
+c     (bytes above assumes 8-byte reals - TODO confirm)
+      root    = 0
+      troot   = max0((nid/np)*np, root)
+      diff    = xor(nid,troot)
+      nullpid = 0
+
+c     Accumulate contributions from children, if any
+      level2=1
+    5 continue
+         level=level2
+         level2=level+level
+         if (mod(nid,level2).ne.0) goto 20
+            call crecv(type,w,bytes)
+            if (op.eq.'+  ') then
+               do i=1,n
+                  x(i) = x(i) + w(i)
+               enddo
+            elseif (op.eq.'*  ') then
+               do i=1,n
+                  x(i) = x(i) * w(i)
+               enddo
+            elseif (op.eq.'M  ') then
+               do i=1,n
+                  x(i) = max(x(i),w(i))
+               enddo
+            elseif (op.eq.'m  ') then
+               do i=1,n
+                  x(i) = min(x(i),w(i))
+               enddo
+            endif
+         if (level2.lt.np) goto 5
+
+c     Pass result back to parent
+   20 parent = nid-level
+      if (nid .ne. 0) call csend(type,x,bytes,parent,nullpid)
+
+c     Await final answer from node 0 via log_2 fan out
+      level=np/2
+      ifgot=.false.
+      if (nid.eq.root) ifgot=.true.
+c     Each pass: holders send to nid+level, then level is halved
+      lnp = log2(np)
+      do i=1,lnp
+        if (ifgot) then
+           jnid=nid+level
+           call csend(typer,x,bytes,jnid,nullpid)
+        elseif (mod(nid,level).eq.0) then
+           call crecv(typer,x,bytes)
+           ifgot=.true.
+        endif
+        level=level/2
+      enddo
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
+c     Ping-pong of len-byte messages between nodea and nodeb.
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+c     nodea returns wall-clock times t0,t1 around its nloop loop.
+      real x(1),y(1)
+c     Ranks other than nodea and nodeb only join the nekgsync.
+      include 'mpif.h'
+      integer status(mpi_status_size)
+c     The loop index i doubles as the message tag
+      i=0
+      if (nid.eq.nodea) then
+         call nekgsync
+         call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr)    ! 1b
+         call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)        ! 1a
+c        call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr)        ! 1a
+         call msgwait(msg)                                          ! 1b
+c        Timed part on nodea: post receive, send, wait for the echo
+         t0 = mpi_wtime ()
+         do i=1,nloop
+            call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
+            call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)     ! 2a
+c           call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr)     ! 2a
+            call mpi_wait (msg,status,ierr)                         ! 2b
+         enddo
+         t1 = mpi_wtime ()
+
+      elseif (nid.eq.nodeb) then
+c        nodeb posts its first receive before entering the barrier
+         call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr)    ! 1a
+         call nekgsync
+         call mpi_wait (msg,status,ierr)                            ! 1a
+c        j trails i by one: each reply carries the previous tag
+         j=i
+         do i=1,nloop
+            call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
+c           call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr)     ! 1b
+            call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr)     ! 1b
+            call mpi_wait (msg,status,ierr)                         ! 2a
+            j=i
+         enddo
+c        call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr)        ! nb
+         call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr)        ! nb
+
+      else
+         call nekgsync
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
+c     Like ping_loop1, but each loop pass sends before posting the
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+c     receive.  nodea returns times t0,t1 around its nloop loop.
+      real x(1),y(1)
+c     Ranks other than nodea and nodeb only join the nekgsync.
+      include 'mpif.h'
+      integer status(mpi_status_size)
+c     The loop index i doubles as the message tag
+      i=0
+      if (nid.eq.nodea) then
+         call nekgsync
+         call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr)    ! 1b
+         call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)        ! 1a
+         call msgwait(msg)                                          ! 1b
+c        Timed part on nodea: send, post receive, wait for the echo
+         t0 = mpi_wtime ()
+         do i=1,nloop
+            call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)     ! 2a
+            call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
+            call mpi_wait (msg,status,ierr)                         ! 2b
+         enddo
+         t1 = mpi_wtime ()
+
+      elseif (nid.eq.nodeb) then
+c        nodeb posts its first receive before entering the barrier
+         call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr)    ! 1a
+         call nekgsync
+         call mpi_wait (msg,status,ierr)                            ! 1a
+c        j trails i by one: each reply carries the previous tag
+         j=i
+         do i=1,nloop
+            call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr)     ! 1b
+            call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
+            call mpi_wait (msg,status,ierr)                         ! 2a
+            j=i
+         enddo
+         call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr)        ! nb
+
+      else
+         call nekgsync
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x1,y1,x2,y2)
+c     Double Buffer : does 2*nloop timings
+c     Exchanges len-byte messages between nodea and nodeb, all with
+      common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
+c     tag 1, alternating the (x1,y1) and (x2,y2) buffers.
+      real x1(1),y1(1),x2(1),y2(1)
+c     nodea returns wall-clock times t0,t1 around its timed loop.
+      include 'mpif.h'
+      integer status(mpi_status_size)
+c     Ranks other than nodea and nodeb only join the nekgsync.
+      itag=1
+      if (nid.eq.nodea) then
+         call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr)   ! 1b 
+         call nekgsync
+
+c        Each pass: two sends, each overlapped with a posted receive
+         t0 = mpi_wtime ()
+         do i=1,nloop
+            call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr)     ! 1a 
+            call mpi_irecv(y2,len,mpi_byte,nodeb,itag,nekcomm,msg2,ierr)! 2b 
+            call mpi_wait (msg1,status,ierr)                            ! 1b
+            call mpi_send (x2,len,mpi_byte,nodeb,itag,nekcomm,ierr)     ! 2a 
+            call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr)! 3b 
+            call mpi_wait (msg2,status,ierr)                            ! 2b
+         enddo
+         t1 = mpi_wtime ()
+         call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr)        ! nb
+         call mpi_wait (msg1,status,ierr)                              ! nb
+
+      elseif (nid.eq.nodeb) then
+c        nodeb posts its first receive before entering the barrier
+         call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr)   ! nb 
+         call nekgsync
+
+c        Each pass: wait for a message, echo it, post the next receive
+         do i=1,nloop
+            call mpi_wait (msg1,status,ierr)                            ! 1a
+            call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr)     ! 1b
+            call mpi_irecv(y2,len,mpi_byte,nodea,itag,nekcomm,msg2,ierr)! 2a
+            call mpi_wait (msg2,status,ierr)                            ! 2a 
+            call mpi_send (x2,len,mpi_byte,nodea,itag,nekcomm,ierr)     ! 2b
+            call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr)! 3a
+         enddo
+         call mpi_wait (msg1,status,ierr)                            ! 2a 
+         call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr)        ! nb
+
+      else
+         call nekgsync
+      endif
+
+      return
+      end
+
+
diff --git a/src/driver.f b/src/driver.f
new file mode 100644
index 0000000..f45aa50
--- /dev/null
+++ b/src/driver.f
@@ -0,0 +1,660 @@
+c-----------------------------------------------------------------------
+      program nekbone
+c     Driver: runs cg twice per (nx1,nelt) case and times the second
+      include 'SIZE'
+      include 'TOTAL'
+      include 'mpif.h'
+
+      parameter (lxyz = lx1*ly1*lz1)
+      parameter (lt=lxyz*lelt)
+
+      real ah(lx1*lx1),bh(lx1),ch(lx1*lx1),dh(lx1*lx1)
+     $    ,zpts(2*lx1),wght(2*lx1)
+      
+      real x(lt),f(lt),r(lt),w(lt),p(lt),z(lt),c(lt)
+      real g(6,lt)
+      real avmflop
+      real tstart, tstop
+      integer icount
+
+      logical ifbrick
+      integer iel0,ielN,ielD   ! element range per proc.
+      integer nx0,nxN,nxD      ! poly. order range
+      integer npx,npy,npz      ! process grid in x,y,z
+      integer mx,my,mz         ! local element grid in x,y,z
+      integer numthreads, omp_get_max_threads
+
+      call iniproc(mpi_comm_world)    ! has nekmpi common block
+      tstart = dnekclock()
+      call read_param(ifbrick,iel0,ielN,ielD,nx0,nxN,nxD,
+     +                npx,npy,npz,mx,my,mz)
+
+      numthreads = 1
+#ifdef _OPENMP
+      numthreads= omp_get_max_threads()
+#endif 
+
+      if (nid.eq.0) then
+        write(*,*) "Max number of threads: ", numthreads
+      end if
+
+c     GET PLATFORM CHARACTERISTICS
+c     iverbose = 1
+c     call platform_timer(iverbose)   ! iverbose=0 or 1
+
+      icount = 0
+      avmflop = 0.0
+#ifndef NITER 
+#define NITER 100
+#endif
+      niter = NITER
+
+      if (nid.eq.0) then
+        write(*,*) "Number of iterations: ", niter
+      end if
+
+#ifdef LOG
+#define WLOG(X) if (nid .eq. 0) write(*,*) X 
+#else 
+#define WLOG(X) 
+#endif
+
+c     SET UP and RUN NEKBONE
+      do nx1=nx0,nxN,nxD
+         WLOG("calling init_dim")
+         call init_dim
+         do nelt=iel0,ielN,ielD
+           WLOG("calling init_mesh")
+           call init_mesh(ifbrick,npx,npy,npz,mx,my,mz)
+           WLOG("calling prox_setupds")
+           call proxy_setupds    (gsh)     ! Has nekmpi common block
+           WLOG("calling set_multiplicity")
+           call set_multiplicity (c)       ! Inverse of counting matrix
+
+           WLOG("calling proxy_setup")
+           call proxy_setup(ah,bh,ch,dh,zpts,wght,g) 
+
+           n     = nx1*ny1*nz1*nelt
+
+           WLOG("calling set_f")
+           call set_f(f,c,n)
+           WLOG("calling cg")
+           call cg(x,f,g,c,r,w,p,z,n,niter,flop_cg)
+
+           WLOG("calling nekgsync")
+           call nekgsync()
+
+           WLOG("calling set_timer_flop_count")
+           call set_timer_flop_cnt(0)
+           WLOG("calling cg")
+           call cg(x,f,g,c,r,w,p,z,n,niter,flop_cg)
+           WLOG("calling set_timer_flop_count")
+           call set_timer_flop_cnt(1)
+
+           WLOG("calling gs_free")
+           call gs_free(gsh)
+
+           icount = icount + 1
+           avmflop = avmflop + mflops*np
+         enddo
+      enddo
+
+c     avmflop now holds the sum of mflops*np over all icount cases.
+c     It is accumulated inside the loop, in the same order as a
+c     separate pass would use, so no fixed-size list of per-case
+c     values is needed and the number of cases is unbounded.
+
+      if (icount .ne. 0) then
+        avmflop = avmflop/icount
+      end if
+
+      if (nid .eq. 0) then
+        write(6,1) avmflop
+      end if
+    1 format('Av MFlops = ', 1pe12.4)
+
+c     TEST BANDWIDTH BISECTION CAPACITY
+c     call xfer(np,cr_h)
+
+      call nekgsync()
+      tstop = dnekclock()
+      if (nid .eq.0) write(*,*) "Total run time = ", tstop-tstart
+
+      call exitt0
+
+      end
+c--------------------------------------------------------------
+      subroutine set_f(f,c,n)
+c     Fill f(1:n) from a fixed random seed, then call dssum and col2
+      real f(n),c(n)
+      integer nseed, k
+      integer, allocatable :: iseed(:)
+c     Size the generator state, then set every seed word to 5
+      call random_seed(size=nseed)
+      allocate(iseed(nseed))
+      iseed(1:nseed) = 5
+      call random_seed(put=iseed)
+c     One scalar draw per entry, in index order
+      k = 0
+      do while (k.lt.n)
+         k = k + 1
+         call random_number(f(k))
+      enddo
+      call dssum(f)
+      call col2 (f,c,n)
+      deallocate(iseed)
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine init_dim
+c     Copies nx1 into ny1 and nz1, and ldim into ndim.
+C     Transfer array dimensions to common
+c     (the names used here are presumably declared by the includes)
+      include 'SIZE'
+      include 'INPUT'
+ 
+      ny1=nx1
+      nz1=nx1
+ 
+      ndim=ldim
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine init_mesh(ifbrick,npx,npy,npz,mx,my,mz)
+      include 'SIZE'
+      include 'TOTAL'
+      logical ifbrick
+      integer e,eg,offs
+c     Fills lglel(1..nelt) with the global number of each local
+c     element and sets the global element counts nelx, nely, nelz.
+      if(.not.ifbrick) then   ! A 1-D array of elements of length P*lelt
+  10     continue
+         nelx = nelt*np
+         nely = 1
+         nelz = 1
+   
+         do e=1,nelt
+            eg = e + nid*nelt
+            lglel(e) = eg
+         enddo
+      else              ! A 3-D block of elements 
+        if (npx*npy*npz .ne. np) then
+          call cubic(npx,npy,npz,np)  !xyz distribution of total proc
+        end if 
+        if (mx*my*mz .ne. nelt) then
+          call cubic(mx,my,mz,nelt)   !xyz distribution of elements per proc
+        end if 
+c       (cubic is only called when the given grid does not multiply up)
+c       if(mx.eq.nelt) goto 10
+
+        nelx = mx*npx
+        nely = my*npy 
+        nelz = mz*npz
+c       offs + 1 is the global number of this rank's first element
+        e = 1
+        offs = (mod(nid,npx)*mx) + npx*(my*mx)*(mod(nid/npx,npy)) 
+     $      + (npx*npy)*(mx*my*mz)*(nid/(npx*npy))
+        do k = 0,mz-1
+        do j = 0,my-1
+        do i = 0,mx-1
+           eg = offs+i+(j*nelx)+(k*nelx*nely)+1
+           lglel(e) = eg
+           e        = e+1
+        enddo
+        enddo
+        enddo
+      endif
+c     Rank 0 prints the grids (also when ifbrick is false)
+      if (nid.eq.0) then
+        write(6,*) "Processes: npx= ", npx, " npy= ", npy, " npz= ", npz
+        write(6,*) "Local Elements: mx= ", mx, " my= ", my, " mz= ", mz
+        write(6,*) "Elements: nelx= ", nelx, " nely= ", nely,
+     &             " nelz= ", nelz
+      end if
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine cubic(mx,my,mz,np)
+c
+c     Split np into three integer factors mx >= my >= mz with
+c     mx*my*mz = np, preferring the split whose ratio mx/mz is
+c     smallest.  Without a better candidate the answer stays at
+c     np x 1 x 1.
+c
+c     mx,my,mz : (out) the chosen factors, largest first
+c     np       : (in)  the count to be factored
+c
+c     Untyped names follow implicit typing: best is real, the
+c     rest are integers.
+c
+      mx = np
+      my = 1
+      mz = 1
+      best = np
+
+c     Candidates for one factor run from about the cube root of np
+c     down to 1; only exact divisors of np are examined.
+      kmax = np**(1./3.) + 0.000001
+      do kc = kmax,1,-1
+        if (mod(np,kc).eq.0) then
+          nrest = np/kc
+
+c         Largest divisor of the cofactor at or below its square
+c         root; the search always stops, at 1 if nowhere earlier.
+          jmax = nrest**(1./2.) + 0.000001
+          do jc = jmax,1,-1
+            if (mod(nrest,jc).eq.0) exit
+          enddo
+          i1 = nrest/jc
+          i2 = jc
+          i3 = kc
+
+c         Rank the three factors without explicit swapping
+          ihi = max(i1,i2,i3)
+          ilo = min(i1,i2,i3)
+          imd = i1 + i2 + i3 - ihi - ilo
+
+c         Keep this split only if it is strictly more balanced
+          if (real(ihi)/ilo .lt. best) then
+            best = real(ihi)/ilo
+            mx = ihi
+            my = imd
+            mz = ilo
+          end if
+
+        end if
+      enddo
+
+      return
+      end
+
+c-----------------------------------------------------------------------
+      subroutine set_multiplicity (c)       ! Inverse of counting matrix
+      include 'SIZE'
+      include 'TOTAL'
+c     Sets c(i) to 1/(gs_op result) for i = 1..n, starting from the
+      real c(1)
+c     vector produced by rone; n = nx1*ny1*nz1*nelt local points.
+      n = nx1*ny1*nz1*nelt
+c     rone presumably fills c with ones - TODO confirm
+      call rone(c,n)
+      call gs_op(gsh,c,1,1,0)  ! Gather-scatter operation  ! w   = QQ  w
+c     Invert each entry in place
+      do i=1,n
+         c(i) = 1./c(i)
+      enddo
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine set_timer_flop_cnt(iset)
+      include 'SIZE'
+      include 'TOTAL'
+      include 'TIMER'
+c     iset = 0: zero the flop counters and timers, start the clock.
+      integer i, numThrd, totThd
+      integer omp_get_max_threads
+      real tmp1(20), tmp2(20), tmp3(20), tmp4(20)
+c     otherwise: stop the clock, compute mflops and print a report.
+      real time0,time1
+      save time0,time1
+
+      if (iset.eq.0) then
+         flop_a  = 0
+         flop_cg = 0
+
+         do i = 1, tmax
+           trzero(i) = 0
+           tcopy(i) = 0
+           tsolvem(i) = 0
+           tglsc3a(i) = 0
+           tglsc3b(i) = 0
+           tglsc3c(i) = 0
+           tglsc3d(i) = 0
+           tadd2s1(i) = 0
+           tadd2s2a(i) = 0
+           tadd2s2b(i) = 0
+           tadd2s2c(i) = 0
+           tlocalgrad3(i) = 0
+           twrwswt(i) = 0
+           tlocalgrad3t(i) = 0
+           tgsop(i) = 0
+           tgop(1,i) = 0
+           tgop(2,i) = 0
+           tgop(3,i) = 0
+           tgop(4,i) = 0
+         end do
+
+         time0   = dnekclock()
+      else
+        time1   = dnekclock()-time0
+        if (time1.gt.0) mflops = (1.0*flop_a+1.0*flop_cg)/(1.e6*time1)
+c       (mflops keeps its previous value when time1 is not positive)
+        if (nid.eq.0) then
+          write(6,1) nelt,np,nx1, nelt*np
+          write(6,2) mflops*np, mflops
+          write(6,3) REAL(flop_a),REAL(flop_cg),time1
+        end if
+
+    1   format('nelt = ', i7, ', np = ', i9, ', nx1 = ', i7,
+     &         ', elements =', i10 )
+    2   format('Tot MFlops = ', 1pe12.5, ', MFlops = ', e12.5)
+    3   format('Ax FOp = ', 1pe12.5, ', CG FOp = ', e12.5,
+     &         ', Solve Time = ', e12.5)
+
+#ifdef TIMERS
+        numThrd = 1
+#ifdef _OPENMP
+        numThrd = omp_get_max_threads()
+#endif
+        totThd = numThrd*np
+c       Remove the gop share from each glsc3 timer, per thread
+        do i = 1, numThrd
+          tglsc3a(i) = tglsc3a(i) - tgop(1,i)
+          tglsc3b(i) = tglsc3b(i) - tgop(2,i)
+          tglsc3c(i) = tglsc3c(i) - tgop(3,i)
+          tglsc3d(i) = tglsc3d(i) - tgop(4,i)
+        end do
+c       tmp1: sums over threads, then over ranks (gop with +)
+        do i = 1,20
+          tmp1(i) = 0.0
+        end do
+        
+        tmp1(1)= time1
+        do i = 1, numThrd
+          tmp1(2)= tmp1(2) + trzero(i)
+          tmp1(3)= tmp1(3) + tcopy(i)
+          tmp1(4)= tmp1(4) + tsolvem(i)
+          tmp1(5)= tmp1(5) + tglsc3a(i)
+          tmp1(6)= tmp1(6) + tglsc3b(i)
+          tmp1(7)= tmp1(7) + tglsc3c(i)
+          tmp1(8)= tmp1(8) + tglsc3d(i)
+          tmp1(9)= tmp1(9) + tadd2s1(i)
+          tmp1(10)= tmp1(10) + tadd2s2a(i)
+          tmp1(11)= tmp1(11) + tadd2s2b(i)
+          tmp1(12)= tmp1(12) + tadd2s2c(i)
+          tmp1(13)= tmp1(13) + tlocalgrad3(i)
+          tmp1(14)= tmp1(14) + twrwswt(i)
+          tmp1(15)= tmp1(15) + tlocalgrad3t(i)
+          tmp1(16)= tmp1(16) + tgsop(i)
+          tmp1(17)= tmp1(17) + tgop(1,i)
+          tmp1(18)= tmp1(18) + tgop(2,i)
+          tmp1(19)= tmp1(19) + tgop(3,i)
+          tmp1(20)= tmp1(20) + tgop(4,i)
+        end do
+
+        call gop(tmp1, tmp4, '+  ', 20)
+c       tmp2: smallest value over threads, then gop with op m
+        tmp2(1)= time1
+        tmp2(2)= trzero(1)
+        tmp2(3)= tcopy(1)
+        tmp2(4)= tsolvem(1)
+        tmp2(5)= tglsc3a(1)
+        tmp2(6)= tglsc3b(1)
+        tmp2(7)= tglsc3c(1)
+        tmp2(8)= tglsc3d(1)
+        tmp2(9)= tadd2s1(1)
+        tmp2(10)= tadd2s2a(1)
+        tmp2(11)= tadd2s2b(1)
+        tmp2(12)= tadd2s2c(1)
+        tmp2(13)= tlocalgrad3(1)
+        tmp2(14)= twrwswt(1)
+        tmp2(15)= tlocalgrad3t(1)
+        tmp2(16)= tgsop(1)
+        tmp2(17)= tgop(1,1)
+        tmp2(18)= tgop(2,1)
+        tmp2(19)= tgop(3,1)
+        tmp2(20)= tgop(4,1)
+
+        do i = 2, numThrd
+          if (trzero(i) < tmp2(2)) tmp2(2)= trzero(i)
+          if (tcopy(i) < tmp2(3)) tmp2(3)= tcopy(i)
+          if (tsolvem(i) < tmp2(4)) tmp2(4)= tsolvem(i)
+          if (tglsc3a(i) < tmp2(5)) tmp2(5)= tglsc3a(i)
+          if (tglsc3b(i) < tmp2(6)) tmp2(6)= tglsc3b(i)
+          if (tglsc3c(i) < tmp2(7)) tmp2(7)= tglsc3c(i)
+          if (tglsc3d(i) < tmp2(8)) tmp2(8)= tglsc3d(i)
+          if (tadd2s1(i) < tmp2(9)) tmp2(9)= tadd2s1(i)
+          if (tadd2s2a(i) < tmp2(10)) tmp2(10)= tadd2s2a(i)
+          if (tadd2s2b(i) < tmp2(11)) tmp2(11)= tadd2s2b(i)
+          if (tadd2s2c(i) < tmp2(12)) tmp2(12)= tadd2s2c(i)
+          if (tlocalgrad3(i) < tmp2(13)) tmp2(13)= tlocalgrad3(i)
+          if (twrwswt(i) < tmp2(14)) tmp2(14)= twrwswt(i)
+          if (tlocalgrad3t(i) < tmp2(15)) tmp2(15)= tlocalgrad3t(i)
+          if (tgsop(i) < tmp2(16)) tmp2(16)= tgsop(i)
+          if (tgop(1,i) < tmp2(17)) tmp2(17)= tgop(1,i)
+          if (tgop(2,i) < tmp2(18)) tmp2(18)= tgop(2,i)
+          if (tgop(3,i) < tmp2(19)) tmp2(19)= tgop(3,i)
+          if (tgop(4,i) < tmp2(20)) tmp2(20)= tgop(4,i)
+        end do
+
+        call gop(tmp2, tmp4, 'm  ', 20)
+c       tmp3: largest value over threads, then gop with op M
+        tmp3(1)= time1
+        tmp3(2)= trzero(1)
+        tmp3(3)= tcopy(1)
+        tmp3(4)= tsolvem(1)
+        tmp3(5)= tglsc3a(1)
+        tmp3(6)= tglsc3b(1)
+        tmp3(7)= tglsc3c(1)
+        tmp3(8)= tglsc3d(1)
+        tmp3(9)= tadd2s1(1)
+        tmp3(10)= tadd2s2a(1)
+        tmp3(11)= tadd2s2b(1)
+        tmp3(12)= tadd2s2c(1)
+        tmp3(13)= tlocalgrad3(1)
+        tmp3(14)= twrwswt(1)
+        tmp3(15)= tlocalgrad3t(1)
+        tmp3(16)= tgsop(1)
+        tmp3(17)= tgop(1,1)
+        tmp3(18)= tgop(2,1)
+        tmp3(19)= tgop(3,1)
+        tmp3(20)= tgop(4,1)
+
+        do i = 2, numThrd
+          if (trzero(i) > tmp3(2)) tmp3(2)= trzero(i)
+          if (tcopy(i) > tmp3(3)) tmp3(3)= tcopy(i)
+          if (tsolvem(i) > tmp3(4)) tmp3(4)= tsolvem(i)
+          if (tglsc3a(i) > tmp3(5)) tmp3(5)= tglsc3a(i)
+          if (tglsc3b(i) > tmp3(6)) tmp3(6)= tglsc3b(i)
+          if (tglsc3c(i) > tmp3(7)) tmp3(7)= tglsc3c(i)
+          if (tglsc3d(i) > tmp3(8)) tmp3(8)= tglsc3d(i)
+          if (tadd2s1(i) > tmp3(9)) tmp3(9)= tadd2s1(i)
+          if (tadd2s2a(i) > tmp3(10)) tmp3(10)= tadd2s2a(i)
+          if (tadd2s2b(i) > tmp3(11)) tmp3(11)= tadd2s2b(i)
+          if (tadd2s2c(i) > tmp3(12)) tmp3(12)= tadd2s2c(i)
+          if (tlocalgrad3(i) > tmp3(13)) tmp3(13)= tlocalgrad3(i)
+          if (twrwswt(i) > tmp3(14)) tmp3(14)= twrwswt(i)
+          if (tlocalgrad3t(i) > tmp3(15)) tmp3(15)= tlocalgrad3t(i)
+          if (tgsop(i) > tmp3(16)) tmp3(16)= tgsop(i)
+          if (tgop(1,i) > tmp3(17)) tmp3(17)= tgop(1,i)
+          if (tgop(2,i) > tmp3(18)) tmp3(18)= tgop(2,i)
+          if (tgop(3,i) > tmp3(19)) tmp3(19)= tgop(3,i)
+          if (tgop(4,i) > tmp3(20)) tmp3(20)= tgop(4,i)
+        end do
+
+        call gop(tmp3, tmp4, 'M  ', 20)
+c       Columns printed: tmp1 divided by a count, then tmp2, then tmp3
+        if (nid.eq.0) then
+          write(6,4) "time       = ",tmp1(1)/np, tmp2(1), tmp3(1)
+          write(6,4) "rzero      = ",tmp1(2)/totThd, tmp2(2), tmp3(2)
+          write(6,4) "copy       = ",tmp1(3)/totThd, tmp2(3), tmp3(3)
+          write(6,4) "glsc3a     = ",tmp1(5)/totThd, tmp2(5), tmp3(5)
+          write(6,4) "gopa       = ",tmp1(17)/totThd, tmp2(17), tmp3(17)
+          write(6,4) "solveM     = ",tmp1(4)/totThd, tmp2(4), tmp3(4)
+          write(6,4) "glsc3b     = ",tmp1(6)/totThd, tmp2(6), tmp3(6)
+          write(6,4) "gopb       = ",tmp1(18)/totThd, tmp2(18), tmp3(18)
+          write(6,4) "add2s1     = ",tmp1(9)/totThd, tmp2(9), tmp3(9)
+          write(6,4) "localgrad3 = ",tmp1(13)/totThd, tmp2(13), tmp3(13)
+          write(6,4) "wrwswt     = ",tmp1(14)/totThd, tmp2(14), tmp3(14)
+          write(6,4) "localgradt = ",tmp1(15)/totThd, tmp2(15), tmp3(15)
+          write(6,4) "gsop       = ",tmp1(16)/totThd, tmp2(16), tmp3(16)
+          write(6,4) "add2s2a    = ",tmp1(10)/totThd, tmp2(10), tmp3(10)
+          write(6,4) "glsc3c     = ",tmp1(7)/totThd, tmp2(7), tmp3(7)
+          write(6,4) "gopc       = ",tmp1(19)/totThd, tmp2(19), tmp3(19)
+          write(6,4) "add2s2b    = ",tmp1(11)/totThd, tmp2(11), tmp3(11)
+          write(6,4) "add2s2c    = ",tmp1(12)/totThd, tmp2(12), tmp3(12)
+          write(6,4) "glsc3d     = ",tmp1(8)/totThd, tmp2(8), tmp3(8)
+          write(6,4) "gopd       = ",tmp1(20)/totThd, tmp2(20), tmp3(20)
+        endif
+
+    4   format(A, 1pe12.4, e12.4, e12.4)
+
+c       if (nid.eq.0) then
+c         write(6,4) "av time: ", tmp2(1)/np, tmp2(2)/totThd,
+c    &               tmp2(3)/totThd, tmp2(4)/totThd, tmp2(5)/totThd
+c         write(6,5) "av time: ", tmp2(5)/totThd, tmp2(6)/totThd,
+c    &               tmp2(7)/totThd, tmp2(8)/totThd
+c       endif
+
+c       if (nid.eq.0) then
+c         write(6,4) "min time: ", tmp2(1), tmp2(2), tmp2(3),
+c    &               tmp2(4), tmp2(5)
+c         write(6,5) "min time: ", tmp2(5), tmp2(6), tmp2(7),
+c    &               tmp2(8)
+c       endif
+
+c       if (nid.eq.0) then
+c         write(6,4) "max time: ", tmp2(1), tmp2(2), tmp2(3),
+c    &               tmp2(4), tmp2(5)
+c         write(6,5) "max time: ", tmp2(5), tmp2(6), tmp2(7),
+c    &               tmp2(8)
+c       endif
+
+c   4   format(A, ' cg= ', 1pe12.4, ', zcm= ', e12.4,
+c    &         ', glsc3= ', e12.4, ', add2sx= ', e12.4,
+c    &         ', ax= ', e12.4)
+c   5   format(A, ' ax= ', 1pe12.4, ', add2s2= ', e12.4,
+c    &         ', gsop= ', e12.4, ', axe= ', e12.4)
+#endif
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine xfer(np,gsh)
+      include 'SIZE'
+      parameter(npts_max = lx1*ly1*lz1*lelt)
+      ! Benchmark: times crystal_tuple_transfer for growing point counts.
+      real buffer(2,npts_max)
+      integer ikey(npts_max)
+      ! npts is the per-rank point count (grows ~2% per test); np scales
+      ! it to npoints and is the modulus used for the per-point keys.
+      nbuf = 800
+      npts = 1
+      do itest=1,200
+         npoints = npts*np
+         ! fill buffer; nppp comes back as the local point count
+         call load_points(buffer,nppp,npoints,npts,nbuf)
+         iend   = mod1(npoints,nbuf)
+         istart = 1
+         if(nid.ne.0)istart = iend+(nid-1)*nbuf+1
+         do i = 1,nppp
+            icount=istart+(i-1)
+            ikey(i)=mod(icount,np)
+         enddo
+         ! time 50 back-to-back transfers between two dnekclock readings
+         call nekgsync
+         time0 = dnekclock()
+         do loop=1,50
+            call crystal_tuple_transfer(gsh,nppp,npts_max,
+     $                ikey,1,ifake,0,buffer,2,1)
+         enddo
+         time1 = dnekclock()
+         etime = (time1-time0)/50
+         ! rank 0 reports np, npts, npoints and the mean time per call
+         if (nid.eq.0) write(6,1) np,npts,npoints,etime
+   1     format(2i7,i10,1p1e12.4,' bandwidth' )
+         npts = 1.02*(npts+1)
+         if (npts.gt.npts_max) goto 100
+      enddo
+ 100  continue
+      ! reached after 200 tests or as soon as npts exceeds npts_max
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine load_points(buffer,nppp,npoints,npts,nbuf)
+      include 'SIZE'
+      include 'PARALLEL'
+      ! Fills buffer(1,i)=i, buffer(2,i)=nid; nppp = entries written here.
+      real buffer(2,nbuf)
+      ! nppp stays 0 on ranks that match neither branch of the pass loop
+      nppp=0
+      if(nbuf.gt.npts) then
+       npass = 1+npoints/nbuf
+       ! rank ipass takes a full nbuf chunk; rank 0 takes the last pass
+       do ipass = 1,npass
+          if(nid.eq.ipass.and.ipass.ne.npass) then
+            do i = 1,nbuf
+             buffer(1,i)=i
+             buffer(2,i)=nid
+            enddo
+            nppp=nbuf
+          elseif (npass.eq.ipass.and.nid.eq.0) then
+            mbuf=mod1(npoints,nbuf)
+            do i=1,mbuf
+               buffer(1,i)=i
+               buffer(2,i)=nid
+            enddo
+            nppp=mbuf
+          endif
+       enddo
+      else
+       do i = 1,npts
+          buffer(1,i)=i
+          buffer(2,i)=nid
+       enddo
+       nppp=npts
+      endif
+      ! NOTE(review): this branch writes up to npts > nbuf columns;
+      return
+      end
+c----------------------------------------------------------------------
+      subroutine read_param(ifbrick,iel0,ielN,ielD,nx0,nxN,nxD,
+     +                      npx,npy,npz,mx,my,mz)
+      include 'SIZE'
+      logical ifbrick
+      integer iel0,ielN,ielD,nx0,nxN,nxD,npx,npy,npz,mx,my,mz
+      ! Rank 0 reads five list-directed records from data.rea and every
+      ! value is then handed to bcast.  Open failure, read error and
+      ! premature end-of-file all branch to label 100 (message + exitt0).
+      if(nid.eq.0) then
+         open(unit=9,file='data.rea',status='old',err=100)
+         read(9,*,err=100,end=100) ifbrick
+         read(9,*,err=100,end=100) iel0,ielN,ielD
+         read(9,*,err=100,end=100) nx0,nxN,nxD
+         read(9,*,err=100,end=100) npx,npy,npz
+         read(9,*,err=100,end=100) mx,my,mz
+         close(9)
+      endif
+      call bcast(ifbrick,4)
+      call bcast(iel0,4)
+      call bcast(ielN,4)
+      call bcast(ielD,4)
+c     nx0=lx1
+c     nxN=lx1
+      call bcast(nx0,4)
+      call bcast(nxN,4)
+      call bcast(nxD,4)
+      call bcast(npx,4)
+      call bcast(npy,4)
+      call bcast(npz,4)
+      call bcast(mx,4)
+      call bcast(my,4)
+      call bcast(mz,4)
+      if(iel0.gt.ielN.or.nx0.gt.nxN) goto 200
+
+      return
+      ! label 100: data.rea missing, unreadable or too short (rank 0)
+  100 continue
+      write(6,*) "ERROR READING data.rea....ABORT"
+      call exitt0
+      ! label 200: inconsistent ranges, detected after the broadcasts
+  200 continue
+      write(6,*) "ERROR data.rea :: iel0 > ielN or nx0 > nxN :: ABORT"
+      call exitt0
+  
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/driver_comm.f b/src/driver_comm.f
new file mode 100644
index 0000000..d66e436
--- /dev/null
+++ b/src/driver_comm.f
@@ -0,0 +1,21 @@
+c-----------------------------------------------------------------------
+      program nekbone
+      ! Driver that only calls iniproc, platform_timer and exitt0.
+      include 'SIZE'
+      include 'TOTAL'
+      include 'mpif.h'
+      ! lxyz and lt are defined here but not referenced in this unit
+      parameter (lxyz = lx1*ly1*lz1)
+      parameter (lt=lxyz*lelt)
+
+
+      call iniproc(mpi_comm_world)    ! has nekmpi common block
+
+c     GET PLATFORM CHARACTERISTICS
+      iverbose = 1
+      call platform_timer(iverbose)   ! iverbose=0 or 1
+
+      call exitt0
+
+      end
+c--------------------------------------------------------------
diff --git a/src/jl/Makefile b/src/jl/Makefile
new file mode 100644
index 0000000..96c2f75
--- /dev/null
+++ b/src/jl/Makefile
@@ -0,0 +1,91 @@
+CC=mpicc -std=c99 --pedantic
+CFLAGS+=-DMPI
+CFLAGS+=-DPREFIX=jl_
+CFLAGS+=-DNO_NEK_EXITT
+CFLAGS+=-DGLOBAL_LONG
+LDFLAGS+=-lm
+# Optional: print all memory management to stdout (see mem.h in README)
+#CFLAGS+=-DPRINT_MALLOCS=1
+
+CFLAGS+=-DUSE_NAIVE_BLAS
+#CFLAGS+=-DUSE_CBLAS
+#LDFLAGS+=-lcblas
+# BLAS choice above: naive implementation, or cblas (needs -lcblas)
+#CFLAGS+=-DAMG_DUMP
+CFLAGS+=-DGS_TIMING -DGS_BARRIER
+# Optimisation level: swap the two lines below for a debug build
+#CFLAGS+=-O0 -g
+CFLAGS+=-O3 -march=native
+
+CFLAGS+=-W -Wall -Wno-unused-function -Wno-unused-parameter
+#CFLAGS+=-Minform=warn
+# $(G) is not set in this file; presumably supplied on the make command line
+CCCMD=$(CC) $(G) $(CFLAGS)
+LINKCMD=$(CC) $(G) $(LDFLAGS)
+#RLINKCMD = $(LD) -r
+.PHONY: cmds deps tests clean objects odepinfo
+
+TESTS=sort_test sort_test2 sarray_sort_test spchol_test \
+      comm_test crystal_test sarray_transfer_test \
+      gs_test gs_test_old gs_unique_test \
+      xxt_test xxt_test2 crs_test \
+      poly_test poly_test2 lob_bnd_test obbox_test \
+      findpts_el_2_test findpts_el_2_test2 \
+      findpts_el_3_test findpts_el_3_test2 \
+      findpts_local_test findpts_test
+# CRS picks the coarse-solver objects; AMG is assigned further down the file
+CRS=$(AMG)
+
+tests: $(TESTS) ;
+clean: ; @$(RM) $(TESTS) *.o *.s
+# cmds: show the compile and link command lines that the rules will use
+cmds: ; @echo CC = $(CCCMD); echo LINK = $(LINKCMD);
+# deps: regenerate makefile.cdep (object dependencies plus OBJECTS=) via cdep.py
+deps: ; ./cdep.py *.c > makefile.cdep;
+
+odepinfo: deps objects; @./odep_info.py *.o
+# a missing makefile.cdep is not an error because of the leading "-"
+-include makefile.cdep
+
+%.o: %.c ; @echo CC $<; $(CCCMD) -c $< -o $@
+%.s: %.c ; @echo CC -S $<; $(CCCMD) -S $< -o $@
+objects: $(OBJECTS) ;
+# Rebuild poly_imp.h from its generator; libraries are listed after the source.
+poly_imp.h: gen_poly_imp.c
+	$(RM) poly_imp.h;
+	$(CC) gen_poly_imp.c -lgmp -lm -o gen_poly_imp;
+	./gen_poly_imp > poly_imp.h;
+	$(RM) gen_poly_imp
+
+GS_OBJECTS=gs.o sort.o sarray_transfer.o sarray_sort.o \
+           gs_local.o fail.o crystal.o comm.o tensor.o
+# Object sets for the two coarse solvers; CRS (set above) selects AMG
+XXT=sparse_cholesky.o xxt.o
+AMG=amg.o
+# NOTE(review): LINKCMD places -lm before the objects; order-sensitive linkers may need it after $^
+sort_test: sort.o fail.o comm.o tensor.o gs_local.o sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+sort_test2: sort.o fail.o comm.o tensor.o gs_local.o sort_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+sarray_sort_test: sort.o fail.o comm.o tensor.o gs_local.o sarray_sort.o sarray_sort_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+spchol_test: sparse_cholesky.o sort.o fail.o comm.o tensor.o gs_local.o spchol_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+comm_test: fail.o comm.o tensor.o gs_local.o comm_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+crystal_test: fail.o crystal.o comm.o tensor.o gs_local.o crystal_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+sarray_transfer_test: sarray_transfer.o sarray_sort.o sort.o fail.o crystal.o comm.o tensor.o gs_local.o sarray_transfer_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+
+gs_test: gs_test.o $(GS_OBJECTS);		@echo LINK $@; $(LINKCMD) $^ -o $@
+gs_test_old: gs_test_old.o $(GS_OBJECTS);	@echo LINK $@; $(LINKCMD) $^ -o $@
+gs_unique_test: gs_unique_test.o $(GS_OBJECTS);	@echo LINK $@; $(LINKCMD) $^ -o $@
+xxt_test: xxt_test.o $(CRS) $(GS_OBJECTS);	@echo LINK $@; $(LINKCMD) $^ -o $@
+xxt_test2: xxt_test2.o $(CRS) $(GS_OBJECTS);	@echo LINK $@; $(LINKCMD) $^ -o $@
+crs_test: crs_test.o $(CRS) $(GS_OBJECTS);	@echo LINK $@; $(LINKCMD) $^ -o $@
+
+poly_test2: poly.o fail.o comm.o tensor.o gs_local.o poly_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+poly_test: poly.o fail.o comm.o tensor.o gs_local.o poly_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+lob_bnd_test: tensor.o poly.o lob_bnd.o fail.o comm.o gs_local.o lob_bnd_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+obbox_test: rand_elt_test.o poly.o obbox.o tensor.o lob_bnd.o fail.o comm.o gs_local.o obbox_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_el_2_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_2.o findpts_el_2_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_el_2_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_2.o findpts_el_2_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_el_3_test2: tensor.o rand_elt_test.o lob_bnd.o fail.o comm.o gs_local.o poly.o findpts_el_3.o findpts_el_3_test2.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_el_3_test: poly.o fail.o comm.o tensor.o gs_local.o findpts_el_3.o findpts_el_3_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_local_test: rand_elt_test.o lob_bnd.o fail.o comm.o tensor.o gs_local.o poly.o findpts_local.o sort.o sarray_sort.o obbox.o findpts_el_3.o findpts_el_2.o findpts_local_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+findpts_test: sarray_transfer.o sort.o rand_elt_test.o lob_bnd.o poly.o findpts.o sarray_sort.o findpts_local.o obbox.o tensor.o findpts_el_3.o findpts_el_2.o fail.o crystal.o comm.o gs_local.o findpts_test.o ; @echo LINK $@; $(LINKCMD) $^ -o $@
+
diff --git a/src/jl/README b/src/jl/README
new file mode 100644
index 0000000..36c66ab
--- /dev/null
+++ b/src/jl/README
@@ -0,0 +1,69 @@
+
+A high-level view of the code in this directory is as follows. See each header
+file listed for more documentation.
+
+The following headers are fundamental to most of the code.
+
+  name.h:    a given prefix is added to all external symbols;
+             determines how FORTRAN routines are named
+  types.h:   defines the integer types used everywhere (e.g., for array indices)
+  mem.h:     memory-management wrappers;
+             "array" type (generic dynamically sized array);
+             "buffer" type ( = char array )
+  comm.h:    wrappers for MPI calls (with alternative single proc versions)
+
+The Gather/Scatter library top-level interface is defined in "gs.h".
+The file "gs_defs.h" defines the datatypes and operations that it supports.
+
+There are two coarse solvers (XXT and AMG), which are not currently very well
+documented. The interface is given in "crs.h". 
+ 
+"findpts" is documented in "findpts.c". The idea is that during a run of an
+SEM code, we have a geometry map
+  (processor, element, r, s, t) -> (x, y, z)
+that defines our mesh. Within each element, the xyz coordinate is a
+polynomial function of the parametric r,s,t coordinates.
+"findpts" takes a distributed list of "(x,y,z)" points and computes the inverse
+of the above map.
+"findpts_eval" takes a list of "(proc,el,r,s,t)" coords, e.g., as returned by
+  "findpts", and interpolates a given field at each point.
+
+
+The "workhorses" of the implementations of much of the above are the
+"sarray_sort" and "sarray_transfer" routines, documented in the respective
+headers. The "array" type, defined in "mem.h", can be used to keep track of a
+dynamically sized array of (arbitrary) structs.
+
+  sarray_sort.h:     
+    sort an array of structs (locally/sequentially) by one or two of its fields
+  sarray_transfer.h:
+    transfer each struct in array to the processor specified by a given field
+    
+These, in turn, are implemented using the lower-level routines of
+"sort.h" and "crystal.h".
+
+
+The "findpts" algorithm makes use of a number of lower-level routines
+possibly useful on their own.
+
+  poly.h:     computation of quadrature nodes; fast polynomial interpolation
+  lob_bnd.h:  (relatively) fast yet robust bounds for polynomials on [-1,1]^d
+  obbox.h:    oriented as well as axis-aligned bounding boxes for spectral els
+  tensor.h:   some tensor-product applications,
+                with BLAS ops delegated to Nek, cblas, or a naive imp
+
+All of the preprocessor macros that affect compilation are:
+  name.h:  PREFIX="..."    prefix added to all C external symbols
+          FPREFIX="..."    prefix added to all FORTRAN routines
+    UPCASE, UNDERSCORE   determines FORTRAN naming convention
+  types.h: USE_LONG, USE_LONG_LONG, GLOBAL_LONG, GLOBAL_LONG_LONG
+           determine the integer types used by all code
+  mem.h: PRINT_MALLOCS=1   (print all memory management to stdout)
+  comm.h: MPI  (use MPI when defined;
+                otherwise, use a dummy single-proc implementation)
+  tensor.h: USE_CBLAS, USE_NAIVE_BLAS
+            (select BLAS implementation; default is Nek's mxm)
+  fail.c: NO_NEK_EXITT    when defined, don't call Nek's exitt routine
+  amg.c: AMG_BLOCK_ROWS   number of rows to read at a time (default=1200)
+         GS_TIMING        record timings for the matrix multiplies
+         GS_BARRIER       use a barrier to improve the quality of the timings
diff --git a/src/jl/c99.h b/src/jl/c99.h
new file mode 100644
index 0000000..a5a44e3
--- /dev/null
+++ b/src/jl/c99.h
@@ -0,0 +1,16 @@
+#ifndef C99_H
+#define C99_H
+/* NO_C99 is set when the compiler does not report C99 (199901L) or later. */
+#ifndef __STDC_VERSION__
+#  define NO_C99
+#elif __STDC_VERSION__ < 199901L
+#  define NO_C99
+#endif
+/* Without C99, "restrict" and "inline" are defined away to nothing. */
+#ifdef NO_C99
+#  define restrict
+#  define inline
+#  undef NO_C99
+#endif
+
+#endif
diff --git a/src/jl/cdep.py b/src/jl/cdep.py
new file mode 100755
index 0000000..a0dd87a
--- /dev/null
+++ b/src/jl/cdep.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+
+import sys, os, re
+
+#mergestr = lambda x: reduce((lambda a,b: a+" "+b),x,"")
+
+pathjoin = lambda a,b: os.path.normpath(os.path.join(a,b))
+include_re = re.compile("\s*#\s*include\s*\"([^\"]*)\"")
+incmatch = lambda x: ( include_re.match(line) for line in open(x) )
+incline = lambda x,m: pathjoin(os.path.split(x)[0],m.group(1))
+incl = lambda x: [ incline(x,m) for m in incmatch(x) if m!=None ]
+includes = {}
+def get_include(x):
+	if not includes.has_key(x): includes[x] = incl(x)
+	return includes[x]
+
+def closure(seq,f):
+	v = [], [x for x in seq], set(x for x in seq)
+	while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in 
+	  f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]]
+	return v[0]
+
+src_files = sys.argv[1:]
+files = closure(src_files, get_include)
+deps = dict((x,closure(includes[x],lambda y: includes[y])) for x in src_files)
+
+obj = lambda x: os.path.splitext(x)[0]+".o"
+
+for x in src_files:
+	print obj(x)+": "+x+reduce((lambda a,b: a+" "+b),deps[x],"")
+
+print
+print "OBJECTS="+reduce((lambda a,b: a+" "+obj(b)),src_files,"")
diff --git a/src/jl/comm.c b/src/jl/comm.c
new file mode 100644
index 0000000..8e5c9a3
--- /dev/null
+++ b/src/jl/comm.c
@@ -0,0 +1,175 @@
+#include <stddef.h> /* for size_t */
+#include <stdlib.h> /* for exit */
+#include <string.h> /* memcpy */
+#include <limits.h> /* for gs identities */
+#include <float.h>  /* for gs identities */
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "tensor.h"
+#include "gs_defs.h"
+#include "gs_local.h"
+#include "comm.h"
+
+uint comm_gbl_id=0, comm_gbl_np=1;
+
+GS_DEFINE_IDENTITIES()
+GS_DEFINE_DOM_SIZES()
+/* Scan by hand: scan[0..vn) gets the reduction over lower ranks, red (=scan+vsize) the one over all ranks. */
+static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op,
+                     const void *v, uint vn, void *buffer)
+{
+  comm_req req[2];
+  size_t vsize = vn*gs_dom_size[dom];
+  const uint id=com->id, np=com->np;
+  uint n = np, c=1, odd=0, base=0;
+  void *buf[2];
+  void *red = (char*)scan+vsize;           /* second half of the output */
+  buf[0]=buffer,buf[1]=(char*)buffer+vsize; /* buffer must hold 2*vsize bytes */
+  while(n>1) {                              /* descend to this rank's leaf */
+    odd=(odd<<1)|(n&1);
+    c<<=1, n>>=1;
+    if(id>=base+n) c|=1, base+=n, n+=(odd&1);
+  }
+  gs_init_array(scan,vn,dom,op);
+  memcpy(red,v,vsize);
+  while(n<np) {                             /* climb: pairwise exchanges */
+    if(c&1) n-=(odd&1), base-=n;
+    c>>=1, n<<=1, n+=(odd&1);
+    odd>>=1;
+    if(base==id) {
+      comm_irecv(&req[0],com, buf[0],vsize, id+n/2,id+n/2);
+      comm_isend(&req[1],com, red   ,vsize, id+n/2,id);
+      comm_wait(req,2);
+      gs_gather_array(red,buf[0],vn,dom,op);
+    } else {
+      comm_irecv(&req[0],com, scan,vsize, base,base);
+      comm_isend(&req[1],com, red ,vsize, base,id);
+      comm_wait(req,2);
+      break;
+    }
+  }
+  while(n>1) {                              /* descend again: pass results down */
+    if(base==id) {
+      comm_send(com, scan  ,2*vsize, id+n/2,id);
+    } else {
+      comm_recv(com, buffer,2*vsize, base,base);
+      gs_gather_array(scan,buf[0],vn,dom,op);
+      memcpy(red,buf[1],vsize);
+    }
+    odd=(odd<<1)|(n&1);
+    c<<=1, n>>=1;
+    if(id>=base+n) c|=1, base+=n, n+=(odd&1);
+  }
+}
+
+/* Allreduce by hand (fallback of comm_allreduce): combine into v up the tree, then send v back down. */
+static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op,
+                          void *v, uint vn, void *buf)
+{
+  size_t total_size = vn*gs_dom_size[dom];
+  const uint id=com->id, np=com->np;
+  uint n = np, c=1, odd=0, base=0;
+  while(n>1) {                       /* descend to this rank's leaf */
+    odd=(odd<<1)|(n&1);
+    c<<=1, n>>=1;
+    if(id>=base+n) c|=1, base+=n, n+=(odd&1);
+  }
+  while(n<np) {                      /* climb: group base receives and combines */
+    if(c&1) n-=(odd&1), base-=n;
+    c>>=1, n<<=1, n+=(odd&1);
+    odd>>=1;
+    if(base==id) {
+      comm_recv(com, buf,total_size, id+n/2,id+n/2);
+      gs_gather_array(v,buf,vn, dom,op);
+    } else {
+      comm_send(com, v,total_size, base,id);
+      break;
+    }
+  }
+  while(n>1) {                       /* descend again: forward the result */
+    if(base==id)
+      comm_send(com, v,total_size, id+n/2,id);
+    else
+      comm_recv(com, v,total_size, base,base);
+    odd=(odd<<1)|(n&1);
+    c<<=1, n>>=1;
+    if(id>=base+n) c|=1, base+=n, n+=(odd&1);
+  }
+}
+/* Public entry point for the scan; forwards every argument to scan_imp. */
+void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op,
+               const void *v, uint vn, void *buffer)
+{
+  scan_imp(scan, com,dom,op, v,vn, buffer);
+}
+/* Reduce v (vn items of dom) with op across com, result left in v; buf is scratch of the same size. */
+void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op,
+                          void *v, uint vn, void *buf)
+{
+  if(vn==0) return;
+#ifdef MPI
+  {
+    MPI_Datatype mpitype;
+    MPI_Op mpiop;
+    #define DOMAIN_SWITCH() do { \
+      switch(dom) { case gs_double:    mpitype=MPI_DOUBLE;    break; \
+                    case gs_float:     mpitype=MPI_FLOAT;     break; \
+                    case gs_int:       mpitype=MPI_INT;       break; \
+                    case gs_long:      mpitype=MPI_LONG;      break; \
+     WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;) \
+                  default:        goto comm_allreduce_byhand; \
+      } \
+    } while(0)
+    DOMAIN_SWITCH();
+    #undef DOMAIN_SWITCH
+    switch(op) { case gs_add: mpiop=MPI_SUM;  break;
+                 case gs_mul: mpiop=MPI_PROD; break;
+                 case gs_min: mpiop=MPI_MIN;  break;
+                 case gs_max: mpiop=MPI_MAX;  break;
+                 default:        goto comm_allreduce_byhand;
+    }
+    MPI_Allreduce(v,buf,vn,mpitype,mpiop,com->c);
+    memcpy(v,buf,vn*gs_dom_size[dom]);  /* copy the result back into v */
+    return;
+  }
+#endif
+#ifdef MPI
+comm_allreduce_byhand:  /* domain or op without a matching MPI type/op above */
+  allreduce_imp(com,dom,op, v,vn, buf);
+#endif
+}
+/* Dot product across ranks: the local tensor_dot(v,w,n) is summed over comm. */
+double comm_dot(const struct comm *comm, double *v, double *w, uint n)
+{
+  double scratch, sum = tensor_dot(v,w,n);   /* this rank's contribution */
+  comm_allreduce(comm,gs_double,gs_add, &sum,1, &scratch);
+  return sum;
+}
+
+/* T comm_reduce__T(const struct comm *comm, gs_op op, const T *in, uint n) */
+/* Folds the n local values with op, then combines the single result across comm. */
+#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break;
+#define SWITCH_OP(T,op) do switch(op) { \
+    GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0)
+/* do/while(--n) runs at least once: only expanded under the n!=0 guard below */
+#define WITH_OP(T,OP) \
+  do { T v = *in++; GS_DO_##OP(accum,v); } while(--n)
+/* accum starts from gs_identity_T[op]; buf is scratch for comm_allreduce */
+#define DEFINE_REDUCE(T) \
+T PREFIXED_NAME(comm_reduce__##T)( \
+    const struct comm *comm, gs_op op, const T *in, uint n) \
+{                                                           \
+  T accum = gs_identity_##T[op], buf;                       \
+  if(n!=0) SWITCH_OP(T,op);                                 \
+  comm_allreduce(comm,gs_##T,op, &accum,1, &buf);           \
+  return accum;                                             \
+}
+/* one comm_reduce__T definition per domain listed by GS_FOR_EACH_DOMAIN */
+GS_FOR_EACH_DOMAIN(DEFINE_REDUCE)
+
+#undef DEFINE_REDUCE
+#undef WITH_OP
+#undef SWITCH_OP
+#undef SWITCH_OP_CASE
+
diff --git a/src/jl/comm.h b/src/jl/comm.h
new file mode 100644
index 0000000..4d0ed3e
--- /dev/null
+++ b/src/jl/comm.h
@@ -0,0 +1,255 @@
+#ifndef COMM_H
+#define COMM_H
+
+/* requires:
+     <stddef.h>            for size_t
+     <stdlib.h>            for exit
+     "fail.h", "types.h"
+     "gs_defs.h"           for comm_allreduce, comm_scan, comm_reduce_T
+*/
+
+#if !defined(FAIL_H) || !defined(TYPES_H)
+#warning "comm.h" requires "fail.h" and "types.h"
+#endif
+
+/*
+  When the preprocessor macro MPI is defined, defines (very) thin wrappers
+  for the handful of used MPI routines. Alternatively, when MPI is not defined,
+  these wrappers become dummy routines suitable for a single process run.
+  No code outside of "comm.h" and "comm.c" makes use of MPI at all.
+
+  Basic usage:
+  
+    struct comm c;
+  
+    comm_init(&c, MPI_COMM_WORLD);  // initializes c using MPI_Comm_dup
+
+    comm_free(&c);
+  
+  Very thin MPI wrappers: (see below for implementation)
+
+    comm_send,_recv,_isend,_irecv,_time,_barrier
+    
+  Additionally, some reduction and scan routines are provided making use
+    of the definitions in "gs_defs.h" (provided this has been included first).
+
+  Example comm_allreduce usage:
+    
+    double v[5], buf[5];
+    comm_allreduce(&c, gs_double,gs_add, v,5,buf);
+      // Computes the vector sum of v across all procs, using
+      // buf as a scratch area. Delegates to MPI_Allreduce if possible.
+    
+  Example comm_scan usage:
+    
+    long in[5], out[2][5], buf[2][5];
+    comm_scan(out, &c,gs_long,gs_add, in,5,buf);
+      // out[0] will be the vector sum of "in" across procs with ids
+           *strictly* less than this one (exclusive behavior),
+         and out[1] will be the vector sum across all procs, as would
+           be computed with comm_allreduce.
+         Note: differs from MPI_Scan which has inclusive behavior
+  
+  Example comm_reduce_double, etc. usage:
+  
+    T out, in[10];
+    out = comm_reduce_T(&c, gs_max, in, 10);
+      // out will equal the largest element of "in",
+         across all processors
+      // T can be "double", "float", "int", "long", "slong", "sint", etc.
+         as defined in "gs_defs.h"
+         
+*/
+
+#ifdef MPI
+#include <mpi.h>
+typedef MPI_Comm comm_ext;
+typedef MPI_Request comm_req;
+#else
+typedef int comm_ext;
+typedef int comm_req;
+typedef int MPI_Fint;
+#endif
+
+#define comm_allreduce PREFIXED_NAME(comm_allreduce)
+#define comm_scan      PREFIXED_NAME(comm_scan     )
+#define comm_dot       PREFIXED_NAME(comm_dot      )
+
+/* global id, np vars strictly for diagnostic messages (fail.c) */
+#ifndef comm_gbl_id
+#define comm_gbl_id PREFIXED_NAME(comm_gbl_id)
+#define comm_gbl_np PREFIXED_NAME(comm_gbl_np)
+extern uint comm_gbl_id, comm_gbl_np;
+#endif
+/* Communicator handle used by every comm_* routine. */
+struct comm {
+  uint id, np;   /* rank and process count, as set by comm_init */
+  comm_ext c;    /* MPI_Comm under -DMPI, otherwise a plain int */
+};
+
+static void comm_init(struct comm *c, comm_ext ce);
+/* (macro) static void comm_init_check(struct comm *c, MPI_Fint ce, uint np); */
+/* (macro) static void comm_dup(struct comm *d, const struct comm *s); */
+static void comm_free(struct comm *c);
+static double comm_time(void);
+static void comm_barrier(const struct comm *c);
+static void comm_recv(const struct comm *c, void *p, size_t n,
+                      uint src, int tag);
+static void comm_send(const struct comm *c, void *p, size_t n,
+                      uint dst, int tag);
+static void comm_irecv(comm_req *req, const struct comm *c,
+                       void *p, size_t n, uint src, int tag);
+static void comm_isend(comm_req *req, const struct comm *c,
+                       void *p, size_t n, uint dst, int tag);
+static void comm_wait(comm_req *req, int n);
+
+double comm_dot(const struct comm *comm, double *v, double *w, uint n);
+
+#ifdef GS_DEFS_H
+void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op,
+                          void *v, uint vn, void *buf);
+void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op,
+               const void *v, uint vn, void *buffer);
+
+#define DEFINE_REDUCE(T) \
+T PREFIXED_NAME(comm_reduce__##T)( \
+    const struct comm *comm, gs_op op, const T *in, uint n); \
+static T comm_reduce_##T(const struct comm *c, gs_op op, const T *v, uint vn) \
+{ return PREFIXED_NAME(comm_reduce__##T)(c,op,v,vn); }
+GS_FOR_EACH_DOMAIN(DEFINE_REDUCE)
+#undef DEFINE_REDUCE
+
+#define comm_reduce_sint \
+    TYPE_LOCAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long)
+#define comm_reduce_slong \
+   TYPE_GLOBAL(comm_reduce_int,comm_reduce_long,comm_reduce_long_long)
+
+#endif
+
+/*----------------------------------------------------------------------------
+  Code for static (inline) functions
+  ----------------------------------------------------------------------------*/
+/* MPI: duplicate ce into c->c and record rank/size (also in comm_gbl_id/np). No MPI: id 0, np 1. */
+static void comm_init(struct comm *c, comm_ext ce)
+{
+#ifdef MPI
+  int i;
+  MPI_Comm_dup(ce, &c->c);
+  MPI_Comm_rank(c->c,&i), comm_gbl_id=c->id=i;
+  MPI_Comm_size(c->c,&i), comm_gbl_np=c->np=i;
+#else
+  c->id = 0, c->np = 1;
+#endif
+}
+/* comm_init from a Fortran handle, failing (with caller file/line) if np disagrees with the real size. */
+static void comm_init_check_(struct comm *c, MPI_Fint ce, uint np,
+                             const char *file, unsigned line)
+{
+#ifdef MPI
+  comm_init(c,MPI_Comm_f2c(ce));
+  if(c->np != np)
+    fail(1,file,line,"comm_init_check: passed P=%u, "
+                     "but MPI_Comm_size gives P=%u",np,c->np);
+#else
+  comm_init(c,0);
+  if(np != 1)
+    fail(1,file,line,"comm_init_check: passed P=%u, "
+                     "but not compiled with -DMPI",np);
+#endif
+}
+#define comm_init_check(c,ce,np) comm_init_check_(c,ce,np,__FILE__,__LINE__)
+
+/* Copy id/np from s to d and duplicate the MPI communicator; without MPI, np!=1 is a failure. */
+static void comm_dup_(struct comm *d, const struct comm *s,
+                      const char *file, unsigned line)
+{
+  d->id = s->id, d->np = s->np;
+#ifdef MPI
+  MPI_Comm_dup(s->c,&d->c);
+#else
+  if(s->np!=1) fail(1,file,line,"%s not compiled with -DMPI\n",file);
+#endif
+}
+#define comm_dup(d,s) comm_dup_(d,s,__FILE__,__LINE__)
+/* Release the communicator held in c->c via MPI_Comm_free; does nothing without MPI. */
+static void comm_free(struct comm *c)
+{
+#ifdef MPI
+  MPI_Comm_free(&c->c);
+#endif
+}
+/* Current time as reported by MPI_Wtime(); always 0 when built without MPI. */
+static double comm_time(void)
+{
+  double now = 0;
+#ifdef MPI
+  now = MPI_Wtime();
+#endif
+  return now;
+}
+/* MPI_Barrier on c->c; does nothing without MPI. */
+static void comm_barrier(const struct comm *c)
+{
+#ifdef MPI
+  MPI_Barrier(c->c);
+#endif
+}
+/* MPI_Recv of n bytes (MPI_UNSIGNED_CHAR) into p from rank src with tag; the status is discarded. */
+static void comm_recv(const struct comm *c, void *p, size_t n,
+                      uint src, int tag)
+{
+#ifdef MPI
+# ifndef MPI_STATUS_IGNORE
+  MPI_Status stat;
+  MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,&stat);
+# else  
+  MPI_Recv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,MPI_STATUS_IGNORE);
+# endif
+#endif
+}
+/* MPI_Send of n bytes (MPI_UNSIGNED_CHAR) from p to rank dst with tag; does nothing without MPI. */
+static void comm_send(const struct comm *c, void *p, size_t n,
+                      uint dst, int tag)
+{
+#ifdef MPI
+  MPI_Send(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c);
+#endif
+}
+/* MPI_Irecv of n bytes into p from rank src; the request goes to *req (left untouched without MPI). */
+static void comm_irecv(comm_req *req, const struct comm *c,
+                       void *p, size_t n, uint src, int tag)
+{
+#ifdef MPI
+  MPI_Irecv(p,n,MPI_UNSIGNED_CHAR,src,tag,c->c,req);
+#endif
+}
+/* MPI_Isend of n bytes from p to rank dst; the request goes to *req (left untouched without MPI). */
+static void comm_isend(comm_req *req, const struct comm *c,
+                       void *p, size_t n, uint dst, int tag)
+{
+#ifdef MPI
+  MPI_Isend(p,n,MPI_UNSIGNED_CHAR,dst,tag,c->c,req);
+#endif
+}
+/* MPI_Waitall on n requests; if MPI_STATUSES_IGNORE is missing, waits in groups of 8 with a local status array. */
+static void comm_wait(comm_req *req, int n)
+{
+#ifdef MPI
+# ifndef MPI_STATUSES_IGNORE
+  MPI_Status status[8];
+  while(n>=8) MPI_Waitall(8,req,status), req+=8, n-=8;
+  if(n>0) MPI_Waitall(n,req,status);
+# else
+  MPI_Waitall(n,req,MPI_STATUSES_IGNORE);
+# endif  
+#endif
+}
+/* MPI_Bcast of n bytes at p from rank root over c->c; does nothing without MPI. */
+static void comm_bcast(const struct comm *c, void *p, size_t n, uint root)
+{
+#ifdef MPI
+  MPI_Bcast(p,n,MPI_UNSIGNED_CHAR,root,c->c);
+#endif
+}
+
+#endif
diff --git a/src/jl/crs.h b/src/jl/crs.h
new file mode 100644
index 0000000..e2d0d36
--- /dev/null
+++ b/src/jl/crs.h
@@ -0,0 +1,24 @@
+#ifndef CRS_H
+#define CRS_H
+/* Coarse-solver interface: setup, solve, stats, free. */
+#if !defined(COMM_H)
+#warning "crs.h" requires "comm.h"
+#endif
+/* the four external symbols are renamed through PREFIXED_NAME */
+#define crs_setup PREFIXED_NAME(crs_setup)
+#define crs_solve PREFIXED_NAME(crs_solve)
+#define crs_stats PREFIXED_NAME(crs_stats)
+#define crs_free  PREFIXED_NAME(crs_free )
+/* opaque handle: the struct layout is not defined in this header */
+struct crs_data;
+/* NOTE(review): arguments are undocumented here; crs_test.c passes n ids and nz (Ai,Aj,A) entries */
+struct crs_data *crs_setup(
+  uint n, const ulong *id,
+  uint nz, const uint *Ai, const uint *Aj, const double *A,
+  uint null_space, const struct comm *comm);
+void crs_solve(double *x, struct crs_data *data, double *b);
+void crs_stats(struct crs_data *data);
+void crs_free(struct crs_data *data);
+
+#endif
+
diff --git a/src/jl/crs_test.c b/src/jl/crs_test.c
new file mode 100644
index 0000000..e5367d2
--- /dev/null
+++ b/src/jl/crs_test.c
@@ -0,0 +1,116 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "gs_defs.h"
+#include "comm.h"
+#include "gs.h"
+#include "crs.h"
+/* Exercises crs_setup/crs_solve/crs_stats/crs_free with one 4-id element per rank. */
+void test(const struct comm *const comm)
+{
+  const double A[16] = {  2, -1, -1,  0,
+                         -1,  2,  0, -1,
+                         -1,  0,  2, -1,
+                          0, -1, -1,  2 };
+  const uint Ai[16]  = { 0, 0, 0, 0,
+                         1, 1, 1, 1,
+                         2, 2, 2, 2,
+                         3, 3, 3, 3 },
+             Aj[16]  = { 0, 1, 2, 3,
+                         0, 1, 2, 3,
+                         0, 1, 2, 3,
+                         0, 1, 2, 3 };
+  ulong xid[4]; slong uid[4];
+  double x[4]={1,1,1,1}, b[4], bmean;
+  uint i, w, gn, px, py;
+  
+  slong *xgid=0; double *xg=0; struct gs_data *gsh;
+  
+  struct crs_data *crs;
+  /* w = side of the smallest square holding np ranks; gn is overwritten below */
+  w = ceil(sqrt(comm->np)); gn = (w+1)*(w+1);
+  
+  if(comm->id==0) printf("arranging procs in a %u x %u square\n", w, w);
+  /* this rank's column/row, then the four ids of its cell in a (w+1)-wide numbering */
+  px = comm->id%w, py = comm->id/w;
+  b[0] = xid[0] = (w+1)*py    +px+1;
+  b[1] = xid[1] = (w+1)*py    +px+2;
+  b[2] = xid[2] = (w+1)*(py+1)+px+1;
+  b[3] = xid[3] = (w+1)*(py+1)+px+2;
+  /* gn = largest id in use across ranks; bmean = sum of all b entries divided by gn */
+  gn = comm_reduce_slong(comm, gs_max, (const slong*)&xid[3],1);
+  bmean = comm_reduce_double(comm, gs_add, b,4)/gn;
+
+  gsh = gs_setup((const slong*)xid,4, comm,0,gs_crystal_router,0);
+  gs(x,gs_double,gs_add,0,gsh,0);
+  gs(b,gs_double,gs_add,0,gsh,0);
+  for(i=0;i<4;++i) b[i]=xid[i]-bmean/x[i];
+  gs(b,gs_double,gs_add,0,gsh,0);
+  gs_free(gsh);
+  /* uid: +id where the gs_min result equals this rank's id, -id otherwise */
+  gsh = gs_setup((const slong*)xid,4, comm,1,gs_crystal_router,0);
+  for(i=0;i<4;++i) uid[i]=comm->id;
+  gs(uid,gs_slong,gs_min,0,gsh,0);
+  gs_free(gsh);
+  for(i=0;i<4;++i) uid[i] = (uid[i]==comm->id?(slong)xid[i]:-(slong)xid[i]);
+  /* rank 0 additionally holds arrays of length gn covering every id */
+  if(comm->id==0) {
+    xgid = tmalloc(slong, gn);
+    xg   = tmalloc(double,gn);
+    for(i=0;i<gn;++i) xgid[i] = -(slong)(i+1);
+    for(i=0;i<4;++i) xgid[xid[i]-1] = uid[i];
+  }
+  gsh = gs_setup(comm->id?uid:xgid,comm->id?4:gn, comm,0,gs_crystal_router,0);
+
+  /* rank 0 prints the combined right-hand side, then b is rebuilt */
+  if(comm->id==0) for(i=0;i<4;++i) xg[xid[i]-1]=b[i];
+  gs(comm->id?b:xg,gs_double,gs_add, 0, gsh, 0);
+  if(comm->id==0) for(i=0;i<gn;++i) printf("b[%u] = %g\n",i,xg[i]);
+  for(i=0;i<4;++i) b[i]=xid[i]-bmean/x[i];
+  /* set up, solve, report and free the coarse solver */
+  crs = crs_setup(4,xid, 16,Ai,Aj,A, 1, comm);
+
+  crs_solve(x,crs,b);
+
+  crs_stats(crs);
+
+  crs_free(crs);
+  /* rank 0 prints the combined solution */
+  if(comm->id==0) for(i=0;i<4;++i) xg[xid[i]-1]=x[i];
+  gs(comm->id?x:xg,gs_double,gs_add, 0, gsh, 0);
+  if(comm->id==0) for(i=0;i<gn;++i) printf("x[%u] = %g\n",i,xg[i]);
+
+  gs_free(gsh);  
+  if(comm->id==0) free(xg), free(xgid);
+}
+/* Runs test() once on a comm built from MPI_COMM_WORLD (or the single-process dummy). */
+int main(int narg, char* arg[])
+{
+  comm_ext world; int np;   /* np is assigned below but not read afterwards */
+  struct comm comm;
+#ifdef MPI
+  MPI_Init(&narg,&arg);
+  world = MPI_COMM_WORLD;
+  MPI_Comm_size(world,&np);
+#else
+  world=0, np=1;
+#endif
+
+  comm_init(&comm,world);
+  test(&comm);
+  comm_free(&comm);
+  
+#ifdef MPI
+  MPI_Finalize();
+#endif
+
+  return 0;
+}
+
diff --git a/src/jl/crystal.c b/src/jl/crystal.c
new file mode 100644
index 0000000..a0e8135
--- /dev/null
+++ b/src/jl/crystal.c
@@ -0,0 +1,141 @@
+/*------------------------------------------------------------------------------
+  
+  Crystal Router
+  
+  Accomplishes all-to-all communication in log P msgs per proc
+  The routine is low-level; the format of the input/output is an
+  array of integers, consisting of a sequence of messages with format:
+  
+      target proc
+      source proc
+      m
+      integer
+      integer
+      ...
+      integer  (m integers in total)
+
+  Before crystal_router is called, the source of each message should be
+  set to this proc id; upon return from crystal_router, the target of each
+  message will be this proc id.
+  
+  Example Usage:
+  
+    struct crystal cr;
+    
+    crystal_init(&cr, &comm);  // makes an internal copy of comm
+    
+    cr.data.n = ... ;  // total number of integers (not bytes!)
+    buffer_reserve(&cr.data, cr.data.n * sizeof(uint));
+    ... // fill cr.data.ptr with messages
+    crystal_router(&cr);
+    
+    crystal_free(&cr);
+    
+  ----------------------------------------------------------------------------*/
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+
+#define crystal_init   PREFIXED_NAME(crystal_init  )
+#define crystal_free   PREFIXED_NAME(crystal_free  )
+#define crystal_router PREFIXED_NAME(crystal_router)
+
+struct crystal {
+  struct comm comm;
+  buffer data, work;
+};
+
+void crystal_init(struct crystal *p, const struct comm *comm)
+{ /* prepare *p for use: private copy of comm plus the two buffers */
+  comm_dup(&p->comm, comm); /* internal copy; released by crystal_free */
+  buffer_init(&p->data,1000); /* message stream; 1000 presumably initial size */
+  buffer_init(&p->work,1000); /* scratch that crystal_move stages sends in */
+}
+
+void crystal_free(struct crystal *p)
+{ /* release what crystal_init set up; *p itself is not freed here */
+  comm_free(&p->comm); /* the duplicate made by comm_dup */
+  buffer_free(&p->data);
+  buffer_free(&p->work);
+}
+
+static void uintcpy(uint *dst, const uint *src, uint n)
+{ /* copy n uints from src to dst; the two ranges may overlap or coincide */
+  if(dst+n<=src)    memcpy (dst,src,n*sizeof(uint)); /* dst ends before src */
+  else if(dst!=src) memmove(dst,src,n*sizeof(uint)); /* may overlap */
+} /* dst==src: nothing is copied */
+
+/* Split the message stream in p->data around cutoff. A message is 3 header
+   words (target, source, m) followed by m payload words. Messages on the
+   outgoing side (target >= cutoff if send_hi, else target < cutoff) are
+   appended to p->work; the others are compacted toward the front of p->data.
+   Sets p->data.n to the number of words kept; returns the words staged. */
+static uint crystal_move(struct crystal *p, uint cutoff, int send_hi)
+{
+  uint *const base = p->data.ptr, *kept = base, *out, *msg, *stop;
+  const uint total = p->data.n;
+  uint msg_len;
+
+  out = buffer_reserve(&p->work,total*sizeof(uint));
+  for(msg=base,stop=base+total; msg<stop; msg+=msg_len) {
+    const int outgoing = send_hi ? msg[0]>=cutoff : msg[0]<cutoff;
+    msg_len = 3 + msg[2]; /* read before the in-place move can clobber it */
+    if(outgoing) memcpy (out ,msg,msg_len*sizeof(uint)), out +=msg_len;
+    else         uintcpy(kept,msg,msg_len),              kept+=msg_len;
+  }
+
+  p->data.n = kept - base;
+  return      out - (uint*)p->work.ptr;
+}
+
+static void crystal_exchange(struct crystal *p, uint send_n, uint targ,
+                             int recvn, int tag)
+{ /* send send_n words of p->work to targ; append what arrives to p->data */
+  comm_req req[3]; /* [0] send, [1] recv from targ, [2] recv from id-1 */
+  uint count[2] = {0,0}, sum, *recv[2];
+  /* phase 1 (tag): swap word counts so the receive space can be sized */
+  if(recvn)   
+    comm_irecv(&req[1],&p->comm, &count[0],sizeof(uint), targ        ,tag);
+  if(recvn==2)
+    comm_irecv(&req[2],&p->comm, &count[1],sizeof(uint), p->comm.id-1,tag);
+  comm_isend(&req[0],&p->comm, &send_n,sizeof(uint), targ,tag);
+  comm_wait(req,recvn+1);
+  /* grow p->data; incoming words land right after the words that were kept */
+  sum = p->data.n + count[0] + count[1];
+  buffer_reserve(&p->data,sum*sizeof(uint));
+  recv[0] = (uint*)p->data.ptr + p->data.n, recv[1] = recv[0] + count[0];
+  p->data.n = sum;
+  /* phase 2 (tag+1): move the message words themselves */
+  if(recvn)    comm_irecv(&req[1],&p->comm,
+                          recv[0],count[0]*sizeof(uint), targ        ,tag+1);
+  if(recvn==2) comm_irecv(&req[2],&p->comm,
+                          recv[1],count[1]*sizeof(uint), p->comm.id-1,tag+1);
+  comm_isend(&req[0],&p->comm, p->work.ptr,send_n*sizeof(uint), targ,tag+1);
+  comm_wait(req,recvn+1);
+}
+
+void crystal_router(struct crystal *p)
+{ /* repeatedly halve the proc range, pushing messages into the right half */
+  uint bl=0, bh, nl; /* [bl,bl+n) is the current proc range; bh splits it */
+  uint id = p->comm.id, n=p->comm.np;
+  uint send_n, targ, tag = 0;
+  int send_hi, recvn;
+  while(n>1) {
+    nl = (n+1)/2, bh = bl+nl; /* lower half [bl,bh) gets the extra proc */
+    send_hi = id<bh; /* lower-half procs ship messages bound for >= bh */
+    send_n = crystal_move(p,bh,send_hi);
+    recvn = 1, targ = n-1-(id-bl)+bl; /* partner: mirror image within range */
+    if(id==targ) targ=bh, recvn=0; /* odd n: middle proc only sends, to bh */
+    if(n&1 && id==bh) recvn=2; /* ...so bh also receives from id-1 */
+    crystal_exchange(p,send_n,targ,recvn,tag);
+    if(id<bh) n=nl; else n-=nl,bl=bh; /* narrow to this proc's own half */
+    tag += 2; /* crystal_exchange used tag and tag+1 */
+  }
+}
diff --git a/src/jl/crystal.h b/src/jl/crystal.h
new file mode 100644
index 0000000..b6d4582
--- /dev/null
+++ b/src/jl/crystal.h
@@ -0,0 +1,21 @@
+#ifndef CRYSTAL_H
+#define CRYSTAL_H
+
+#if !defined(COMM_H) || !defined(MEM_H)
+#warning "crystal.h" requires "comm.h" and "mem.h"
+#endif
+
+#define crystal_init   PREFIXED_NAME(crystal_init  )
+#define crystal_free   PREFIXED_NAME(crystal_free  )
+#define crystal_router PREFIXED_NAME(crystal_router)
+
+struct crystal { /* router state; crystal.c repeats this same definition */
+  struct comm comm; /* communicator the router exchanges messages on */
+  buffer data, work; /* data: in/out message stream; work: scratch */
+};
+
+void crystal_init(struct crystal *cr, const struct comm *comm);
+void crystal_free(struct crystal *cr);
+void crystal_router(struct crystal *cr);
+
+#endif
diff --git a/src/jl/crystal_test.c b/src/jl/crystal_test.c
new file mode 100644
index 0000000..c7f50df
--- /dev/null
+++ b/src/jl/crystal_test.c
@@ -0,0 +1,88 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+#include "crystal.h"
+
+/* Crystal router self-test: every proc sends one message to every proc. */
+int main(int narg, char *arg[])
+{
+  comm_ext world;
+  struct comm comm;
+  struct crystal cr;
+  uint i,sum, *data, *end;
+#ifdef MPI
+  MPI_Init(&narg,&arg);
+  world = MPI_COMM_WORLD;
+#else
+  world=0;
+#endif
+
+  comm_init(&comm,world);
+  
+  crystal_init(&cr,&comm);
+
+  cr.data.n = (4+(comm.id&1))*comm.np; /* 3 header + 1 or 2 payload words */
+  buffer_reserve(&cr.data,cr.data.n*sizeof(uint));
+  data = cr.data.ptr;
+  for(i=0;i<comm.np;++i, data+=3+data[2]) {
+    data[0] = i, data[1] = comm.id, data[2] = 1;
+    data[3] = 2*comm.id;
+    if(comm.id&1) data[2] = 2, data[4] = data[3]+1; /* odd procs send 2 words */
+  }
+
+#if 0
+  data = cr.data.ptr, end = data + cr.data.n;
+  for(;data!=end; data+=3+data[2]) {
+    uint i;
+    printf("%u -> %u:",data[1],data[0]);
+    for(i=0;i<data[2];++i) printf(" %u",data[3+i]);
+    printf("\n");
+  }
+#endif
+  
+  crystal_router(&cr);
+
+#if 0
+  printf("\n");
+  data = cr.data.ptr, end = data + cr.data.n;
+  for(;data!=end; data+=3+data[2]) {
+    uint i;
+    printf("%u <- %u:",data[0],data[1]);
+    for(i=0;i<data[2];++i) printf(" %u",data[3+i]);
+    printf("\n");
+  }
+#endif
+  
+  if(cr.data.n != comm.np*4 + (comm.np/2)) /* np/2 senders have odd ids */
+    fail(1,__FILE__,__LINE__,"failure on %u",comm.id);
+  sum = 0;
+  data = cr.data.ptr, end = data + cr.data.n;
+  for(;data!=end; data+=3+data[2]) {
+    sum+=data[1];
+    if(data[3]!=data[1]*2)
+      fail(1,__FILE__,__LINE__,"failure on %u",comm.id);
+    if(data[1]&1 && (data[2]!=2 || data[4]!=data[3]+1))
+      fail(1,__FILE__,__LINE__,"failure on %u",comm.id);
+  }
+  if(sum != comm.np*(comm.np-1)/2) /* each source id must appear exactly once */
+    fail(1,__FILE__,__LINE__,"failure on %u",comm.id);
+
+  crystal_free(&cr);
+  comm_free(&comm);
+
+  diagnostic("",__FILE__,__LINE__,
+    "test successful %u/%u",(unsigned)comm.id,(unsigned)comm.np);
+  
+#ifdef MPI
+  MPI_Finalize();
+#endif
+
+  return 0;
+}
diff --git a/src/jl/fail.c b/src/jl/fail.c
new file mode 100644
index 0000000..718aa36
--- /dev/null
+++ b/src/jl/fail.c
@@ -0,0 +1,53 @@
+#include <stdio.h>  /* sprintf, vfprintf, stdout */
+#include <stdarg.h> /* va_list, va_start, ... */
+#include <stdlib.h> /* exit */
+#include <string.h> /* memcpy, and str* functions in comm_fail */
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+
+#define nek_exitt FORTRAN_UNPREFIXED(exitt,EXITT)
+void die(int status)
+{ /* terminate the program with exit code status */
+#ifdef NO_NEK_EXITT
+  if(comm_gbl_id==0) exit(status); else for(;;) ; /* other procs spin forever */
+#else
+  exit(status);
+#endif  
+}
+
+/* Writes "<prefix>(proc NNNN, <file>:<line>): <message>\n" to stdout. */
+void vdiagnostic(const char *prefix, const char *file, unsigned line,
+                 const char *fmt, va_list ap)
+{
+  static char buf[2048]; size_t n,na,i=0; const size_t max=sizeof buf-1;
+  snprintf(buf,max,"%s(proc %04d, %s:%u): ",prefix,(int)comm_gbl_id,file,line);
+  n=strlen(buf), vsnprintf(buf+n,max-n,fmt,ap); /* truncates, never overruns */
+  n=strlen(buf), buf[n++]='\n', buf[n]='\0';    /* max leaves room for this  */
+  while(n && (na=fwrite(buf+i,1,n,stdout))) n-=na, i+=na;
+  fflush(stdout);
+}
+
+void diagnostic(const char *prefix, const char *file, unsigned line,
+                const char *fmt, ...)
+{ /* printf-style front end: packs the variadic args and calls vdiagnostic */
+  va_list ap; va_start(ap,fmt);
+  vdiagnostic(prefix,file,line,fmt,ap);
+  va_end(ap);
+}
+
+void vfail(int status, const char *file, unsigned line,
+           const char *fmt, va_list ap)
+{ /* report the message with an "ERROR " prefix, then terminate */
+  vdiagnostic("ERROR ",file,line,fmt,ap);
+  die(status); /* exits or spins; control does not come back */
+}
+
+void fail(int status, const char *file, unsigned line,
+          const char *fmt, ...)
+{ /* printf-style front end to vfail: print an error and terminate */
+  va_list ap; va_start(ap,fmt);
+  vfail(status,file,line,fmt,ap); /* ends in die(status) */
+  va_end(ap);
+}
diff --git a/src/jl/fail.h b/src/jl/fail.h
new file mode 100644
index 0000000..0185110
--- /dev/null
+++ b/src/jl/fail.h
@@ -0,0 +1,52 @@
+#ifndef FAIL_H
+#define FAIL_H
+
+#if !defined(NAME_H)
+#warning "fail.h" requires "name.h"
+#endif
+
+#define  die        PREFIXED_NAME( die       )
+#define vdiagnostic PREFIXED_NAME(vdiagnostic)
+#define  diagnostic PREFIXED_NAME( diagnostic)
+#define vfail       PREFIXED_NAME(vfail      )
+#define  fail       PREFIXED_NAME( fail      )
+
+#ifdef __GNUC__
+#  define ATTRBD   __attribute__ ((noreturn))
+#  define ATTRB4V  __attribute__ ((format(printf,4,0)))
+#  define ATTRB4   __attribute__ ((format(printf,4,5)))
+#  define ATTRB4DV __attribute__ ((noreturn,format(printf,4,0)))
+#  define ATTRB4D  __attribute__ ((noreturn,format(printf,4,5)))
+#else
+#  define ATTRBD
+#  define ATTRB4V
+#  define ATTRB4
+#  define ATTRB4DV
+#  define ATTRB4D
+#endif
+
+#define DEF_FUNS() \
+  void  die(int status) ATTRBD; \
+  void  diagnostic(const char *prefix, const char *file, unsigned line, \
+                   const char *fmt, ...) ATTRB4  ; \
+  void  fail      (int status,         const char *file, unsigned line, \
+                   const char *fmt, ...) ATTRB4D ;
+#define VDEF_FUNS() \
+  void vdiagnostic(const char *prefix, const char *file, unsigned line, \
+                   const char *fmt, va_list ap) ATTRB4V  ; \
+  void vfail      (int status,         const char *file, unsigned line, \
+                   const char *fmt, va_list ap) ATTRB4DV ;
+DEF_FUNS()
+#ifdef va_arg
+VDEF_FUNS()
+#endif
+
+#undef VDEF_FUNS
+#undef DEF_FUNS
+#undef ATTRB4D
+#undef ATTRB4DV
+#undef ATTRB4
+#undef ATTRB4V
+#undef ATTRBD
+
+#endif
diff --git a/src/jl/fcrystal.c b/src/jl/fcrystal.c
new file mode 100644
index 0000000..3fe4c9a
--- /dev/null
+++ b/src/jl/fcrystal.c
@@ -0,0 +1,191 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "comm.h"
+#include "crystal.h"
+#include "sort.h"
+#include "sarray_sort.h"
+#include "sarray_transfer.h"
+
+/*--------------------------------------------------------------------------
+
+  FORTRAN Interface to crystal router
+   
+  integer h, np
+  MPI_Comm comm
+  call crystal_setup(h,comm,np)  ! set h to handle to new instance
+  ! it is a runtime error if MPI_Comm_size gives a value different than np
+  call crystal_free(h)         ! release instance
+
+  integer*? ituple(m,max)   ! integer type matching sint from "types.h"
+  call crystal_ituple_transfer(h, ituple,m,n,max, kp)
+    - moves each column ituple(:,i), 1 <= i <= n,
+      to proc ituple(kp,i)
+    - sets n to the number of columns received,
+      which may be larger than max (indicating loss of n-max columns)
+    - also sets ituple(kp,i) to the source proc of column ituple(:,i)
+
+  call crystal_ituple_sort(h, ituple,m,n, key,nkey)
+    - locally sorts columns ituple(:,1...n) in ascending order,
+      ranked by ituple(key(1),i),
+           then ituple(key(2),i),
+           ...
+           then ituple(key(nkey),i)
+    - no communication; h used for scratch area
+    - linear time
+    - assumes nonnegative integers
+
+  integer*? vi(mi,max)   ! integer type matching sint  from "types.h"
+  integer*? vl(ml,max)   ! integer type matching slong from "types.h"
+  real      vr(mr,max)
+  call crystal_tuple_transfer(h,n,max, vi,mi,vl,ml,vr,mr, kp)
+    - moves each column vi(:,i),vl(:,i),vr(:,i) 1 <= i <= n,
+      to proc vi(kp,i)
+    - sets n to the number of columns received,
+      which may be larger than max (indicating loss of n-max columns)
+    - also sets vi(kp,i) to the source proc of columns vi(:,i),vl(:,i),vr(:,i)
+
+  call crystal_tuple_sort(h,n, vi,mi,vl,ml,vr,mr, key,nkey)
+    - locally sorts columns vi/vl/vr (:,1...n) in ascending order,
+      ranked by vi(key(1),i) [ or vl(key(1)-mi,i) if key(1)>mi ],
+           then vi(key(2),i) [ or vl(key(2)-mi,i) if key(2)>mi ],
+           ...
+           then vi(key(nkey),i) or vl(key(nkey)-mi,i)
+    - no communication; h used for scratch area
+    - linear time
+    - assumes nonnegative integers
+    - sorting on reals not yet implemented
+
+  --------------------------------------------------------------------------*/
+
+#undef   crystal_free
+#define ccrystal_free  PREFIXED_NAME(crystal_free)
+
+#define fcrystal_setup           \
+  FORTRAN_NAME(crystal_setup          ,CRYSTAL_SETUP          )
+#define fcrystal_ituple_sort     \
+  FORTRAN_NAME(crystal_ituple_sort    ,CRYSTAL_ITUPLE_SORT    )
+#define fcrystal_tuple_sort      \
+  FORTRAN_NAME(crystal_tuple_sort     ,CRYSTAL_TUPLE_SORT     )
+#define fcrystal_ituple_transfer \
+  FORTRAN_NAME(crystal_ituple_transfer,CRYSTAL_ITUPLE_TRANSFER)
+#define fcrystal_tuple_transfer  \
+  FORTRAN_NAME(crystal_tuple_transfer ,CRYSTAL_TUPLE_TRANSFER )
+#define fcrystal_free            \
+  FORTRAN_NAME(crystal_free           ,CRYSTAL_FREE           )
+
+static struct crystal **handle_array = 0;
+static int handle_max = 0;
+static int handle_n = 0;
+
+void fcrystal_setup(sint *handle, const MPI_Fint *comm, const sint *np)
+{ /* create a router instance and return its slot number in *handle */
+  struct crystal *p;
+  if(handle_n==handle_max) /* table full: grow it by about half */
+    handle_max+=handle_max/2+1,
+    handle_array=trealloc(struct crystal*,handle_array,handle_max);
+  handle_array[handle_n]=p=tmalloc(struct crystal,1);
+  comm_init_check(&p->comm, *comm, *np); /* presumably verifies np; see above */
+  buffer_init(&p->data,1000);
+  buffer_init(&p->work,1000);
+  *handle = handle_n++; /* 0-based; freed slots are not handed out again */
+}
+
+#define CHECK_HANDLE(func) do \
+  if(*handle<0 || *handle>=handle_n || !handle_array[*handle]) \
+    fail(1,__FILE__,__LINE__,func ": invalid handle"); \
+while(0)
+
+/* Sorts the n columns of A by keys(1..nkey); out-of-range keys now fail. */
+void fcrystal_ituple_sort(const sint *handle,
+                          sint A[], const sint *m, const sint *n,
+                          const sint keys[], const sint *nkey)
+{
+  const size_t size = (*m)*sizeof(sint);
+  sint nk = *nkey; int init=0; buffer *buf;
+  CHECK_HANDLE("crystal_ituple_sort");
+  buf = &handle_array[*handle]->data;
+  while(--nk>=0) { /* keys are 1-based rows of A; last key is sorted first */
+    if(keys[nk]<1 || keys[nk]>*m)
+      fail(1,__FILE__,__LINE__,"crystal_ituple_sort: invalid key");
+    sortp(buf,init, (uint*)&A[keys[nk]-1],*n,size), init=1;
+  }
+  if(init) sarray_permute_buf_(ALIGNOF(sint),size,A,*n, buf);
+}
+
+void fcrystal_tuple_sort(const sint *const handle, const sint *const n,
+                         sint   Ai[], const sint *const mi,
+                         slong  Al[], const sint *const ml,
+                         double Ad[], const sint *const md,
+                         const sint keys[], const sint *const nkey)
+{ /* sort the n columns of Ai/Al/Ad together by integer keys from Ai and Al */
+  const size_t size_i = (*mi)*sizeof(sint),   /* bytes per column of Ai */
+               size_l = (*ml)*sizeof(slong),  /* bytes per column of Al */
+               size_d = (*md)*sizeof(double); /* bytes per column of Ad */
+  int init=0; /* 0 for the first sort pass, 1 for every later pass */
+  sint nk = *nkey;
+  buffer *buf;
+  CHECK_HANDLE("crystal_tuple_sort");
+  buf = &handle_array[*handle]->data; /* router's data buffer used as scratch */
+  if(nk<=0) return;
+  while(--nk>=0) { /* last key first */
+    sint k = keys[nk]-1; /* keys are 1-based; values above mi select rows of Al */
+    if(k<0 || k>=*mi+*ml)
+      fail(1,__FILE__,__LINE__,"crystal_tuple_sort: invalid key");
+    else if(k<*mi) sortp     (buf,init, (uint *)&Ai[k],    *n,size_i);
+    else           sortp_long(buf,init, (ulong*)&Al[k-*mi],*n,size_l);
+    init=1;
+  }
+  if(*mi) sarray_permute_buf_(ALIGNOF(sint  ),size_i,Ai,*n, buf);
+  if(*ml) sarray_permute_buf_(ALIGNOF(slong ),size_l,Al,*n, buf);
+  if(*md) sarray_permute_buf_(ALIGNOF(double),size_d,Ad,*n, buf);
+}
+
+void fcrystal_ituple_transfer(const sint *handle,
+                              sint A[], const sint *m, sint *n,
+                              const sint *nmax, const sint *proc_key)
+{ /* send each column of A to the proc stored in its row proc_key (1-based) */
+  struct array ar, *const ar_ptr = &ar;
+  const unsigned size=(*m)*sizeof(sint); /* bytes per column */
+  CHECK_HANDLE("crystal_ituple_transfer");
+  ar.ptr=A, ar.n=*n, ar.max=*nmax; /* wrap the caller's storage, no copy */
+  *n = sarray_transfer_many(&ar_ptr,&size,1, 1,0,1,(*proc_key-1)*sizeof(sint),
+         (uint*)&A[*proc_key-1],size, handle_array[*handle]);
+} /* *n is overwritten with the value sarray_transfer_many returns */
+
+void fcrystal_tuple_transfer(
+  const sint *const handle, sint *const n, const sint *const max,
+  sint   Ai[], const sint *const mi,
+  slong  Al[], const sint *const ml,
+  double Ad[], const sint *const md,
+  const sint *const proc_key)
+{ /* like fcrystal_ituple_transfer, moving Ai, Al and Ad columns together */
+  struct array ar_i, ar_l, ar_d, *ar[3];
+  unsigned size[3]; /* bytes per column of Ai, Al, Ad */
+  CHECK_HANDLE("crystal_tuple_transfer");
+  size[0]=*mi*sizeof(sint);
+  size[1]=*ml*sizeof(slong);
+  size[2]=*md*sizeof(double);
+  ar[0]=&ar_i, ar[1]=&ar_l, ar[2]=&ar_d;
+  ar_i.ptr=Ai,ar_l.ptr=Al,ar_d.ptr=Ad; /* wrap the caller's storage, no copy */
+  ar_i.n=ar_l.n=ar_d.n = *n;
+  ar_i.max=ar_l.max=ar_d.max=*max;
+  *n = sarray_transfer_many(ar,size,3, 1,0,1,(*proc_key-1)*sizeof(sint),
+         (uint*)&Ai[*proc_key-1],size[0], handle_array[*handle]);
+} /* the destination proc is read from row proc_key (1-based) of Ai */
+
+void fcrystal_free(sint *handle)
+{ /* destroy the router instance behind *handle */
+  CHECK_HANDLE("crystal_free");
+  ccrystal_free(handle_array[*handle]); /* the C-level crystal_free */
+  free(handle_array[*handle]);
+  handle_array[*handle] = 0; /* later use of this handle fails CHECK_HANDLE */
+}
+
+
diff --git a/src/jl/gen_poly_imp.c b/src/jl/gen_poly_imp.c
new file mode 100644
index 0000000..21a7410
--- /dev/null
+++ b/src/jl/gen_poly_imp.c
@@ -0,0 +1,227 @@
+#include <math.h>
+#include <stdio.h>
+#include <gmp.h>
+
+#define PREC_BITS 256
+#define DIGITS 50
+
+#define GLL_LAG_FIX_MAX 16
+
+#if 1
+#  define STATIC "static "
+#else
+#  define STATIC ""
+#endif
+
+
+#define PI 3.1415926535897932384626433832795028841971693993751058209749445923
+
+#define DECLARE_1VAR(a)        static int init=0; static mpf_t a; \
+                               if(!init) init=1, mpf_init(a)
+#define DECLARE_2VARS(a,b)     static int init=0; static mpf_t a,b; \
+                               if(!init) init=1, mpf_init(a), mpf_init(b)
+#define DECLARE_3VARS(a,b,c)   static int init=0; static mpf_t a,b,c; \
+                               if(!init) init=1, mpf_init(a), mpf_init(b), \
+                                                 mpf_init(c)
+#define DECLARE_4VARS(a,b,c,d) static int init=0; static mpf_t a,b,c,d; \
+                               if(!init) init=1, mpf_init(a), mpf_init(b), \
+                                                 mpf_init(c), mpf_init(d)
+                                                 
+static int is_small(const mpf_t x, const mpf_t y) { /* is |x| tiny next to |y|? */
+  DECLARE_2VARS(xa,ya); /* static scratch values, initialized on first call */
+  mpf_abs(xa,x);
+  mpf_abs(ya,y);
+  mpf_div_2exp(ya,ya,PREC_BITS-mp_bits_per_limb); /* scale |y| down by 2^bits */
+  return mpf_cmp(xa,ya) < 0; /* nonzero when |x| is below the scaled |y| */
+}
+
+typedef void fun_3term(mpf_t Pn, int n, const mpf_t x);
+
+#define DECLARE_THREE_TERM(name, i0_init, init_Ps, a_ip1,a_i,a_im1) \
+static void name(mpf_t Pn, int n, const mpf_t x) \
+{ \
+  int i, i0_init; \
+  DECLARE_4VARS(a,b,P_im1,P_i); \
+  init_Ps; \
+  for(i=i0+1; i<n; ++i) { \
+    mpf_mul(a, x,P_i); \
+    mpf_mul_ui(a, a,a_i); \
+    mpf_mul_ui(b, P_im1,a_im1); \
+    mpf_sub(a, a,b); \
+    mpf_swap(P_im1, P_i); \
+    mpf_div_ui(P_i, a,a_ip1); \
+  } \
+  mpf_set(Pn, n>i0?P_i:P_im1); \
+}
+
+DECLARE_THREE_TERM(legendre,    i0=0,(mpf_set_ui(P_im1,1),mpf_set   (P_i,x)),
+                   i+1, 2*i+1, i  )
+DECLARE_THREE_TERM(legendre_d1, i0=0,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,1)),
+                   i  , 2*i+1, i+1)
+DECLARE_THREE_TERM(legendre_d2, i0=1,(mpf_set_ui(P_im1,0),mpf_set_ui(P_i,3)),
+                   i-1, 2*i+1, i+2)
+
+/* Newton iteration from seed for a root of fun, whose derivative is der. */
+static void newton(mpf_t x, double seed,
+                   fun_3term *fun, fun_3term *der, int n)
+{
+  DECLARE_2VARS(f,df);
+  mpf_set_d(x, seed);
+  do {
+    fun(f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f);
+  } while(!is_small(f,x)); /* stop once the step is negligible next to x */
+  fun( f, n,x), der(df, n,x), mpf_div(f, f,df), mpf_sub(x, x,f); /* polish */
+}
+
+static void gauss_node(mpf_t z, int n, int i) { /* z = node i of n (0-based) */
+  if( (n&1) && i==n/2 ) mpf_set_ui(z,0); /* odd n: middle node is exactly 0 */
+  else newton(z, cos( (2*n-2*i-1)*(PI/2)/n ), legendre,legendre_d1,n);
+} /* otherwise: root of legendre(n) refined from the cosine seed */
+
+static void lobatto_node(mpf_t z, int n, int i) { /* z = node i of n (0-based) */
+  if( (n&1) && i==n/2 ) mpf_set_ui(z,0); /* odd n: middle node is exactly 0 */
+  else if(i==0)   mpf_set_d(z,-(double)1); /* first node is the endpoint -1 */
+  else if(i==n-1) mpf_set_ui(z,1); /* last node is the endpoint +1 */
+  else newton(z, cos( (n-1-i)*PI/(n-1) ), legendre_d1,legendre_d2,n-1);
+} /* interior nodes: roots of legendre_d1(n-1) refined from the cosine seed */
+
+#define PRINT_LIST(i, i0,nline,n, printi,sep,sepline) \
+  do { \
+    int i; \
+    for(i=i0;i<n;++i) { \
+      printi; \
+      printf("%s",i==n-1?"":((i-i0)%nline==nline-1?sepline:sep)); \
+    } \
+  } while(0)
+
+static void print_gll_lag_fix(int n)
+{ /* print to stdout the C source of gll_lag_NN (and gllz_NN when n>3) */
+  int i;
+  DECLARE_1VAR(z);
+  if(n>3) { /* table of n/2-1 Lobatto nodes, taken from index n-2 downward */
+    printf("static const double gllz_%02d[%2d] = {\n  ",n,n/2-1);
+    for(i=1;i<=n/2-1;++i) {
+      lobatto_node(z, n,n-1-i);
+      if(i!=1) printf(",\n  ");
+      gmp_printf("%.*Fg",DIGITS,z);
+    }
+    puts("\n};\n");
+  }
+  printf(STATIC "void gll_lag_%02d(double *restrict p, double *restrict data,\n"
+           "                       unsigned n, int d, double xh)\n{\n",n);
+  printf("  const double *restrict w = data;\n");
+  printf("  const double x = xh*2;\n");
+  #define PRINT_D(i) do { \
+    printf("d%02d=x",i); \
+    if(2*i+1==n)    printf("              "); \
+    else if(i==0)   printf("+2            "); \
+    else if(i==n-1) printf("-2            "); \
+    else if(i<n/2)  printf("+2*gllz_%02d[%2d]",n,i-1); \
+    else            printf("-2*gllz_%02d[%2d]",n,n-2-i); \
+  } while(0)
+  printf("%s",                            "  const double ");
+  PRINT_LIST(i, 0,3,n, PRINT_D(i),",",",\n               ");
+  #undef PRINT_D
+  #define PRINT_U0(i) (i==0  ?printf("    1"):printf("u0_%02d",i))
+  #define PRINT_V0(i) (i==n-1?printf("    1"):printf("v0_%02d",i))
+  #define PRINT_U1(i) (i<=1  ?printf("    %d",i      ):printf("u1_%02d",i))
+  #define PRINT_V1(i) (i>=n-2?printf("    %d",n-1-(i)):printf("v1_%02d",i))
+  #define PRINT_U2(i) (i<=1  ?printf("    0"): \
+                      (i==2  ?printf("    2"):printf("u2_%02d",i)))
+  #define PRINT_V2(i) (i>=n-2?printf("    0"): \
+                      (i==n-3?printf("    2"):printf("v2_%02d",i)))
+  printf("%s",";\n  const double "); /* u0_i = u0_(i-1) * d(i-1) */
+  PRINT_LIST(i, 1,3,n,
+    (PRINT_U0(i),putchar('='),PRINT_U0(i-1),printf("*d%02d",i-1)),
+    ",",",\n               ");
+  printf("%s",";\n  const double "); /* v0_i = d(i+1) * v0_(i+1) */
+  PRINT_LIST(i, 1,3,n,
+    (PRINT_V0(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V0(n-i)),
+    ",",",\n               ");
+  printf("%s",";\n  "); /* emitted values: p[i] = w[i]*u0_i*v0_i */
+  PRINT_LIST(i, 0,3,n, 
+    (printf("p[%2d]=w[%2d]*",i,i),PRINT_U0(i),putchar('*'),
+     PRINT_V0(i)),"; ",";\n  ");
+  puts(";\n  if(d>0) {"); /* emitted first-derivative section */
+  if(n>2) {
+    printf("%s","    const double ");
+    PRINT_LIST(i, 2,2,n,
+      (PRINT_U1(i),putchar('='),PRINT_U1(i-1),printf("*d%02d",i-1),
+       putchar('+'),PRINT_U0(i-1)),
+      ",",",\n                 ");
+    printf("%s",";\n    const double ");
+    PRINT_LIST(i, 2,2,n,
+      (PRINT_V1(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V1(n-i),
+       putchar('+'),PRINT_V0(n-i)),
+      ",",",\n                 ");
+    puts(";");
+  }
+  for(i=0;i<n;++i) {
+    printf("    p[%d+%2d]=2*w[%2d]*(",n,i,i);
+    if(i==0)        printf("                  "),PRINT_V1(0);
+    else if(i==n-1) PRINT_U1(i),printf("                  ");
+    else PRINT_U1(i),putchar('*'),PRINT_V0(i),putchar('+'),
+         PRINT_U0(i),putchar('*'),PRINT_V1(i);
+    puts(");");
+  }
+  puts("    if(d>1) {"); /* emitted second-derivative section */
+  if(n>3) {
+    printf("%s","      const double ");
+    PRINT_LIST(i, 3,2,n,
+      (PRINT_U2(i),putchar('='),PRINT_U2(i-1),printf("*d%02d",i-1),
+       printf("+2*"),PRINT_U1(i-1)),
+      ",",",\n                   ");
+    printf("%s",";\n      const double ");
+    PRINT_LIST(i, 3,2,n,
+      (PRINT_V2(n-1-i),putchar('='),printf("d%02d*",n-i),PRINT_V2(n-i),
+       printf("+2*"),PRINT_V1(n-i)),
+      ",",",\n                   ");
+    puts(";");
+  }  
+  if(n<3) for(i=0;i<n;++i) printf("      p[2*%d+%2d]=0;\n",n,i);
+  else for(i=0;i<n;++i) {
+      printf("      p[2*%d+%2d]=4*w[%2d]*(",n,i,i);
+      if(i>1)
+        PRINT_U2(i),putchar('*'),PRINT_V0(i);
+      else printf("           ");
+      if(i>0 && i<n-1)
+        printf("+2*"),PRINT_U1(i),putchar('*'),PRINT_V1(i);
+      else printf("              ");
+      if(i<n-2)
+        putchar('+'),PRINT_U0(i),putchar('*'),PRINT_V2(i);
+      else printf("            ");
+      puts(");");
+  }
+  #undef PRINT_U0
+  #undef PRINT_V0
+  #undef PRINT_U1
+  #undef PRINT_V1
+  #undef PRINT_U2
+  #undef PRINT_V2
+  puts("    }\n  }\n}"); /* close if(d>1), if(d>0) and the emitted function */
+}
+
+
+int main()
+{ /* write the generated source (functions plus lookup tables) to stdout */
+  int n;
+  mpf_set_default_prec(PREC_BITS); /* must precede the first mpf_init */
+  puts("/* generated by gen_poly_imp.c */\n");
+  printf("#define GLL_LAG_FIX_MAX %d\n\n",GLL_LAG_FIX_MAX);
+  /*puts("typedef void gll_lag_fun(double *p, int d, int n, double x);\n");*/
+  for(n=2;n<=GLL_LAG_FIX_MAX;++n)
+      print_gll_lag_fix(n), puts("");
+  printf(STATIC "const double *const gllz_table[%d] = {\n  ",
+    GLL_LAG_FIX_MAX-3); /* entries for n = 4..GLL_LAG_FIX_MAX */
+  PRINT_LIST(i, 4,8,(GLL_LAG_FIX_MAX+1),
+    printf("gllz_%02d",i), ", ",",\n  ");
+  puts("\n};");
+  puts("");
+  printf(STATIC "lagrange_fun *const gll_lag_table[%d] = {\n  ",
+    GLL_LAG_FIX_MAX-1); /* entries for n = 2..GLL_LAG_FIX_MAX */
+  PRINT_LIST(i, 2,6,(GLL_LAG_FIX_MAX+1),
+    printf("&gll_lag_%02d",i), ", ",",\n  ");
+  puts("\n};");
+  puts("");
+  return 0;
+}
diff --git a/src/jl/gs.c b/src/jl/gs.c
new file mode 100644
index 0000000..d39051a
--- /dev/null
+++ b/src/jl/gs.c
@@ -0,0 +1,1503 @@
+#include <stdio.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+
+#ifdef _OPENMP
+#include "omp.h"
+#endif
+
+#define gs_op gs_op_t   /* fix conflict with fortran */
+
+#include "gs_defs.h"
+#include "gs_local.h"
+#include "comm.h"
+#include "mem.h"
+#include "sort.h"
+#include "crystal.h"
+#include "sarray_sort.h"
+#include "sarray_transfer.h"
+
+#define gs         PREFIXED_NAME(gs       )
+#define gs_vec     PREFIXED_NAME(gs_vec   )
+#define gs_many    PREFIXED_NAME(gs_many  )
+#define gs_setup   PREFIXED_NAME(gs_setup )
+#define gs_free    PREFIXED_NAME(gs_free  )
+#define gs_unique  PREFIXED_NAME(gs_unique)
+
+GS_DEFINE_DOM_SIZES()
+
+typedef enum { mode_plain, mode_vec, mode_many,
+               mode_dry_run } gs_mode;
+
+static buffer static_buffer = null_buffer;
+
+static void gather_noop(
+  void *out, const void *in, const unsigned vn,
+  const uint *map, gs_dom dom, gs_op op)
+{ /* intentionally empty: ignores every argument */ }
+
+static void scatter_noop(
+  void *out, const void *in, const unsigned vn,
+  const uint *map, gs_dom dom)
+{ /* intentionally empty: ignores every argument */ }
+
+static void init_noop(
+  void *out, const unsigned vn,
+  const uint *map, gs_dom dom, gs_op op)
+{ /* intentionally empty: ignores every argument */ }
+
+/*------------------------------------------------------------------------------
+  Topology Discovery
+------------------------------------------------------------------------------*/
+
+struct gs_topology {
+  ulong total_shared; /* number of globally unique shared ids */
+  struct array nz; /* array of nonzero_id's, grouped by id, 
+                      sorted by primary index, then flag, then index */
+  struct array sh; /* array of shared_id's, arbitrary ordering */
+  struct array pr; /* array of primary_shared_id's */
+};
+
+static void gs_topology_free(struct gs_topology *top)
+{ /* release the three arrays held by *top; *top itself is not freed */
+  array_free(&top->pr);
+  array_free(&top->sh);
+  array_free(&top->nz);
+}
+
+/************** Local topology **************/
+
+/* nonzero_ids    (local part)
+
+   Creates an array of nonzero_id's, one per nonzero in the user id array. The
+   output array is grouped by id. Within each group, non-flagged entries come
+   first; otherwise the entries within the group are sorted by the index into
+   the user id array. The first index in each group is the primary index, and
+   is stored along with each entry. The groups themselves are ordered in
+   increasing order of the primary index associated with the group (as opposed
+   to the user id). */
+
+struct nonzero_id {
+  ulong id; uint i, flag, primary;
+};
+
+static void nonzero_ids(struct array *nz,
+                        const slong *id, const uint n, buffer *buf)
+{
+  ulong last_id = -(ulong)1; /* sentinel: all bits set */
+  uint i, primary = -(uint)1;
+  struct nonzero_id *row, *end;
+  array_init(struct nonzero_id,nz,n), end=row=nz->ptr;
+  for(i=0;i<n;++i) { /* pass 1: one record per nonzero id */
+    slong id_i = id[i], abs_id = iabsl(id_i);
+    if(id_i==0) continue; /* zero ids are left out */
+    end->i = i;
+    end->id = abs_id;
+    end->flag = id_i!=abs_id; /* 1 when the user id was negative */
+    ++end;
+  }
+  nz->n = end-row;
+  array_resize(struct nonzero_id,nz,nz->n);
+  sarray_sort_2(struct nonzero_id,nz->ptr,nz->n, id,1, flag,0, buf);
+  for(row=nz->ptr,end=row+nz->n;row!=end;++row) { /* pass 2: tag each group */
+    ulong this_id = row->id;
+    if(this_id!=last_id) primary = row->i; /* first row of a new id group */
+    row->primary = primary;
+    last_id = this_id;
+  }
+  sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf);
+}
+
+/************** Global topology **************/
+
+/* construct list of all unique id's on this proc */
+struct unique_id { ulong id; uint work_proc, src_if; };
+static void unique_ids(struct array *un, const struct array *nz, const uint np)
+{
+  /* Emits one entry per id group of nz, taken from the row that is its own
+     primary. src_if is that row's index, bit-complemented when flagged. */
+  const struct nonzero_id *const in = nz->ptr;
+  struct unique_id *out; uint k, count = 0;
+  array_init(struct unique_id,un,nz->n), out=un->ptr;
+  for(k=0;k<nz->n;++k) {
+    if(in[k].i != in[k].primary) continue;
+    out[count].id = in[k].id, out[count].work_proc = in[k].id%np;
+    out[count].src_if = in[k].flag ? ~in[k].i : in[k].i, ++count;
+  }
+  un->n = count;
+}
+
+/* shared_ids    (global part)
+
+   Creates an array of shared_id's from an array of nonzero_id's. Each entry
+   in the output identifies one id shared with one other processor p.
+   Note: two procs share an id only when at least one of them has it unflagged.
+   The primary index is i locally and ri remotely. Bit 1 of flags indicates
+   the local flag, bit 2 indicates the remote flag. The output has no
+   particular ordering.
+
+   Also creates an array of primary_shared_id's, one for each shared id.
+   This struct includes ord, a global rank of the id (arbitrary, but unique). */
+
+#define FLAGS_LOCAL  1
+#define FLAGS_REMOTE 2
+
+/* i  : local primary index
+   p  : remote proc
+   ri : remote primary index
+   bi : buffer index (set and used during pw setup) */
+struct shared_id {
+  ulong id; uint i, p, ri, bi; unsigned flags;
+};
+
+struct primary_shared_id {
+  ulong id, ord; uint i; unsigned flag;
+};
+
+
+
+struct shared_id_work { ulong id,ord; uint p1, p2, i1f, i2f; };
+static void shared_ids_aux(struct array *sh, struct array *pr, uint pr_n,
+                           struct array *wa, buffer *buf)
+{ /* build sh (one entry per work row) and pr (one entry per distinct id) */
+  const struct shared_id_work *w, *we;
+  struct shared_id *s;
+  struct primary_shared_id *p;
+  ulong last_id = -(ulong)1; /* sentinel: all bits set */
+  /* translate work array to output arrays */
+  sarray_sort(struct shared_id_work,wa->ptr,wa->n, id,1, buf);
+  array_init(struct shared_id,sh,wa->n), sh->n=wa->n, s=sh->ptr;
+  array_init(struct primary_shared_id,pr,pr_n), p=pr->ptr;
+  for(w=wa->ptr,we=w+wa->n;w!=we;++w) {
+    uint i1f = w->i1f, i2f = w->i2f; /* index, or ~index when flagged */
+    uint i1 = ~i1f<i1f?~i1f:i1f, i2 = ~i2f<i2f?~i2f:i2f; /* plain indices */
+    s->id=w->id, s->i=i1, s->p=w->p2, s->ri=i2;
+    s->flags = ((i2f^i2)&FLAGS_REMOTE) | ((i1f^i1)&FLAGS_LOCAL);
+    ++s; /* (x^index) is all ones when x was complemented, else 0 */
+    if(w->id!=last_id) { /* first row of this id: record it once in pr */
+      last_id=w->id;
+      p->id=last_id, p->ord=w->ord, p->i=i1, p->flag=(i1f^i1)&FLAGS_LOCAL;
+      ++p;
+    }
+  }
+  pr->n = p-(struct primary_shared_id*)pr->ptr;
+  sarray_sort(struct primary_shared_id,pr->ptr,pr->n, i,0, buf);
+}
+
+static ulong shared_ids(struct array *sh, struct array *pr,
+                        const struct array *nz, struct crystal *cr)
+{ /* fills sh and pr for this proc; returns ordinal[1], the global count of shared ids */
+  struct array un; struct unique_id *un_row, *un_end, *other;
+  ulong last_id = -(ulong)1;  /* all-ones: no id counted yet */
+  ulong ordinal[2], n_shared=0, scan_buf[2];
+  struct array wa; struct shared_id_work *w;
+  uint n_unique;
+  /* construct list of all unique id's on this proc */
+  unique_ids(&un,nz,cr->comm.np);
+  n_unique = un.n;  /* taken before the transfer below; later passed to shared_ids_aux to size pr */
+  /* transfer list to work procs */
+  sarray_transfer(struct unique_id,&un, work_proc,1, cr);
+  /* group by id, put flagged entries after unflagged (within each group) */
+  sarray_sort_2(struct unique_id,un.ptr,un.n, id,1, src_if,0, &cr->data);
+  /* count shared id's */
+  for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) {
+    ulong id = un_row->id;
+    if(~un_row->src_if<un_row->src_if) continue;  /* skip flagged rows (src_if larger than its complement) */
+    if(id==last_id) continue;  /* this id has been counted already */
+    other=un_row+1;
+    if(other!=un_end&&other->id==id) last_id=id, ++n_shared;  /* the next row carries the same id: shared */
+  }
+  comm_scan(ordinal, &cr->comm,gs_slong,gs_add, &n_shared,1, scan_buf);
+  /* there are ordinal[1] globally shared unique ids;
+           and ordinal[0] of those are seen by work procs of lower rank;
+     i.e., this work processor sees the range ordinal[0] + (0,n_shared-1) */
+  /* construct list of shared ids */
+  last_id = -(ulong)1;
+  array_init(struct shared_id_work,&wa,un.n), wa.n=0, w=wa.ptr;
+  for(un_row=un.ptr,un_end=un_row+un.n;un_row!=un_end;++un_row) {
+    ulong id = un_row->id;
+    uint p1 = un_row->work_proc, i1f = un_row->src_if;
+    if(~i1f<i1f) continue;  /* only unflagged rows start a pairing */
+    for(other=un_row+1;other!=un_end&&other->id==id;++other) {
+      uint p2 = other->work_proc, i2f = other->src_if;
+      ulong ord;
+      if(id!=last_id) last_id=id, ++ordinal[0];  /* first pairing of a new shared id */
+      ord=ordinal[0]-1;
+      if(wa.n+2>wa.max)
+        array_reserve(struct shared_id_work,&wa,wa.n+2),
+        w=(struct shared_id_work*)wa.ptr+wa.n;  /* re-derive w from wa.ptr after the reserve */
+      w->id=id, w->ord=ord, w->p1=p1, w->p2=p2, w->i1f=i1f, w->i2f=i2f, ++w;  /* row with p1 first */
+      w->id=id, w->ord=ord, w->p1=p2, w->p2=p1, w->i1f=i2f, w->i2f=i1f, ++w;  /* mirrored row with p2 first */
+      wa.n+=2;
+    }
+  }
+  /* transfer shared list to source procs */
+  sarray_transfer(struct shared_id_work,&wa, p1,0, cr);
+  /* fill output arrays from work array */
+  shared_ids_aux(sh,pr,n_unique,&wa,&cr->data);
+  array_free(&un);
+  array_free(&wa);
+  return ordinal[1];
+}
+
+/* Build top->nz from the n ids, then top->sh, top->pr and the shared-id total. */
+static void get_topology(struct gs_topology *top, const slong *id, uint n, struct crystal *cr)
+{
+  nonzero_ids(&top->nz, id, n, &cr->data);
+  top->total_shared = shared_ids(&top->sh, &top->pr, &top->nz, cr);
+}
+
+static void make_topology_unique(struct gs_topology *top, slong *id,
+                                 uint pid, buffer *buf)
+{ /* flags local non-primaries and the shared primaries this proc does not own (negating them in id[] when id is given), and prunes sh to match */
+  struct array *const nz=&top->nz, *const sh=&top->sh, *const pr=&top->pr;
+  struct nonzero_id *pnz;
+  struct shared_id *pb, *pe, *e, *out;
+  struct primary_shared_id *q;
+
+  /* flag local non-primaries */
+  sarray_sort(struct nonzero_id,nz->ptr,nz->n, i,0, buf);
+  if(id) {
+    struct nonzero_id *p,*e;
+    for(p=nz->ptr,e=p+nz->n;p!=e;++p)
+      if(p->i != p->primary) id[p->i]=-(slong)p->id,p->flag=1;
+  } else {  /* same pass, without a user id array to update */
+    struct nonzero_id *p,*e;
+    for(p=nz->ptr,e=p+nz->n;p!=e;++p)
+      if(p->i != p->primary) p->flag=1;
+  }
+  sarray_sort(struct nonzero_id,nz->ptr,nz->n, primary,0, buf);
+
+  /* assign owner among shared primaries */
+  
+  /* create sentinel with i = -1 */
+  array_reserve(struct shared_id,sh,sh->n+1);
+  ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1;
+  /* in the sorted list of procs sharing a given id,
+     the owner is chosen to be the j^th unflagged proc,
+     where j = id mod (length of list) */
+  sarray_sort_2(struct shared_id,sh->ptr,sh->n, i,0, p,0, buf);
+  out=sh->ptr; pnz=top->nz.ptr;
+  for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) {  /* [pb,pe): the rows with local index i */
+    uint i = pb->i, lt=0,gt=0, owner; struct shared_id *p;
+    while(pnz->i!=i) ++pnz;  /* advance pnz to the nz row with index i */
+    /* note: current proc not in list */
+    for(pe=pb; pe->i==i && pe->p<pid; ++pe) if(!(pe->flags&FLAGS_REMOTE)) ++lt;
+    for(     ; pe->i==i             ; ++pe) if(!(pe->flags&FLAGS_REMOTE)) ++gt;
+    if(!(pb->flags&FLAGS_LOCAL)) {
+      owner = pb->id%(lt+gt+1);  /* +1: this proc is unflagged as well */
+      if(owner==lt) goto make_sh_unique_mine;  /* lt unflagged procs come before pid, so position lt is this proc */
+      if(owner>lt) --owner;  /* re-index into the list, which omits this proc */
+    } else
+      owner = pb->id%(lt+gt);
+    /* we don't own pb->id */
+    if(id) id[i] = -(slong)pb->id;
+    pnz->flag=1;
+    /* we only share this id with the owner now; remove the other entries */
+    for(p=pb; p!=pe; ++p) if(!(p->flags&FLAGS_REMOTE) && !(owner--)) break;
+    if(p!=pe) *out=*p, out->flags=FLAGS_LOCAL, ++out;
+    continue;
+  make_sh_unique_mine:
+    /* we own pb->id */
+    if(out==pb) { out=pe; for(p=pb; p!=pe; ++p) p->flags=FLAGS_REMOTE; }
+    else        for(p=pb; p!=pe; ++p) *out=*p,out->flags=FLAGS_REMOTE,++out;
+  }
+  sh->n = out - ((struct shared_id*)sh->ptr);
+
+  /* set primary_shared_id flags to match */
+  ((struct shared_id*)sh->ptr)[sh->n].i = -(uint)1;
+  sarray_sort(struct shared_id,sh->ptr,sh->n, id,1, buf);
+  sarray_sort(struct primary_shared_id,pr->ptr,pr->n, id,1, buf);
+  q=pr->ptr;
+  for(pb=sh->ptr,e=pb+sh->n;pb!=e;pb=pe) {
+    uint i=pb->i;
+    pe=pb; while(pe->i==i) ++pe;  /* the sentinel row stops this scan at the end */
+    if(q->id!=pb->id) printf("FAIL!!!\n");  /* consistency check: sh groups and pr rows are expected to line up by id */
+    q->flag=pb->flags&FLAGS_LOCAL;
+    ++q;
+  }
+}
+
+
+/*------------------------------------------------------------------------------
+  Divide lists for parallel execution
+------------------------------------------------------------------------------*/
+
+void sublist(const uint *map, uint ***slPtr) {
+  // map holds lists "i, j, ..., -1" followed by a closing -1; the lists are dealt out, in order, to per-thread sublists
+  // Iterate over array and count items and lists
+
+  uint i,j;
+  int itemCount = 0, listCount = 0;  // itemCount counts the j entries only, not the leading i of a list
+  const uint *lmap = map;
+  while((i=*lmap++)!=-(unsigned int)1) {
+    listCount++;
+  
+    j=*lmap++;
+    do {
+      itemCount++;
+    } while ((j=*lmap++)!=-(unsigned int)1);
+  }
+
+  // Determine number of threads and lists
+
+  int maxThreads = 1;
+#ifdef _OPENMP
+  maxThreads = omp_get_max_threads();
+#endif
+  int max = (maxThreads <= listCount) ? maxThreads : listCount;  // sublists actually filled: min(threads, lists), but at least 1
+  if (max == 0) max = 1;
+
+  // Setup sublists
+
+  uint *subListData = tmalloc(uint, max+itemCount+2*listCount);  // per list: i, its items and a -1; plus one -1 per sublist
+  *slPtr = tmalloc(uint*, maxThreads);  // one entry per thread, each pointing into subListData
+  uint **subListPtr = *slPtr;
+
+  subListData[0] = -(unsigned int)1;  // an empty map still yields a terminated sublist 0
+  subListPtr[0] = subListData;
+  int nextSubList = 1;
+
+  // Populate sublists
+
+  int copyItemCount = 0;
+  lmap = map;
+  while((i=*lmap++)!=-(unsigned int)1) {
+    *subListData++ = i;
+
+    j=*lmap++;
+    do {
+      *subListData++ = j;
+      copyItemCount++;
+    } while ((j=*lmap++)!=-(unsigned int)1);
+    *subListData++ = -(unsigned int) 1;
+  
+    if ( copyItemCount*max >= itemCount*nextSubList ) {  // this sublist has reached its share of the items; NOTE(review): plain int products, confirm they cannot overflow
+      *subListData= -(unsigned int)1;  // close the current sublist
+  
+      if (copyItemCount != itemCount) {  // more lists follow: the next sublist starts after the terminator
+        subListData++;
+        subListPtr[nextSubList] = subListData;
+        nextSubList++;
+      }
+    }
+  }
+
+  // Terminate unused sublists
+
+  for (; nextSubList < maxThreads; nextSubList++) {
+    subListPtr[nextSubList] = subListData;  // subListData rests on a -1 here, so these entries read as empty
+  }
+
+  return;
+}
+
+void subflagged(const uint *map, uint ***slPtr) {
+  // map is a flat run of entries closed by -1; it is cut into consecutive -1 terminated sublists, one pointer per thread in *slPtr
+  // Iterate over map and count items
+
+  int count = 0;
+  const uint *lmap = map;
+  while(*lmap++ !=-(unsigned int)1) count++;
+
+  // Determine number of threads and sublists
+
+  int maxThreads = 1;
+#ifdef _OPENMP
+  maxThreads = omp_get_max_threads();
+#endif
+  int maxLists = (maxThreads <= count) ? maxThreads : count;  // sublists actually filled: min(threads, entries), but at least 1
+  if (maxLists == 0) maxLists = 1;
+
+  // Setup empty sublists
+
+  uint *subFlaggedData = tmalloc(uint, maxLists+count);  // every entry plus one -1 per sublist
+  *slPtr = tmalloc(uint*, maxThreads);
+
+  subFlaggedData[0] = -(unsigned int)1;  // an empty map still yields a terminated sublist 0
+  (*slPtr)[0] = subFlaggedData;
+  int nextList = 1;
+
+  // Populate sublists
+
+  int copyCount=0;
+  uint i;
+  lmap = map;
+  while((i=*lmap++)!=-(unsigned int)1) {
+    *subFlaggedData++ = i;
+    copyCount++;
+  
+    if (copyCount*maxLists >= count*nextList) {  // this sublist has reached its share; NOTE(review): plain int products, confirm they cannot overflow
+      *subFlaggedData = -(unsigned int)1;  // close the current sublist
+      
+      if (copyCount != count) {  // more entries follow: the next sublist starts after the terminator
+        subFlaggedData++;
+        (*slPtr)[nextList] = subFlaggedData;
+        nextList++;
+      }
+    }
+  }
+
+  // Terminate unused sublists
+  
+  for (; nextList < maxThreads; nextList++) {
+    (*slPtr)[nextList] = subFlaggedData;  // subFlaggedData rests on a -1 here, so these entries read as empty
+  }
+
+  return;
+}
+
+/*------------------------------------------------------------------------------
+  Local setup
+------------------------------------------------------------------------------*/
+
+/* assumes nz is sorted by primary, then flag, then index */
+static const uint *local_map(const struct array *nz, const int ignore_flagged)
+{ /* returns lists "i, i', ..., -1" (only for rows that have followers with the same id), closed by one more -1 */
+  uint *map, *p, count = 1;  /* 1: the closing terminator */
+  const struct nonzero_id *row, *other, *end;
+#define DO_COUNT(cond) do \
+    for(row=nz->ptr,end=row+nz->n;row!=end;) {                     \
+      ulong row_id = row->id; int any=0;                           \
+      for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \
+        any=2, ++count;                                            \
+      count+=any, row=other;                                       \
+    } while(0)
+  if(ignore_flagged) DO_COUNT(other->flag==0); else DO_COUNT(1);  /* pass 1: size the map (followers + 2 per emitted list) */
+#undef DO_COUNT
+  p = map = tmalloc(uint,count);
+#define DO_SET(cond) do \
+    for(row=nz->ptr,end=row+nz->n;row!=end;) {                     \
+      ulong row_id = row->id; int any=0;                           \
+      *p++ = row->i;                                               \
+      for(other=row+1;other!=end&&other->id==row_id&&cond;++other) \
+        any=1, *p++ = other->i;                                    \
+      if(any) *p++ = -(uint)1; else --p;                           \
+      row=other;                                                   \
+    } while(0)
+  if(ignore_flagged) DO_SET(other->flag==0); else DO_SET(1);  /* pass 2: same walk, writing; a row without followers is taken back by --p */
+#undef DO_SET
+  *p = -(uint)1;
+  return map;
+}
+
+/* Return a -1 terminated list of the indices i of the nz rows that are
+   primaries (i==primary) and have flag==1. */
+static const uint *flagged_primaries_map(const struct array *nz)
+{
+  const struct nonzero_id *const first = nz->ptr, *const last = first+nz->n, *r;
+  uint n_out = 1, *list, *w;   /* 1: room for the terminator */
+  for(r=first; r!=last; ++r) n_out += (r->i==r->primary && r->flag==1);
+  w = list = tmalloc(uint,n_out);
+  for(r=first; r!=last; ++r) if(r->i==r->primary && r->flag==1) *w++ = r->i;
+  *w = -(uint)1;
+  return list;
+}
+
+/*------------------------------------------------------------------------------
+  Remote execution and setup
+------------------------------------------------------------------------------*/
+
+typedef void exec_fun(
+  void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op,
+  unsigned transpose, const void *execdata, const struct comm *comm, char *buf);  /* signature shared by pw_exec, cr_exec and allreduce_exec */
+typedef void fin_fun(void *data);  /* releases a method's data; stored in gs_remote.fin */
+
+struct gs_remote {
+  uint buffer_size;  /* scratch size in units, not bytes (dry_run_time multiplies it by gs_dom_size) */
+  void *data;        /* method data: passed to exec as execdata and to fin */
+  exec_fun *exec;
+  fin_fun *fin;
+};
+
+typedef void setup_fun(struct gs_remote *r, struct gs_topology *top,
+                       const struct comm *comm, buffer *buf);  /* signature shared by pw_setup, cr_setup, allreduce_setup and auto_setup */
+
+/*------------------------------------------------------------------------------
+  Pairwise Execution
+------------------------------------------------------------------------------*/
+struct pw_comm_data {
+  uint n;      /* number of messages */
+  uint *p;     /* message source/dest proc */
+  uint *size;  /* size of message */
+  uint total;  /* sum of message sizes */
+  size_t *offsets;  /* offsets[k]: sum of size[0..k-1], i.e. where message k starts */
+};
+
+struct pw_data {
+  struct pw_comm_data comm[2];  /* [0]: default receive side, [1]: default send side */
+  const uint *map[2];           /* map built right after comm[k] by pw_map_setup */
+  comm_req *req;                /* comm[0].n + comm[1].n requests */
+  uint buffer_size;             /* comm[0].total + comm[1].total */
+  uint **submap[2];             /* map[k] split by sublist(), one entry per thread */
+};
+
+/* Post one comm_irecv per message of c into buf; message k lands at
+   offsets[k]*unit_size and p[k] is passed as both trailing arguments.
+   Returns the address just past the last message (buf when c->n==0). */
+static char *pw_exec_recvs(char *buf, const unsigned unit_size,
+                           const struct comm *comm,
+                           const struct pw_comm_data *c, comm_req *req)
+{
+  const uint *src = c->p, *len = c->size;
+  const size_t *off = c->offsets;
+  int k;
+
+#ifdef MPITHREADS
+#pragma omp for
+#endif
+  for (k = 0; k < c->n; k++) {
+    comm_irecv(&req[k], comm, buf + off[k]*unit_size, len[k]*unit_size, src[k], src[k]);
+  }
+
+  if (c->n == 0) return buf;
+  return buf + (off[c->n-1]*unit_size + len[c->n-1]*unit_size);
+}
+
+/* Post one comm_isend per message of c out of buf; message k starts at
+   offsets[k]*unit_size, goes to p[k], and carries comm->id as last argument.
+   Returns the address just past the last message (buf when c->n==0). */
+static char *pw_exec_sends(char *buf, const unsigned unit_size,
+                           const struct comm *comm,
+                           const struct pw_comm_data *c, comm_req *req)
+{
+  const uint *dst = c->p, *len = c->size;
+  const size_t *off = c->offsets;
+  int k;
+
+#ifdef MPITHREADS
+#pragma omp for
+#endif
+  for(k = 0; k < c->n; k++) {
+    comm_isend(&req[k], comm, buf + off[k]*unit_size, len[k]*unit_size, dst[k], comm->id);
+  }
+
+  if (c->n == 0) return buf;
+  return buf + (off[c->n-1]*unit_size + len[c->n-1]*unit_size);
+}
+
+static void pw_exec(
+  void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op,
+  unsigned transpose, const void *execdata, const struct comm *comm, char *buf)
+{ /* pairwise exchange: post receives, scatter data into the send area, post sends, wait, gather from the receive area */
+  const struct pw_data *pwd = execdata;
+  static gs_scatter_fun *const scatter_to_buf[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop };
+  static gs_gather_fun *const gather_from_buf[] =
+    { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop };
+  const unsigned recv = 0^transpose, send = 1^transpose;  /* transpose swaps which comm[]/map[] slot receives and which sends */
+  unsigned unit_size = vn*gs_dom_size[dom];
+
+#ifdef MPITHREADS
+  char *sendbuf;
+#else
+  static char *sendbuf;  /* static: assigned under "omp master" below, then read by every thread */
+#endif
+
+  int thd = 0;  /* thread number; selects this thread's submap entry */
+  int inp = 0;  /* nonzero when called from inside a parallel region */
+  #ifdef _OPENMP
+    thd = omp_get_thread_num();
+    inp = omp_in_parallel();
+  #endif
+
+  if (inp) {
+    /* post receives */
+#ifndef MPITHREADS
+    #pragma omp master
+#endif
+    {
+      sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req);  /* the send area starts where the receive area ends */
+    }
+    #pragma omp barrier
+
+    /* fill send buffer */
+    scatter_to_buf[mode](sendbuf,data,vn,(pwd->submap[send])[thd],dom);
+    #pragma omp barrier
+
+    /* post sends */
+#ifndef MPITHREADS
+    #pragma omp master
+#endif
+    {
+      pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send],
+                      &pwd->req[pwd->comm[recv].n]);
+    }
+    #pragma omp barrier
+
+    #pragma omp master 
+    {
+      comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n);
+    }
+    #pragma omp barrier
+
+    /* gather using recv buffer */
+    gather_from_buf[mode](data,buf,vn,(pwd->submap[recv])[thd],dom,op);
+  } else {  /* not in a parallel region: same steps, using the unsplit maps */
+    /* post receives */
+    sendbuf = pw_exec_recvs(buf,unit_size,comm,&pwd->comm[recv],pwd->req);
+    /* fill send buffer */
+    scatter_to_buf[mode](sendbuf,data,vn,pwd->map[send],dom);
+    /* post sends */
+    pw_exec_sends(sendbuf,unit_size,comm,&pwd->comm[send],
+                  &pwd->req[pwd->comm[recv].n]);
+    comm_wait(pwd->req,pwd->comm[0].n+pwd->comm[1].n);
+    /* gather using recv buffer */
+    gather_from_buf[mode](data,buf,vn,pwd->map[recv],dom,op);
+  }
+
+}
+
+/*------------------------------------------------------------------------------
+  Pairwise setup
+------------------------------------------------------------------------------*/
+/* Set bi on every row of sh (buffer index, or -1 when flags_mask matches) and
+   fill data: one message per distinct remote proc among the unmasked rows. */
+static void pw_comm_setup(struct pw_comm_data *data, struct array *sh,
+                          const unsigned flags_mask, buffer *buf)
+{
+  uint n=0,count=0, lp=-(uint)1, k;
+  size_t len = 0;
+  struct shared_id *s, *se;
+  /* sort by remote processor and id (a globally consistent ordering) */
+  sarray_sort_2(struct shared_id,sh->ptr,sh->n, p,0, id,1, buf);
+  /* assign index into buffer */
+  for(s=sh->ptr,se=s+sh->n;s!=se;++s) {
+    if(s->flags&flags_mask) { s->bi = -(uint)1; continue; }
+    s->bi = count++;
+    if(s->p!=lp) lp=s->p, ++n;
+  }
+  data->n = n;
+  data->p = tmalloc(uint,2*n);  /* p[] and size[] share this block */
+  data->size = data->p + n;
+  data->total = count;
+  n = 0, lp=-(uint)1;
+  for(s=sh->ptr,se=s+sh->n;s!=se;++s) {  /* second pass: proc and size of each message */
+    if(s->flags&flags_mask) continue;
+    if(s->p!=lp) {
+      lp=s->p;
+      if(n!=0) data->size[n-1] = count;  /* close the previous message */
+      count=0, data->p[n++]=lp;
+    }
+    ++count;
+  }
+  if(n!=0) data->size[n-1] = count;
+  data->offsets = tmalloc(size_t,data->n);  /* the file's allocator, as for p[], instead of a bare malloc with no NULL check */
+  for (k = 0; k < data->n; k++) {  /* offsets[k] = sum of size[0..k-1] */
+    data->offsets[k] = len;
+    len += data->size[k];
+  }
+}
+
+static void pw_comm_free(struct pw_comm_data *data) { free(data->p); free(data->offsets);}  /* data->size points into the p block, so it is not freed separately */
+
+/* After sorting sh by i, emit "i, bi, bi, ..., -1" for each i having rows with
+   bi != -1, then a final -1.  Assumes that the bi field of sh is set. */
+static const uint *pw_map_setup(struct array *sh, buffer *buf)
+{
+  const uint unset = -(uint)1;
+  uint len = 1, *map, *out;              /* 1: the closing terminator */
+  struct shared_id *row, *end;
+  sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf);
+  /* pass 1: size -- 3 entries (i, first bi, -1) per list, 1 per extra bi */
+  for(row=sh->ptr,end=row+sh->n; row!=end; ) {
+    const uint idx = row->i;
+    if(row++->bi==unset) continue;
+    for(len+=3; row!=end && row->i==idx; ++row) len += (row->bi!=unset);
+  }
+  /* pass 2: fill, walking the rows exactly as in pass 1 */
+  out = map = tmalloc(uint,len);
+  for(row=sh->ptr,end=row+sh->n; row!=end; ) {
+    const uint idx = row->i;
+    if(row->bi==unset) { ++row; continue; }
+    *out++ = idx, *out++ = row->bi;
+    for(++row; row!=end && row->i==idx; ++row) if(row->bi!=unset) *out++ = row->bi;
+    *out++ = unset;
+  }
+  *out = unset;
+  return map;
+}
+
+
+/* Allocate and fill a pw_data from sh.  Slot 0 is the default receive side
+   (rows flagged FLAGS_REMOTE are left out), slot 1 the default send side
+   (rows flagged FLAGS_LOCAL are left out); each slot gets its message
+   layout, its map, and the per-thread split of that map. */
+static struct pw_data *pw_setup_aux(struct array *sh, buffer *buf)
+{
+  static const unsigned skip_mask[2] = { FLAGS_REMOTE, FLAGS_LOCAL };
+  struct pw_data *const pwd = tmalloc(struct pw_data,1);
+  unsigned k;
+
+  for(k=0; k<2; ++k) {
+    pw_comm_setup(&pwd->comm[k],sh, skip_mask[k], buf);
+    pwd->map[k] = pw_map_setup(sh, buf);
+    sublist(pwd->map[k], &(pwd->submap[k]));
+  }
+  pwd->req = tmalloc(comm_req,pwd->comm[0].n+pwd->comm[1].n);
+  pwd->buffer_size = pwd->comm[0].total + pwd->comm[1].total;
+  return pwd;
+}
+
+/* Release a pw_data built by pw_setup_aux; data itself must be freed last. */
+static void pw_free(struct pw_data *data)
+{
+  pw_comm_free(&data->comm[0]);
+  pw_comm_free(&data->comm[1]);
+  free((uint*)data->map[0]);
+  free((uint*)data->map[1]);
+  free(data->req);
+  free((data->submap[0])[0]);  /* sublist() puts all sublists in one block, based at entry 0 */
+  free(data->submap[0]);
+  free((data->submap[1])[0]);
+  free(data->submap[1]);
+  free(data);  /* was freed before the submap reads above: use after free */
+}
+
+/* setup_fun for the pairwise method: builds a pw_data from top->sh and
+   installs it in r with pw_exec / pw_free.  comm is not used. */
+static void pw_setup(struct gs_remote *r, struct gs_topology *top,
+                     const struct comm *comm, buffer *buf)
+{
+  struct pw_data *const pwd = pw_setup_aux(&top->sh,buf);
+  r->data = pwd, r->buffer_size = pwd->buffer_size;
+  r->exec = (exec_fun*)&pw_exec, r->fin = (fin_fun*)&pw_free;
+}
+
+/*------------------------------------------------------------------------------
+  Crystal-Router Execution
+------------------------------------------------------------------------------*/
+struct cr_stage {
+  const uint *scatter_map, *gather_map;  /* one allocation; gather_map points into it */
+  uint size_r, size_r1, size_r2;         /* size_r = size_r1 + size_r2, received from p1 and p2 */
+  uint size_sk, size_s, size_total;      /* size_total = size_r + size_sk */
+  uint p1, p2;                           /* p1: send target and first source; p2: second source */
+  unsigned nrecvn;                       /* receives posted in this stage: 0, 1 or 2 */
+};
+
+struct cr_data {
+  struct cr_stage *stage[2];            /* nstages+1 entries each; cr_exec picks stage[transpose] */
+  unsigned nstages;
+  uint buffer_size, stage_buffer_size;  /* buffer_size = 2*stage_buffer_size */
+};
+
+static void cr_exec(
+  void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op,
+  unsigned transpose, const void *execdata, const struct comm *comm, char *buf)
+{ /* runs the nstages exchange stages of crd->stage[transpose], then applies the final entry's maps to data */
+  const struct cr_data *crd = execdata;
+  static gs_scatter_fun *const scatter_user_to_buf[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop };
+  static gs_scatter_fun *const scatter_buf_to_buf[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec, &gs_scatter };
+  static gs_scatter_fun *const scatter_buf_to_user[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop };
+  static gs_gather_fun *const gather_buf_to_user[] =
+    { &gs_gather, &gs_gather_vec, &gs_gather_vec_to_many, &gather_noop };
+  static gs_gather_fun *const gather_buf_to_buf[] =
+    { &gs_gather, &gs_gather_vec, &gs_gather_vec, &gs_gather };
+  const unsigned unit_size = vn*gs_dom_size[dom], nstages=crd->nstages;
+  unsigned k;
+  char *sendbuf, *buf_old, *buf_new;
+  const struct cr_stage *stage = crd->stage[transpose];
+  buf_old = buf;
+  buf_new = buf_old + unit_size*crd->stage_buffer_size;  /* buf holds two stage buffers back to back */
+  /* crystal router */
+  for(k=0;k<nstages;++k) {
+    comm_req req[3];  /* [0]: the send, [1] and [2]: the receives */
+    if(stage[k].nrecvn)
+      comm_irecv(&req[1],comm,buf_new,unit_size*stage[k].size_r1,
+               stage[k].p1, comm->np+k);
+    if(stage[k].nrecvn==2)
+      comm_irecv(&req[2],comm,buf_new+unit_size*stage[k].size_r1,
+               unit_size*stage[k].size_r2, stage[k].p2, comm->np+k);
+    sendbuf = buf_new+unit_size*stage[k].size_r;  /* outgoing data sits right after the receive area */
+    if(k==0)
+      scatter_user_to_buf[mode](sendbuf,data,vn,stage[0].scatter_map,dom);
+    else
+      scatter_buf_to_buf[mode](sendbuf,buf_old,vn,stage[k].scatter_map,dom),
+      gather_buf_to_buf [mode](sendbuf,buf_old,vn,stage[k].gather_map ,dom,op);
+
+    comm_isend(&req[0],comm,sendbuf,unit_size*stage[k].size_s,
+               stage[k].p1, comm->np+k);
+    comm_wait(&req[0],1+stage[k].nrecvn);
+    { char *t = buf_old; buf_old=buf_new; buf_new=t; }  /* the buffer just filled becomes the next stage's input */
+  }
+  scatter_buf_to_user[mode](data,buf_old,vn,stage[k].scatter_map,dom);  /* k==nstages here: the extra, final stage entry */
+  gather_buf_to_user [mode](data,buf_old,vn,stage[k].gather_map ,dom,op);
+}
+
+/*------------------------------------------------------------------------------
+  Crystal-Router setup
+------------------------------------------------------------------------------*/
+static void cr_schedule(struct cr_data *data, const struct comm *comm)
+{ /* allocates data->stage[0..1] and sets nstages plus nrecvn, p1 and p2 of each exchange stage */
+  const uint id = comm->id;
+  uint bl=0, n=comm->np;  /* the current range of procs is [bl, bl+n) */
+  unsigned k = 0;
+  while(n>1) {  /* first pass only counts the stages */
+    uint nl = (n+1)/2, bh = bl+nl;
+    if(id<bh) n=nl; else n-=nl,bl=bh;  /* continue with the half that contains id */
+    ++k;
+  }
+  data->nstages = k;
+  data->stage[0] = tmalloc(struct cr_stage,2*(k+1));  /* one block: k+1 entries for each of the two stage lists */
+  data->stage[1] = data->stage[0] + (k+1);
+  bl=0, n=comm->np, k=0;
+  while(n>1) {
+    uint nl = (n+1)/2, bh = bl+nl;
+    uint targ; unsigned recvn;
+    recvn = 1, targ = n-1-(id-bl)+bl;  /* partner: mirror image of id within the range */
+    if(id==targ) targ=bh, recvn=0;  /* id is its own mirror image: send to bh, receive nothing */
+    if(n&1 && id==bh) recvn=2;  /* n odd: bh receives a second message, from id-1 (stored as p2) */
+    data->stage[1][k].nrecvn=data->stage[0][k].nrecvn=recvn;
+    data->stage[1][k].p1    =data->stage[0][k].p1    =targ;
+    data->stage[1][k].p2    =data->stage[0][k].p2    =comm->id-1;
+    if(id<bh) n=nl; else n-=nl,bl=bh;
+    ++k;
+  }
+}
+
+struct crl_id {
+  ulong id; uint p, ri, si, bi, send;  /* as filled by crl_work_init: p = proc, ri = index on p, si = local index; bi and send are assigned by crl_work_label */
+};
+
+/* assumes sh is grouped by i (e.g., sorted by i or by id) */
+static void crl_work_init(struct array *cw, struct array *sh,
+                          const unsigned send_mask, uint this_p)
+{ /* refills cw from sh: one row per sendable sh row, plus at most one row for this_p itself per local index */
+  const unsigned recv_mask = send_mask^(FLAGS_REMOTE|FLAGS_LOCAL);  /* send_mask with both flag bits toggled */
+  uint last_i=-(uint)1; int added_myself;
+  uint cw_n = 0, cw_max = cw->max;
+  struct crl_id *w = cw->ptr;
+  struct shared_id *s, *se;
+
+#define CW_ADD(aid,ap,ari,asi) do { \
+    if(cw_n==cw_max)                                         \
+      array_reserve(struct crl_id,cw,cw_n+1),cw_max=cw->max, \
+      w=(struct crl_id*)cw->ptr+cw_n;                        \
+    w->id=aid, w->p=ap, w->ri=ari, w->si=asi;                \
+    ++w, ++cw_n;                                             \
+  } while(0)
+  
+  for(s=sh->ptr,se=s+sh->n;s!=se;++s) {
+    int send = (s->flags&send_mask)==0;
+    int recv = (s->flags&recv_mask)==0;
+    if(s->i!=last_i) last_i=s->i, added_myself=0;  /* a new group of rows with the same i starts */
+    if(!added_myself && recv && (s->flags&FLAGS_LOCAL)==0) {
+      added_myself=1;
+      CW_ADD(s->id,this_p,s->i,s->i);
+    }
+    if(send) CW_ADD(s->id,s->p,s->ri,s->i);
+  }
+  cw->n=cw_n;
+#undef CW_ADD  
+}
+
+static void crl_maps(struct cr_stage *stage, struct array *cw, buffer *buf)
+{ /* builds stage->scatter_map and stage->gather_map (one allocation) from the bi and si fields of cw */
+  struct crl_id *w, *we, *other;
+  uint scount=1, gcount=1, *sp, *gp;  /* 1 each: the closing terminators */
+  sarray_sort_2(struct crl_id,cw->ptr,cw->n, bi,0, si,0, buf);
+  for(w=cw->ptr,we=w+cw->n;w!=we;w=other) {  /* pass 1: sizes; one group of rows per bi */
+    uint bi=w->bi,any=0,si=w->si;
+    scount+=3;  /* si, bi, -1 */
+    for(other=w+1;other!=we&&other->bi==bi;++other)
+      if(other->si!=si) si=other->si, any=2, ++gcount;  /* each further distinct si adds one gather entry */
+    gcount+=any;  /* bi and -1, only when the group has a gather list */
+  }
+  stage->scatter_map = sp = tmalloc(uint,scount+gcount);
+  stage->gather_map  = gp = sp + scount;  /* second part of the same block */
+  for(w=cw->ptr,we=w+cw->n;w!=we;w=other) {  /* pass 2: same walk, writing */
+    uint bi=w->bi,any=0,si=w->si;
+    *sp++ = w->si, *sp++ = bi;
+    *gp++ = bi;
+    for(other=w+1;other!=we&&other->bi==bi;++other)
+      if(other->si!=si) si=other->si, any=1, *gp++ = si;
+    if(any) *gp++ = -(uint)1; else --gp;  /* no further si: take back the bi just written */
+    *sp++ = -(uint)1;
+  }
+  *sp=-(uint)1, *gp=-(uint)1;
+}
+
+static uint crl_work_label(struct array *cw, struct cr_stage *stage,
+                           uint cutoff, int send_hi, buffer *buf)
+{ /* marks each cw row as sent or kept relative to cutoff, assigns buffer indices bi, sets stage->size_s and size_sk, builds the stage maps; returns the number of sent rows */
+  struct crl_id *w, *we, *start;
+  uint nsend, nkeep = 0, nks = 0, bi=0;
+  /* here w->send has a reverse meaning */
+  if(send_hi) for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p< cutoff;
+         else for(w=cw->ptr,we=w+cw->n;w!=we;++w) w->send = w->p>=cutoff;
+  sarray_sort_2(struct crl_id,cw->ptr,cw->n, id,1, send,0, buf);
+  for(start=cw->ptr,w=start,we=w+cw->n;w!=we;++w) {
+    nkeep += w->send;
+    if(w->id!=start->id) start=w;
+    if(w->send!=start->send) w->send=0,w->bi=1, ++nks; else w->bi=0;  /* kept row whose id is also being sent: marked with bi=1 and counted in nks */
+  }
+  nsend = cw->n-nkeep;
+  /* assign indices; sent ids have priority (hence w->send is reversed) */
+  sarray_sort(struct crl_id,cw->ptr,cw->n, send,0, buf);
+  for(start=cw->ptr,w=start,we=w+nsend+nks;w!=we;++w) {  /* rows whose id is sent, including the nks marked keeps */
+    if(w->id!=start->id) start=w, ++bi;
+    if(w->bi!=1) w->send=1;   /* switch back to the usual semantics */
+    w->bi = bi;
+  }
+  stage->size_s = nsend+nks==0 ? 0 : bi+1;
+  for(we=(struct crl_id*)cw->ptr+cw->n;w!=we;++w) {  /* the remaining rows: kept only */
+    if(w->id!=start->id) start=w, ++bi;
+    w->send = 0;              /* switch back to the usual semantics */
+    w->bi = bi;
+  }
+  stage->size_sk = cw->n==0 ? 0 : bi+1;
+  crl_maps(stage,cw,buf);
+  return nsend;
+}
+
+/* Set si = bi + v on each of the n entries starting at w. */
+static void crl_bi_to_si(struct crl_id *w, uint n, uint v)
+{ uint k; for(k=0; k<n; ++k) w[k].si = w[k].bi + v; }
+
+/* Copy ri into bi on each of the n entries starting at w. */
+static void crl_ri_to_bi(struct crl_id *w, uint n)
+{ uint k; for(k=0; k<n; ++k) w[k].bi = w[k].ri; }
+
+static uint cr_learn(struct array *cw, struct cr_stage *stage,
+                     const struct comm *comm, buffer *buf)
+{ /* walks the schedule once, exchanging the work rows themselves, to fill each stage's sizes and maps; returns the largest size_total */
+  comm_req req[3];
+  const uint id = comm->id;
+  uint bl=0, n=comm->np;
+  uint size_max=0;
+  uint tag = comm->np;
+  while(n>1) {
+    uint nl = (n+1)/2, bh = bl+nl;
+    uint nkeep, nsend[2], nrecv[2][2] = {{0,0},{0,0}};  /* [.][0]: row count, [.][1]: the sender's size_s */
+    struct crl_id *wrecv[2], *wsend;
+    nsend[0] = crl_work_label(cw,stage,bh,id<bh,buf);
+    nsend[1] = stage->size_s;
+    nkeep = cw->n - nsend[0];
+
+    if(stage->nrecvn   ) comm_irecv(&req[1],comm,nrecv[0],2*sizeof(uint),
+                                    stage->p1,tag);
+    if(stage->nrecvn==2) comm_irecv(&req[2],comm,nrecv[1],2*sizeof(uint),
+                                    stage->p2,tag);
+    comm_isend(&req[0],comm,nsend,2*sizeof(uint),stage->p1,tag);
+    comm_wait(req,1+stage->nrecvn),++tag;  /* first exchange: the counts */
+    
+    stage->size_r1 = nrecv[0][1], stage->size_r2 = nrecv[1][1];
+    stage->size_r = stage->size_r1 + stage->size_r2;
+    stage->size_total = stage->size_r + stage->size_sk;
+    if(stage->size_total>size_max) size_max=stage->size_total;
+    
+    array_reserve(struct crl_id,cw,cw->n+nrecv[0][0]+nrecv[1][0]);
+    wrecv[0] = cw->ptr, wrecv[0] += cw->n, wrecv[1] = wrecv[0]+nrecv[0][0];
+    wsend = cw->ptr, wsend += nkeep;
+    if(stage->nrecvn   )
+      comm_irecv(&req[1],comm,wrecv[0],nrecv[0][0]*sizeof(struct crl_id),
+                 stage->p1,tag);
+    if(stage->nrecvn==2)
+      comm_irecv(&req[2],comm,wrecv[1],nrecv[1][0]*sizeof(struct crl_id),
+                 stage->p2,tag);
+    sarray_sort_2(struct crl_id,cw->ptr,cw->n, send,0, bi,0, buf);
+    comm_isend(&req[0],comm,wsend,nsend[0]*sizeof(struct crl_id),stage->p1,tag);
+    comm_wait(req,1+stage->nrecvn),++tag;  /* second exchange: the rows */
+
+    crl_bi_to_si(cw->ptr,nkeep,stage->size_r);
+    if(stage->nrecvn)    crl_bi_to_si(wrecv[0],nrecv[0][0],0);
+    if(stage->nrecvn==2) crl_bi_to_si(wrecv[1],nrecv[1][0],stage->size_r1);
+    memmove(wsend,wrecv[0],(nrecv[0][0]+nrecv[1][0])*sizeof(struct crl_id));  /* the received rows take the place of the sent ones */
+    cw->n += nrecv[0][0] + nrecv[1][0];
+    cw->n -= nsend[0];
+    
+    if(id<bh) n=nl; else n-=nl,bl=bh;
+    ++stage;
+  }
+  crl_ri_to_bi(cw->ptr,cw->n);
+  crl_maps(stage,cw,buf);  /* maps of the final entry, after the last exchange stage */
+  return size_max;
+}
+
+/* Allocate a cr_data for sh, lay out its stage schedule, and learn both map
+   sets: stage[0] with send mask FLAGS_LOCAL (default behavior: send only
+   locally unflagged data), stage[1] with send mask FLAGS_REMOTE. */
+static struct cr_data *cr_setup_aux(
+  struct array *sh, const struct comm *comm, buffer *buf)
+{
+  static const unsigned send_mask[2] = { FLAGS_LOCAL, FLAGS_REMOTE };
+  struct cr_data *const crd = tmalloc(struct cr_data,1);
+  struct array cw = null_array;
+  uint need = 0;
+  unsigned t;
+
+  cr_schedule(crd,comm);
+  sarray_sort(struct shared_id,sh->ptr,sh->n, i,0, buf);
+  for(t=0; t<2; ++t) {
+    uint sz;
+    crl_work_init(&cw,sh, send_mask[t], comm->id);
+    sz = cr_learn(&cw,crd->stage[t],comm,buf);
+    if(sz>need) need = sz;   /* keep the larger of the two directions */
+  }
+  array_free(&cw);
+  crd->stage_buffer_size = need;
+  crd->buffer_size = 2*need;
+  return crd;
+}
+
+/* Release the scatter_map allocation of stage[0..kmax], bounds inclusive;
+   gather_map is not freed separately (crl_maps carves it out of the same
+   block as scatter_map). */
+static void cr_free_stage_maps(struct cr_stage *stage, unsigned kmax)
+{
+  struct cr_stage *const last = stage + kmax;
+  for(; stage<=last; ++stage)
+    free((uint*)stage->scatter_map);
+}
+
+/* Free both stage map sets, the stage block (based at stage[0]), then data. */
+static void cr_free(struct cr_data *data)
+{
+  unsigned t;
+  for(t=0; t<2; ++t) cr_free_stage_maps(data->stage[t],data->nstages);
+  free(data->stage[0]), free(data);
+}
+
+/* setup_fun for the crystal router method: builds a cr_data from top->sh
+   and installs it in r with cr_exec / cr_free. */
+static void cr_setup(struct gs_remote *r, struct gs_topology *top,
+                     const struct comm *comm, buffer *buf)
+{
+  struct cr_data *const crd = cr_setup_aux(&top->sh,comm,buf);
+  r->data = crd, r->buffer_size = crd->buffer_size;
+  r->exec = (exec_fun*)&cr_exec, r->fin = (fin_fun*)&cr_free;
+}
+
+/*------------------------------------------------------------------------------
+  All-reduce Execution
+------------------------------------------------------------------------------*/
+struct allreduce_data {
+  const uint *map_to_buf[2], *map_from_buf[2];  /* indexed by transpose in allreduce_exec */
+  uint buffer_size;  /* set to 2*total_shared; allreduce_exec uses the second half as the extra comm_allreduce buffer */
+};
+
+static void allreduce_exec(
+  void *data, gs_mode mode, unsigned vn, gs_dom dom, gs_op op,
+  unsigned transpose, const void *execdata, const struct comm *comm, char *buf)
+{ /* scatters the mapped entries of data into buf, all-reduces buf, and copies the results back into data */
+  const struct allreduce_data *ard = execdata;
+  static gs_scatter_fun *const scatter_to_buf[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_many_to_vec, &scatter_noop };
+  static gs_scatter_fun *const scatter_from_buf[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_vec_to_many, &scatter_noop };
+  uint gvn = vn*(ard->buffer_size/2);  /* number of values reduced: vn per slot of the first buffer half */
+  unsigned unit_size = gs_dom_size[dom];
+  char *ardbuf;
+  ardbuf = buf+unit_size*gvn;  /* second half of buf, handed to comm_allreduce as its last argument */
+  /* user array -> buffer */
+  gs_init_array(buf,gvn,dom,op);
+  scatter_to_buf[mode](buf,data,vn,ard->map_to_buf[transpose],dom);
+  /* all reduce */
+  comm_allreduce(comm,dom,op, buf,gvn, ardbuf);
+  /* buffer -> user array */
+  scatter_from_buf[mode](data,buf,vn,ard->map_from_buf[transpose],dom);
+}
+
+/*------------------------------------------------------------------------------
+  All-reduce setup
+------------------------------------------------------------------------------*/
+/* Build a -1 terminated map with one "src, dst, -1" triple per pr row whose
+   flag has none of the flags_mask bits set: (i, ord) when to_buf is nonzero,
+   (ord, i) otherwise. */
+static const uint *allreduce_map_setup(
+  struct array *pr, const unsigned flags_mask, int to_buf)
+{
+  const struct primary_shared_id *const first = pr->ptr, *const last = first+pr->n, *q;
+  uint len = 1, *map, *out;
+  for(q=first; q!=last; ++q)
+    if(!(q->flag&flags_mask)) len += 3;
+  out = map = tmalloc(uint,len);
+  for(q=first; q!=last; ++q) {
+    if(q->flag&flags_mask) continue;
+    if(to_buf) *out++ = q->i,   *out++ = q->ord;
+    else       *out++ = q->ord, *out++ = q->i;
+    *out++ = -(uint)1;
+  }
+  *out = -(uint)1;
+  return map;
+}
+
+/* Allocate an allreduce_data for pr.  Default behavior (index 0): reduce
+   only unflagged data, copy to all.  Transpose behavior (index 1): reduce
+   all data, copy to unflagged.  buffer_size is set to twice
+   total_shared. */
+static struct allreduce_data *allreduce_setup_aux(
+  struct array *pr, ulong total_shared)
+{
+  struct allreduce_data *const ard = tmalloc(struct allreduce_data,1);
+  unsigned t;
+  for(t=0; t<2; ++t) {
+    ard->map_to_buf  [t] = allreduce_map_setup(pr, t ? 0u : 1u, 1);
+    ard->map_from_buf[t] = allreduce_map_setup(pr, t ? 1u : 0u, 0);
+  }
+  ard->buffer_size = total_shared*2;
+  return ard;
+}
+
+/* Free the four maps of ard (both map_to_buf, then both map_from_buf), then ard. */
+static void allreduce_free(struct allreduce_data *ard)
+{
+  unsigned t;
+  for(t=0; t<2; ++t) free((uint*)ard->map_to_buf[t]);
+  for(t=0; t<2; ++t) free((uint*)ard->map_from_buf[t]);
+  free(ard);
+}
+
+/* setup_fun for the allreduce method: builds an allreduce_data from top->pr
+   and top->total_shared and installs it in r.  comm and buf are not used. */
+static void allreduce_setup(struct gs_remote *r, struct gs_topology *top,
+                            const struct comm *comm, buffer *buf)
+{
+  struct allreduce_data *const ard = allreduce_setup_aux(&top->pr,top->total_shared);
+  r->data = ard, r->buffer_size = ard->buffer_size;
+  r->exec = (exec_fun*)&allreduce_exec, r->fin = (fin_fun*)&allreduce_free;
+}
+
+/*------------------------------------------------------------------------------
+  Automatic Setup --- dynamically picks the fastest method
+------------------------------------------------------------------------------*/
+
+static void dry_run_time(double times[3], const struct gs_remote *r,
+                         const struct comm *comm, buffer *buf)
+{ /* on return times[] = (average, min, max) over ranks of the mean time per exec call */
+  int i; double t;
+  buffer_reserve(buf,gs_dom_size[gs_double]*r->buffer_size);
+  for(i= 2;i;--i) /* two untimed warm-up calls */
+    r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr);
+  comm_barrier(comm);
+  t = comm_time();
+  for(i=10;i;--i) /* ten timed calls */
+    r->exec(0,mode_dry_run,1,gs_double,gs_add,0,r->data,comm,buf->ptr);
+  t = (comm_time() - t)/10; /* mean time per call on this rank */
+  times[0] = t/comm->np, times[1] = t, times[2] = t; /* pre-divided so the sum is the average */
+  comm_allreduce(comm,gs_double,gs_add, &times[0],1, &t); /* t: presumably scratch -- confirm */
+  comm_allreduce(comm,gs_double,gs_min, &times[1],1, &t);
+  comm_allreduce(comm,gs_double,gs_max, &times[2],1, &t);
+}
+
+static void auto_setup(struct gs_remote *r, struct gs_topology *top,
+                       const struct comm *comm, buffer *buf)
+{
+  pw_setup(r, top,comm,buf);
+  /* pairwise is the baseline; with more than one rank the alternatives are timed against it */
+  if(comm->np>1) {
+    const char *name = "pairwise";
+    struct gs_remote r_alt;
+    double time[2][3];
+    /* time[0]: best method so far, time[1]: candidate; columns are avg, min, max */
+    #define DRY_RUN(i,gsr,str) do { \
+      if(comm->id==0) printf("   " str ": "); \
+      dry_run_time(time[i],gsr,comm,buf); \
+      if(comm->id==0) \
+        printf("%g %g %g\n",time[i][0],time[i][1],time[i][2]); \
+    } while(0)
+    /* keep the candidate iff its max time beats the best max so far; finalize the loser */
+    #define DRY_RUN_CHECK(str,new_name) do { \
+      DRY_RUN(1,&r_alt,str); \
+      if(time[1][2]<time[0][2]) \
+        time[0][2]=time[1][2], name=new_name, \
+        r->fin(r->data), *r = r_alt; \
+      else \
+        r_alt.fin(r_alt.data); \
+    } while(0)
+
+    DRY_RUN(0, r, "pairwise times (avg, min, max)");
+
+    cr_setup(&r_alt, top,comm,buf);
+    DRY_RUN_CHECK(      "crystal router                ", "crystal router");
+    /* the all-reduce method is only tried when fewer than 100000 labels are shared */
+    if(top->total_shared<100000) {
+      allreduce_setup(&r_alt, top,comm,buf);
+      DRY_RUN_CHECK(    "all reduce                    ", "allreduce");
+    }
+
+    #undef DRY_RUN_CHECK
+    #undef DRY_RUN
+
+    if(comm->id==0) printf("   used all_to_all method: %s\n",name);
+  }
+}
+
+/*------------------------------------------------------------------------------
+  Main Execution
+------------------------------------------------------------------------------*/
+struct gs_data {
+  struct comm comm; /* set by comm_dup (gs_setup) or comm_init_check (fgs_setup) */
+  const uint *map_local[2]; /* 0=unflagged, 1=all */
+  const uint *flagged_primaries; /* passed to the init kernels when transpose==0 */
+  struct gs_remote r; /* chosen remote method: exec/fin callbacks, data, buffer_size */
+  uint **submap_local[2]; /* per-thread pieces of map_local; 0=unflagged, 1=all */
+  uint **subflagged_primaries; /* per-thread pieces of flagged_primaries */
+};
+
+
+static void gs_aux(
+  void *u, gs_mode mode, unsigned vn, gs_dom dom, gs_op op, unsigned transpose,
+  struct gs_data *gsh, buffer *buf)
+{
+  static gs_scatter_fun *const local_scatter[] =
+    { &gs_scatter, &gs_scatter_vec, &gs_scatter_many, &scatter_noop };
+  static gs_gather_fun  *const local_gather [] =
+    { &gs_gather,  &gs_gather_vec,  &gs_gather_many, &gather_noop  };
+  static gs_init_fun *const init[] =
+    { &gs_init, &gs_init_vec, &gs_init_many, &init_noop };
+  /* The three tables above are indexed by mode.  Steps: local gather, init of
+     flagged primaries (transpose==0 only), remote exchange, local scatter. */
+  int thd = 0; /* OpenMP thread number; stays 0 when built without OpenMP */
+  int inp = 0; /* nonzero when called from inside a parallel region */
+  #ifdef _OPENMP
+    thd = omp_get_thread_num();
+    inp = omp_in_parallel();
+  #endif
+  /* a null buf falls back to the file-level static_buffer */
+  if(!buf) buf = &static_buffer;
+
+  #pragma omp single
+  {
+    buffer_reserve(buf,vn*gs_dom_size[dom]*gsh->r.buffer_size);
+  }
+  /* parallel path: each thread uses its own sub-map, with a barrier after every phase */
+  if (inp) {
+    local_gather [mode](u,u,vn,(gsh->submap_local[0^transpose])[thd],dom,op);
+    #pragma omp barrier
+
+    if(transpose==0) init[mode](u,vn,(gsh->subflagged_primaries)[thd],dom,op);
+    #pragma omp barrier
+    /* NOTE(review): every thread calls exec -- presumably exec coordinates threads itself; confirm */
+    gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr);
+    #pragma omp barrier
+
+    local_scatter[mode](u,u,vn,(gsh->submap_local[1^transpose])[thd],dom);
+    #pragma omp barrier
+
+  } else { /* serial path: whole maps, no barriers */
+    local_gather [mode](u,u,vn,gsh->map_local[0^transpose],dom,op);
+    if(transpose==0) init[mode](u,vn,gsh->flagged_primaries,dom,op);
+    gsh->r.exec(u,mode,vn,dom,op,transpose,gsh->r.data,&gsh->comm,buf->ptr);
+    local_scatter[mode](u,u,vn,gsh->map_local[1^transpose],dom);
+  }
+
+}
+
+void gs(void *u, gs_dom dom, gs_op op, unsigned transpose,
+        struct gs_data *gsh, buffer *buf)
+{ /* plain mode: u is one array with a single value per local index (vn = 1) */
+  gs_aux(u,mode_plain,1,dom,op,transpose,gsh,buf);
+}
+
+void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op,
+            unsigned transpose, struct gs_data *gsh, buffer *buf)
+{ /* vec mode: u holds vn contiguously packed values per local index */
+  gs_aux(u,mode_vec,vn,dom,op,transpose,gsh,buf);
+}
+
+void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op,
+             unsigned transpose, struct gs_data *gsh, buffer *buf)
+{ /* many mode: u is a list of vn array pointers, all exchanged in one call */
+  gs_aux((void*)u,mode_many,vn,dom,op,transpose,gsh,buf);
+}
+
+/*------------------------------------------------------------------------------
+  Main Setup
+------------------------------------------------------------------------------*/
+typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce,
+               gs_auto } gs_method;
+
+
+
+static void local_setup(struct gs_data *gsh, const struct array *nz)
+{ /* fills every local map of gsh that gs_aux reads and gs_free later releases */
+  gsh->map_local[0] = local_map(nz,1); /* [0]: unflagged entries only */
+  gsh->map_local[1] = local_map(nz,0); /* [1]: all entries */
+  gsh->flagged_primaries = flagged_primaries_map(nz);
+  sublist(gsh->map_local[0], &(gsh->submap_local[0])); /* per-thread pieces, indexed by thread number in gs_aux */
+  sublist(gsh->map_local[1], &(gsh->submap_local[1]));
+  subflagged(gsh->flagged_primaries, &(gsh->subflagged_primaries));
+}
+
+static void gs_setup_aux(struct gs_data *gsh, const slong *id, uint n,
+                         int unique, gs_method method, int verbose)
+{
+  static setup_fun *const remote_setup[] =
+    { &pw_setup, &cr_setup, &allreduce_setup, &auto_setup };
+  /* indexed by method below, so the order must match gs_method; no range check is made */
+  struct gs_topology top;
+  struct crystal cr;
+  /* gsh->comm must already be initialized by the caller */
+  crystal_init(&cr,&gsh->comm);
+
+  get_topology(&top, id,n, &cr);
+  if(unique) make_topology_unique(&top,0,gsh->comm.id,&cr.data); /* null id: caller's array untouched */
+
+  local_setup(gsh,&top.nz);
+
+  if(verbose && gsh->comm.id==0)
+    printf("gs_setup: %ld unique labels shared\n",(long)top.total_shared);
+  /* cr.data is lent to the setup routine as its work buffer */
+  remote_setup[method](&gsh->r, &top,&gsh->comm,&cr.data);
+
+  gs_topology_free(&top);
+  crystal_free(&cr);
+}
+
+struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm,
+                         int unique, gs_method method, int verbose)
+{ /* returns a newly allocated handle; release it with gs_free */
+  struct gs_data *gsh = tmalloc(struct gs_data,1);
+  comm_dup(&gsh->comm,comm); /* the handle keeps its own copy, released by comm_free in gs_free */
+  gs_setup_aux(gsh,id,n,unique,method,verbose);
+  return gsh;
+}
+
+void gs_free(struct gs_data *gsh)
+{ /* releases everything gs_setup / fgs_setup attached to gsh, then gsh itself */
+  comm_free(&gsh->comm);
+  free((uint*)gsh->map_local[0]), free((uint*)gsh->map_local[1]);
+  free((uint*)gsh->flagged_primaries);
+  gsh->r.fin(gsh->r.data); /* method-specific teardown, e.g. allreduce_free */
+  free((gsh->submap_local[0])[0]); /* NOTE(review): assumes sublist() keeps all pieces in one allocation at [0] -- confirm */
+  free(gsh->submap_local[0]);
+  free((gsh->submap_local[1])[0]);
+  free(gsh->submap_local[1]);
+  free((gsh->subflagged_primaries)[0]); /* NOTE(review): same single-allocation assumption for subflagged() */
+  free(gsh->subflagged_primaries);
+  free(gsh);
+}
+
+void gs_unique(slong *id, uint n, const struct comm *comm)
+{ /* per the gs.h notes: negates id[i] for all but one member of each shared group */
+  struct gs_topology top;
+  struct crystal cr;
+  crystal_init(&cr,comm);
+  get_topology(&top, id,n, &cr);
+  make_topology_unique(&top,id,comm->id,&cr.data); /* id passed here; gs_setup_aux passes 0 instead */
+  gs_topology_free(&top);
+  crystal_free(&cr);
+}
+
+/*------------------------------------------------------------------------------
+  FORTRAN interface
+------------------------------------------------------------------------------*/
+
+#undef gs_op
+
+#undef gs_free
+#undef gs_setup
+#undef gs_many
+#undef gs_vec
+#undef gs
+#define cgs       PREFIXED_NAME(gs      )
+#define cgs_vec   PREFIXED_NAME(gs_vec  )
+#define cgs_many  PREFIXED_NAME(gs_many )
+#define cgs_setup PREFIXED_NAME(gs_setup)
+#define cgs_free  PREFIXED_NAME(gs_free )
+
+#define fgs_setup  FORTRAN_NAME(gs_setup    ,GS_SETUP    )
+#define fgs        FORTRAN_NAME(gs_op       ,GS_OP       )
+#define fgs_vec    FORTRAN_NAME(gs_op_vec   ,GS_OP_VEC   )
+#define fgs_many   FORTRAN_NAME(gs_op_many  ,GS_OP_MANY  )
+#define fgs_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS)
+#define fgs_free   FORTRAN_NAME(gs_free     ,GS_FREE     )
+
+static struct gs_data **fgs_info = 0;
+static int fgs_max = 0;
+static int fgs_n = 0;
+
+void fgs_setup(sint *handle, const slong id[], const sint *n,
+               const MPI_Fint *comm, const sint *np)
+{ /* Fortran gs_setup: always pairwise, unique=0, verbose=1; *handle indexes fgs_info */
+  struct gs_data *gsh;
+  if(fgs_n==fgs_max) fgs_max+=fgs_max/2+1, /* table full: grow by about half */
+                     fgs_info=trealloc(struct gs_data*,fgs_info,fgs_max);
+  gsh=fgs_info[fgs_n]=tmalloc(struct gs_data,1);
+  comm_init_check(&gsh->comm,*comm,*np);
+  gs_setup_aux(gsh,id,*n,0,gs_pairwise,1);
+  *handle = fgs_n++; /* handles only ever increase; slots cleared by fgs_free are not reused */
+}
+
+static void fgs_check_handle(sint handle, const char *func, unsigned line)
+{ /* func and line identify the calling Fortran entry point in the message */
+  if(handle<0 || handle>=fgs_n || !fgs_info[handle]) /* out of range, or already freed */
+    fail(1,__FILE__,line,"%s: invalid handle", func);
+}
+
+static void fgs_check_parms(sint handle, sint dom, sint op,
+                            const char *func, unsigned line)
+{ /* dom and op are the 1-based Fortran values; callers subtract 1 for the C enums */
+  if(dom<1 || dom>4) /* so a fifth domain (long_long, where enabled) is not accepted */
+    fail(1,__FILE__,line,"%s: datatype %d not in valid range 1-4",func,dom);
+  if(op <1 || op >4) /* so the fifth op in gs_defs.h (bpr) is not accepted */
+    fail(1,__FILE__,line,"%s: op %d not in valid range 1-4",func,op);
+  fgs_check_handle(handle,func,line);
+}
+
+void fgs(const sint *handle, void *u, const sint *dom, const sint *op,
+         const sint *transpose)
+{ /* Fortran gs_op: all arguments arrive by reference; dom and op are 1-based */
+  fgs_check_parms(*handle,*dom,*op,"gs_op",__LINE__);
+  cgs(u,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0,fgs_info[*handle],0); /* buf 0: static buffer */
+}
+
+void fgs_vec(const sint *handle, void *u, const sint *n,
+             const sint *dom, const sint *op, const sint *transpose)
+{ /* Fortran gs_op_vec: *n is the packed vector length; it is not range-checked here */
+  fgs_check_parms(*handle,*dom,*op,"gs_op_vec",__LINE__);
+  cgs_vec(u,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0,
+          fgs_info[*handle],0); /* buf 0: static buffer */
+}
+
+void fgs_many(const sint *handle, void *u1, void *u2, void *u3,
+              void *u4, void *u5, void *u6, const sint *n,
+              const sint *dom, const sint *op, const sint *transpose)
+{ /* Fortran gs_op_many: one exchange over the first *n of up to six arrays */
+  void *uu[6]; uu[0]=u1,uu[1]=u2,uu[2]=u3,uu[3]=u4,uu[4]=u5,uu[5]=u6; /* six slots only */
+  if(*n<0 || *n>6) fail(1,__FILE__,__LINE__,"gs_op_many: n=%d not in valid range 0-6",(int)*n);
+  fgs_check_parms(*handle,*dom,*op,"gs_op_many",__LINE__);
+  cgs_many((void *const*)uu,*n,(gs_dom)(*dom-1),(gs_op_t)(*op-1),*transpose!=0,
+           fgs_info[*handle],0);
+}
+
+static struct array fgs_fields_array = null_array;
+
+void fgs_fields(const sint *handle,
+                void *u, const sint *stride, const sint *n,
+                const sint *dom, const sint *op, const sint *transpose)
+{
+  size_t offset; /* byte distance between the starts of consecutive fields */
+  void **p;
+  uint i;
+  /* Fortran gs_op_fields: u holds *n fields spaced *stride elements apart */
+  fgs_check_parms(*handle,*dom,*op,"gs_op_fields",__LINE__);
+  if(*n<0) return;
+  /* fgs_fields_array is a file-level scratch pointer list reused across calls */
+  array_reserve(void*,&fgs_fields_array,*n);
+  p = fgs_fields_array.ptr;
+  offset = (size_t)*stride * gs_dom_size[*dom-1]; /* widen first: no 32-bit wrap */
+  for(i=*n;i;--i) *p++ = u, u = (char*)u + offset;
+
+  cgs_many((void *const*)fgs_fields_array.ptr,*n,
+           (gs_dom)(*dom-1),(gs_op_t)(*op-1),
+           *transpose!=0, fgs_info[*handle],0);
+}
+
+void fgs_free(const sint *handle)
+{ /* Fortran gs_free: releases the handle's gs_data */
+  fgs_check_handle(*handle,"gs_free",__LINE__);
+  cgs_free(fgs_info[*handle]);
+  fgs_info[*handle] = 0; /* later use of this handle now fails fgs_check_handle */
+}
+
diff --git a/src/jl/gs.h b/src/jl/gs.h
new file mode 100644
index 0000000..43fc142
--- /dev/null
+++ b/src/jl/gs.h
@@ -0,0 +1,141 @@
+#ifndef GS_H
+#define GS_H
+
+#if !defined(COMM_H) || !defined(GS_DEFS_H) || !defined(MEM_H)
+#warning "gs.h" requires "comm.h", "gs_defs.h", and "mem.h"
+#endif
+
+/*
+  Gather/Scatter Library
+
+  The code
+  
+    struct comm c;  // see "comm.h"
+    slong id[n];    // the slong type is defined in "types.h"
+    ...
+    struct gs_data *g = gs_setup(id,n, &c, 0,gs_auto,1);
+    
+  defines a partition of the set of (processor, local index) pairs,
+    (p,i) \in S_j  iff   abs(id[i]) == j  on processor p
+  That is, all (p,i) pairs are grouped together (in group S_j) that have the
+    same id (=j).
+  S_0 is treated specially --- it is ignored completely
+    (i.e., when id[i] == 0, local index i does not participate in any
+    gather/scatter operation).
+  If id[i] on proc p is negative then the pair (p,i) is "flagged". This
+  determines the non-symmetric behavior. For the simpler, symmetric case,
+  all id's should be positive.
+  
+  The second to last argument to gs_setup is the method to use, one of
+    gs_pairwise, gs_crystal_router, gs_all_reduce, gs_auto
+  The method "gs_auto" tries ~10 runs of each and chooses the fastest.
+  For a single-use handle, it makes more sense to use "gs_crystal_router".
+  
+  When "g" is no longer needed, free it with
+  
+    gs_free(g);
+  
+  A basic gather/scatter operation is, e.g.,
+  
+    double v[n]; buffer buf;  // see "mem.h" for "buffer"
+    ...
+    gs(v, gs_double,gs_add, 0, g,&buf);
+    
+  The buffer pointer can be null, in which case, a static buffer is used,
+  shared across all gs handles.
+  This gs call has the effect, (in the simple, symmetric, unflagged case)
+  
+    v[i] <--  \sum_{ (p,j) \in S_{id[i]} } v_(p) [j]
+    
+  where v_(p) [j] means v[j] on proc p. In other words, every v[i] is replaced
+  by the sum of all v[j]'s with the same id, given by id[i]. This accomplishes
+  "direct stiffness summation" corresponding to the action of QQ^T, where
+  "Q" is a boolean matrix that copies from a global vector (indexed by id)
+  to the local vectors indexed by (p,i) pairs.
+  
+  Summation on doubles is not the only operation and datatype supported. The
+  full list is defined in "gs_defs.h", and includes the operations
+    gs_add, gs_mul, gs_max, gs_min
+  and datatypes
+    gs_double, gs_float, gs_int, gs_long, gs_sint, gs_slong.
+  (The int and long types are the plain C types, whereas sint and slong
+   are defined in "types.h").
+   
+  For the nonsymmetric behavior, the "transpose" parameter is important:
+  
+    gs(v, gs_double,gs_add, transpose, g,&buf);
+    
+  When transpose == 0, any "flagged" (p,i) pairs (id[i] negative on p)
+  do not participate in the sum, but *do* still receive the sum on output.
+  As a special case, when only one (p,i) pair is unflagged per group this
+  corresponds to the rectangular "Q" matrix referred to above.
+  
+  When transpose == 1, the "flagged" (p,i) pairs *do* participate in the sum,
+  but do *not* get set on output. In the special case of only one unflagged
+  (p,i) pair, this corresponds to the transpose of "Q" referred to above.
+
+
+
+  A version for vectors (contiguously packed) is, e.g.,
+  
+    double v[n][k];
+    gs_vec(v,k, gs_double,gs_add, transpose, g,&buf);
+  
+  which is like "gs" operating on the datatype double[k],
+  with summation here being vector summation. Number of messages sent
+  is independent of k.
+  
+  For combining the communication for "gs" on multiple arrays:
+  
+    double v1[n], v2[n], ..., vk[n];
+    double *vs[k] = {v1, v2, ..., vk};
+    
+    gs_many(vs,k, gs_double,op, t, g,&buf);
+  
+  This call is equivalent to
+  
+    gs(v1, gs_double,op, t, g, &buf);
+    gs(v2, gs_double,op, t, g, &buf);
+    ...
+    gs(vk, gs_double,op, t, g, &buf);
+    
+  except that all communication is done together.
+  
+
+
+  Finally, gs_unique has the same basic signature as gs_setup:
+  
+    gs_unique(id,n, &c);
+    
+  This call modifies id, "flagging" (by negating id[i]) all (p,i) pairs in
+  each group except one. The sole "unflagged" member of the group is chosen
+  in an arbitrary but consistent way. If the "unique" flag is set when
+  calling gs_setup, the behavior is equivalent to first calling gs_unique,
+  except that the id array is left unmodified.
+  
+
+*/  
+
+#define gs         PREFIXED_NAME(gs       )
+#define gs_vec     PREFIXED_NAME(gs_vec   )
+#define gs_many    PREFIXED_NAME(gs_many  )
+#define gs_setup   PREFIXED_NAME(gs_setup )
+#define gs_free    PREFIXED_NAME(gs_free  )
+#define gs_unique  PREFIXED_NAME(gs_unique)
+
+struct gs_data;
+typedef enum { gs_pairwise, gs_crystal_router, gs_all_reduce,
+               gs_auto } gs_method;
+
+void gs(void *u, gs_dom dom, gs_op op, unsigned transpose,
+        struct gs_data *gsh, buffer *buf);
+void gs_vec(void *u, unsigned vn, gs_dom dom, gs_op op,
+            unsigned transpose, struct gs_data *gsh, buffer *buf);
+void gs_many(void *const*u, unsigned vn, gs_dom dom, gs_op op,
+             unsigned transpose, struct gs_data *gsh, buffer *buf);
+struct gs_data *gs_setup(const slong *id, uint n, const struct comm *comm,
+                         int unique, gs_method method, int verbose);
+void gs_free(struct gs_data *gsh);
+void gs_unique(slong *id, uint n, const struct comm *comm);
+
+#endif
diff --git a/src/jl/gs_defs.h b/src/jl/gs_defs.h
new file mode 100644
index 0000000..df4ad7b
--- /dev/null
+++ b/src/jl/gs_defs.h
@@ -0,0 +1,81 @@
+#ifndef GS_DEFS_H
+#define GS_DEFS_H
+
+/* requires:
+     <limits.h>, <float.h>   for GS_DEFINE_IDENTITIES()
+     "types.h"               for gs_sint, gs_slong
+*/
+   
+/*------------------------------------------------------------------------------
+  Monoid Definitions
+  
+  Here are defined the domains and operations, each combination being a
+  commutative semigroup, as well as the identity element making each a 
+  commutative monoid.
+------------------------------------------------------------------------------*/
+
+/* the supported domains */
+#define GS_FOR_EACH_DOMAIN(macro) \
+  macro(double) \
+  macro(float ) \
+  macro(int   ) \
+  macro(long  ) \
+  WHEN_LONG_LONG(macro(long_long))
+  
+/* the supported ops */
+#define GS_FOR_EACH_OP(T,macro) \
+  macro(T,add) \
+  macro(T,mul) \
+  macro(T,min) \
+  macro(T,max) \
+  macro(T,bpr)
+
+#define GS_DO_add(a,b) a+=b
+#define GS_DO_mul(a,b) a*=b
+#define GS_DO_min(a,b) if(b<a) a=b
+#define GS_DO_max(a,b) if(b>a) a=b
+#define GS_DO_bpr(a,b) \
+  do if(b!=0) { uint a_ = a; uint b_ = b; \
+       if(a_==0) { a=b_; break; } \
+       for(;;) { if(a_<b_) b_>>=1; else if(b_<a_) a_>>=1; else break; } \
+       a = a_; \
+     } while(0)
+
+/* the monoid identity elements */
+#define GS_DEFINE_MONOID_ID(T,min,max) \
+  static const T gs_identity_##T[] = { 0, 1, max, min, 0 };
+#define GS_DEFINE_IDENTITIES() \
+  GS_DEFINE_MONOID_ID(double, -DBL_MAX,  DBL_MAX) \
+  GS_DEFINE_MONOID_ID(float , -FLT_MAX,  FLT_MAX) \
+  GS_DEFINE_MONOID_ID(int   ,  INT_MIN,  INT_MAX) \
+  GS_DEFINE_MONOID_ID(long  , LONG_MIN, LONG_MAX) \
+  WHEN_LONG_LONG(GS_DEFINE_MONOID_ID(long_long,LLONG_MIN,LLONG_MAX))
+
+/*------------------------------------------------------------------------------
+  Enums and constants
+------------------------------------------------------------------------------*/
+
+/* domain enum */
+#define LIST GS_FOR_EACH_DOMAIN(ITEM) gs_dom_n
+#define ITEM(T) gs_##T,
+typedef enum { LIST } gs_dom;
+#undef ITEM
+#undef LIST
+
+#define gs_sint   TYPE_LOCAL(gs_int,gs_long,gs_long_long)
+#define gs_slong TYPE_GLOBAL(gs_int,gs_long,gs_long_long)
+
+/* domain type size array */
+#define GS_DOM_SIZE_ITEM(T) sizeof(T),
+#define GS_DEFINE_DOM_SIZES() \
+  static const unsigned gs_dom_size[] = \
+    { GS_FOR_EACH_DOMAIN(GS_DOM_SIZE_ITEM) 0 };
+
+/* operation enum */
+#define LIST GS_FOR_EACH_OP(T,ITEM) gs_op_n
+#define ITEM(T,op) gs_##op,
+typedef enum { LIST } gs_op;
+#undef ITEM
+#undef LIST
+
+#endif
diff --git a/src/jl/gs_local.c b/src/jl/gs_local.c
new file mode 100644
index 0000000..2bc246d
--- /dev/null
+++ b/src/jl/gs_local.c
@@ -0,0 +1,336 @@
+#include <string.h>
+#include <limits.h>
+#include <float.h>
+#include "c99.h"
+#include "name.h"
+#include "types.h"
+
+#define gs_gather_array        PREFIXED_NAME(gs_gather_array       )
+#define gs_init_array          PREFIXED_NAME(gs_init_array         )
+#define gs_gather              PREFIXED_NAME(gs_gather             )
+#define gs_scatter             PREFIXED_NAME(gs_scatter            )
+#define gs_init                PREFIXED_NAME(gs_init               )
+#define gs_gather_vec          PREFIXED_NAME(gs_gather_vec         )
+#define gs_scatter_vec         PREFIXED_NAME(gs_scatter_vec        )
+#define gs_init_vec            PREFIXED_NAME(gs_init_vec           )
+#define gs_gather_many         PREFIXED_NAME(gs_gather_many        )
+#define gs_scatter_many        PREFIXED_NAME(gs_scatter_many       )
+#define gs_init_many           PREFIXED_NAME(gs_init_many          )
+#define gs_gather_vec_to_many  PREFIXED_NAME(gs_gather_vec_to_many )
+#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec)
+#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many)
+
+#include "gs_defs.h"
+GS_DEFINE_IDENTITIES()
+GS_DEFINE_DOM_SIZES()
+
+/*------------------------------------------------------------------------------
+  The array gather kernel
+------------------------------------------------------------------------------*/
+#define DEFINE_GATHER(T,OP) \
+static void gather_array_##T##_##OP( \
+  T *restrict out, const T *restrict in, uint n) \
+{                                                                \
+  for(;n;--n) { T q = *in++, *p = out++; GS_DO_##OP(*p,q); }      \
+}
+
+/*------------------------------------------------------------------------------
+  The array initialization kernel
+------------------------------------------------------------------------------*/
+#define DEFINE_INIT(T) \
+static void init_array_##T(T *restrict out, uint n, gs_op op) \
+{                                                             \
+  const T e = gs_identity_##T[op];                            \
+  for(;n;--n) *out++=e;                                       \
+}
+
+#define DEFINE_PROCS(T) \
+  GS_FOR_EACH_OP(T,DEFINE_GATHER) \
+  DEFINE_INIT(T)
+
+GS_FOR_EACH_DOMAIN(DEFINE_PROCS)
+
+#undef DEFINE_PROCS
+#undef DEFINE_INIT
+#undef DEFINE_GATHER
+
+/*------------------------------------------------------------------------------
+  The basic gather kernel: per map group (i, j..., -1), out[i] = out[i] OP in[j*in_stride]
+------------------------------------------------------------------------------*/
+#define DEFINE_GATHER(T,OP) \
+static void gather_##T##_##OP( \
+  T *restrict out, const T *restrict in, const unsigned in_stride,           \
+  const uint *restrict map)                                                  \
+{                                                                            \
+  uint i,j;                                                                  \
+  while((i=*map++)!=-(uint)1) {                                              \
+    T t=out[i];                                                              \
+    j=*map++; do GS_DO_##OP(t,in[j*in_stride]); while((j=*map++)!=-(uint)1); \
+    out[i]=t;                                                                \
+  }                                                                          \
+}
+
+/*------------------------------------------------------------------------------
+  The basic scatter kernel: per map group (i, j..., -1), out[j*out_stride] = in[i*in_stride]
+------------------------------------------------------------------------------*/
+#define DEFINE_SCATTER(T) \
+static void scatter_##T( \
+  T *restrict out, const unsigned out_stride,                      \
+  const T *restrict in, const unsigned in_stride,                  \
+  const uint *restrict map)                                        \
+{                                                                  \
+  uint i,j;                                                        \
+  while((i=*map++)!=-(uint)1) {                                    \
+    T t=in[i*in_stride];                                           \
+    j=*map++; do out[j*out_stride]=t; while((j=*map++)!=-(uint)1); \
+  }                                                                \
+}
+
+/*------------------------------------------------------------------------------
+  The basic initialization kernel
+------------------------------------------------------------------------------*/
+#define DEFINE_INIT(T) \
+static void init_##T(T *restrict out, const uint *restrict map, gs_op op) \
+{                                                       \
+  uint i; const T e = gs_identity_##T[op];              \
+  while((i=*map++)!=-(uint)1) out[i]=e;                 \
+}
+
+#define DEFINE_PROCS(T) \
+  GS_FOR_EACH_OP(T,DEFINE_GATHER) \
+  DEFINE_SCATTER(T) \
+  DEFINE_INIT(T)
+
+GS_FOR_EACH_DOMAIN(DEFINE_PROCS)
+
+#undef DEFINE_PROCS
+#undef DEFINE_INIT
+#undef DEFINE_SCATTER
+#undef DEFINE_GATHER
+
+/*------------------------------------------------------------------------------
+  The vector gather kernel
+------------------------------------------------------------------------------*/
+#define DEFINE_GATHER(T,OP) \
+static void gather_vec_##T##_##OP( \
+  T *restrict out, const T *restrict in, const unsigned vn,                  \
+  const uint *restrict map)                                                  \
+{                                                                            \
+  uint i,j;                                                                  \
+  while((i=*map++)!=-(uint)1) {                                              \
+    T *restrict p = &out[i*vn], *pe = p+vn;                                  \
+    j=*map++; do {                                                           \
+      const T *restrict q = &in[j*vn];                                       \
+      T *restrict pk=p; do { GS_DO_##OP(*pk,*q); ++pk, ++q; } while(pk!=pe); \
+    } while((j=*map++)!=-(uint)1);                                           \
+  }                                                                          \
+}
+
+/*------------------------------------------------------------------------------
+  The vector scatter kernel
+------------------------------------------------------------------------------*/
+void gs_scatter_vec(
+  void *restrict out, const void *restrict in, const unsigned vn,
+  const uint *restrict map, gs_dom dom)
+{ /* per map group (i, j..., -1): copies entry i of in to each entry j of out */
+  const size_t unit_size = (size_t)vn*gs_dom_size[dom]; /* bytes per entry */
+  uint i,j; /* byte offsets below are formed in size_t, so they cannot wrap at 32 bits */
+  while((i=*map++)!=-(uint)1) {
+    const char *t = (const char *)in + i*unit_size;
+    j=*map++; do
+      memcpy((char *)out+j*unit_size,t,unit_size);
+    while((j=*map++)!=-(uint)1);
+  }
+}
+
+/*------------------------------------------------------------------------------
+  The vector initialization kernel
+------------------------------------------------------------------------------*/
+#define DEFINE_INIT(T) \
+static void init_vec_##T(T *restrict out, const unsigned vn, \
+                         const uint *restrict map, gs_op op) \
+{                                                            \
+  uint i; const T e = gs_identity_##T[op];                   \
+  while((i=*map++)!=-(uint)1) {                              \
+    T *restrict u = (T*)out + vn*i, *ue = u+vn;              \
+    do *u++ = e; while(u!=ue);                               \
+  }                                                          \
+}
+
+#define DEFINE_PROCS(T) \
+  GS_FOR_EACH_OP(T,DEFINE_GATHER) \
+  DEFINE_INIT(T)
+
+GS_FOR_EACH_DOMAIN(DEFINE_PROCS)
+
+#undef DEFINE_PROCS
+#undef DEFINE_INIT
+#undef DEFINE_GATHER
+
+#undef DO_bpr /* NOTE(review): the op macros used above are named GS_DO_*  */
+#undef DO_max /* (gs_defs.h) and no DO_* macro is defined in this file, so */
+#undef DO_min /* these five #undefs look like no-ops left over from a      */
+#undef DO_mul /* rename -- confirm against the included headers.           */
+#undef DO_add
+
+#define SWITCH_DOMAIN_CASE(T) case gs_##T: WITH_DOMAIN(T); break;
+#define SWITCH_DOMAIN(dom) do switch(dom) { \
+    GS_FOR_EACH_DOMAIN(SWITCH_DOMAIN_CASE) case gs_dom_n: break; } while(0)
+
+#define SWITCH_OP_CASE(T,OP) case gs_##OP: WITH_OP(T,OP); break;
+#define SWITCH_OP(T,op) do switch(op) { \
+    GS_FOR_EACH_OP(T,SWITCH_OP_CASE) case gs_op_n: break; } while(0)
+
+/*------------------------------------------------------------------------------
+  Array kernels
+------------------------------------------------------------------------------*/
+void gs_gather_array(void *out, const void *in, uint n, gs_dom dom, gs_op op)
+{ /* out[k] = out[k] OP in[k] for the first n elements, typed by dom */
+#define WITH_OP(T,OP) gather_array_##T##_##OP(out,in,n)
+#define WITH_DOMAIN(T) SWITCH_OP(T,op)
+  SWITCH_DOMAIN(dom); /* nested switch on dom then op; values with no case do nothing */
+#undef  WITH_DOMAIN
+#undef  WITH_OP
+}
+
+void gs_init_array(void *out, uint n, gs_dom dom, gs_op op)
+{ /* fills the first n elements of out with the identity element of op for dom */
+#define WITH_DOMAIN(T) init_array_##T(out,n,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+/*------------------------------------------------------------------------------
+  Plain kernels; vn parameter ignored but present for consistent signatures
+------------------------------------------------------------------------------*/
+void gs_gather(void *out, const void *in, const unsigned vn,
+               const uint *map, gs_dom dom, gs_op op)
+{ /* vn is unused; the input stride is fixed at 1 */
+#define WITH_OP(T,OP) gather_##T##_##OP(out,in,1,map)
+#define WITH_DOMAIN(T) SWITCH_OP(T,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+#undef  WITH_OP
+}
+
+void gs_scatter(void *out, const void *in, const unsigned vn,
+                const uint *map, gs_dom dom)
+{ /* vn is unused; both strides are fixed at 1 */
+#define WITH_DOMAIN(T) scatter_##T(out,1,in,1,map)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+void gs_init(void *out, const unsigned vn, const uint *map,
+             gs_dom dom, gs_op op)
+{ /* vn is unused; sets out[i] to the identity of op for each i in the -1 terminated map */
+#define WITH_DOMAIN(T) init_##T(out,map,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+/*------------------------------------------------------------------------------
+  Vector kernels
+------------------------------------------------------------------------------*/
+void gs_gather_vec(void *out, const void *in, const unsigned vn,
+                   const uint *map, gs_dom dom, gs_op op)
+{ /* like gs_gather, but every map index addresses vn consecutive values */
+#define WITH_OP(T,OP) gather_vec_##T##_##OP(out,in,vn,map)
+#define WITH_DOMAIN(T) SWITCH_OP(T,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+#undef  WITH_OP
+}
+
+void gs_init_vec(void *out, const unsigned vn, const uint *map,
+                 gs_dom dom, gs_op op)
+{ /* sets all vn values of each mapped entry to the identity of op */
+#define WITH_DOMAIN(T) init_vec_##T(out,vn,map,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+/*------------------------------------------------------------------------------
+  Multiple array kernels
+------------------------------------------------------------------------------*/
+void gs_gather_many(void *out, const void *in, const unsigned vn,
+                    const uint *map, gs_dom dom, gs_op op)
+{ /* out and in are lists of vn array pointers; the same map is applied to each pair */
+  uint k;
+  typedef void *ptr_to_void; typedef const void *ptr_to_const_void;
+  const ptr_to_void *p = out; const ptr_to_const_void *q = in;
+#define WITH_OP(T,OP) for(k=0;k<vn;++k) gather_##T##_##OP(p[k],q[k],1,map)
+#define WITH_DOMAIN(T) SWITCH_OP(T,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+#undef  WITH_OP
+}
+
+void gs_scatter_many(void *out, const void *in, const unsigned vn,
+                     const uint *map, gs_dom dom)
+{ /* out and in are lists of vn array pointers; the same map is applied to each pair */
+  uint k;
+  typedef void *ptr_to_void; typedef const void *ptr_to_const_void;
+  const ptr_to_void *p = out; const ptr_to_const_void *q = in;
+#define WITH_DOMAIN(T) for(k=0;k<vn;++k) scatter_##T(p[k],1,q[k],1,map)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+void gs_init_many(void *out, const unsigned vn, const uint *map,
+                  gs_dom dom, gs_op op)
+{ /* out is a list of vn array pointers; each array gets the mapped entries reset */
+  uint k;
+  typedef void *ptr_to_void; const ptr_to_void *p = out;
+#define WITH_DOMAIN(T) for(k=0;k<vn;++k) init_##T(p[k],map,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+/*------------------------------------------------------------------------------
+  Gather from strided array -> multiple arrays
+  Scatter from multiple arrays -> strided array,
+  Scatter from strided array -> multiple arrays,
+------------------------------------------------------------------------------*/
+void gs_gather_vec_to_many(void *out, const void *in, const unsigned vn,
+                           const uint *map, gs_dom dom, gs_op op)
+{ /* component k of the packed input (stride vn) is gathered into the k-th output array */
+  unsigned i; const unsigned unit_size = gs_dom_size[dom];
+  typedef void *ptr_to_void;
+  const ptr_to_void *p = out; const char *q = in; /* q advances one element per component */
+#define WITH_OP(T,OP) \
+  for(i=vn;i;--i) gather_##T##_##OP(*p++,(const T*)q,vn,map), q+=unit_size
+#define WITH_DOMAIN(T) SWITCH_OP(T,op)
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+#undef  WITH_OP
+}
+
+void gs_scatter_many_to_vec(void *out, const void *in, const unsigned vn,
+                            const uint *map, gs_dom dom)
+{ /* the k-th input array is scattered into component k of the packed output (stride vn) */
+  unsigned i; const unsigned unit_size = gs_dom_size[dom];
+  typedef const void *ptr_to_const_void;
+  char *p = out; const ptr_to_const_void *q = in; /* p advances one element per component */
+#define WITH_DOMAIN(T) \
+  for(i=vn;i;--i) scatter_##T((T*)p,vn,*q++,1,map), p+=unit_size
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+void gs_scatter_vec_to_many(void *out, const void *in, const unsigned vn,
+                            const uint *map, gs_dom dom)
+{ /* component k of the packed input (stride vn) is scattered into the k-th output array */
+  unsigned i; const unsigned unit_size = gs_dom_size[dom];
+  typedef void *ptr_to_void;
+  const ptr_to_void *p = out; const char *q = in; /* q advances one element per component */
+#define WITH_DOMAIN(T) \
+  for(i=vn;i;--i) scatter_##T(*p++,1,(const T*)q,vn,map), q+=unit_size
+  SWITCH_DOMAIN(dom);
+#undef  WITH_DOMAIN
+}
+
+#undef SWITCH_OP
+#undef SWITCH_OP_CASE
+#undef SWITCH_DOMAIN
+#undef SWITCH_DOMAIN_CASE
diff --git a/src/jl/gs_local.h b/src/jl/gs_local.h
new file mode 100644
index 0000000..fc7c414
--- /dev/null
+++ b/src/jl/gs_local.h
@@ -0,0 +1,43 @@
+#ifndef GS_LOCAL_H
+#define GS_LOCAL_H
+
+#if !defined(NAME_H) || !defined(TYPES_H) || !defined(GS_DEFS_H)
+#warning "gs_local.h" requires "name.h", "types.h", and "gs_defs.h"
+#endif
+
+#define gs_gather_array        PREFIXED_NAME(gs_gather_array       )
+#define gs_init_array          PREFIXED_NAME(gs_init_array         )
+#define gs_gather              PREFIXED_NAME(gs_gather             )
+#define gs_scatter             PREFIXED_NAME(gs_scatter            )
+#define gs_init                PREFIXED_NAME(gs_init               )
+#define gs_gather_vec          PREFIXED_NAME(gs_gather_vec         )
+#define gs_scatter_vec         PREFIXED_NAME(gs_scatter_vec        )
+#define gs_init_vec            PREFIXED_NAME(gs_init_vec           )
+#define gs_gather_many         PREFIXED_NAME(gs_gather_many        )
+#define gs_scatter_many        PREFIXED_NAME(gs_scatter_many       )
+#define gs_init_many           PREFIXED_NAME(gs_init_many          )
+#define gs_gather_vec_to_many  PREFIXED_NAME(gs_gather_vec_to_many )
+#define gs_scatter_many_to_vec PREFIXED_NAME(gs_scatter_many_to_vec)
+#define gs_scatter_vec_to_many PREFIXED_NAME(gs_scatter_vec_to_many)
+
+void gs_gather_array(void *out, const void *in, uint n,
+                     gs_dom dom, gs_op op);
+void gs_init_array(void *out, uint n, gs_dom dom, gs_op op);
+
+typedef void gs_gather_fun(
+  void *out, const void *in, const unsigned vn,
+  const uint *map, gs_dom dom, gs_op op);
+typedef void gs_scatter_fun(
+  void *out, const void *in, const unsigned vn,
+  const uint *map, gs_dom dom);
+typedef void gs_init_fun(
+  void *out, const unsigned vn,
+  const uint *map, gs_dom dom, gs_op op);
+
+extern gs_gather_fun gs_gather, gs_gather_vec, gs_gather_many,
+                     gs_gather_vec_to_many;
+extern gs_scatter_fun gs_scatter, gs_scatter_vec, gs_scatter_many,
+                      gs_scatter_many_to_vec, gs_scatter_vec_to_many;
+extern gs_init_fun gs_init, gs_init_vec, gs_init_many;
+
+#endif
diff --git a/src/jl/gs_test.c b/src/jl/gs_test.c
new file mode 100644
index 0000000..588a52b
--- /dev/null
+++ b/src/jl/gs_test.c
@@ -0,0 +1,68 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+#include "gs_defs.h"
+#include "gs.h"
+
+typedef double T;
+const gs_dom dom = gs_double;
+
+static void test(const struct comm *comm)
+{
+  struct gs_data *gsh;
+  const uint np = comm->np;
+  slong *id = tmalloc(slong,np+4);
+  T *v = tmalloc(T,np+4);
+  uint i;
+  id[0] = -(slong)(np+10+3*comm->id);
+  for(i=0;i<np;++i) id[i+1] = -(sint)(i+1);
+  id[np+1] = comm->id+1;
+  id[np+2] = comm->id+1;
+  id[np+3] = np-comm->id;
+  gsh = gs_setup(id,np+4,comm,0,gs_auto,1);
+  free(id);
+  
+  for(i=0;i<np+4;++i) v[i] = 1;
+  gs(v,dom,gs_add,0,gsh,0);
+  if(comm->id==0) for(i=0;i<np+4;++i) printf("%g\n",v[i]);
+  if(comm->id==0) printf("\n");
+  for(i=0;i<np+4;++i) v[i] = 1;
+  gs(v,dom,gs_add,1,gsh,0);
+  if(comm->id==0) for(i=0;i<np+4;++i) printf("%g\n",v[i]);
+
+  gs_free(gsh);
+  free(v);
+}
+
+int main(int narg, char *arg[])
+{
+  comm_ext world; int np;
+  struct comm comm;
+  
+#ifdef MPI
+  MPI_Init(&narg,&arg);
+  world = MPI_COMM_WORLD;
+  MPI_Comm_size(world,&np);
+#else
+  world=0, np=1;
+#endif
+
+  comm_init(&comm,world);
+
+  test(&comm);
+  
+  comm_free(&comm);
+
+#ifdef MPI
+  MPI_Finalize();
+#endif
+
+  return 0;
+}
diff --git a/src/jl/gs_test_old.c b/src/jl/gs_test_old.c
new file mode 100644
index 0000000..b75a2af
--- /dev/null
+++ b/src/jl/gs_test_old.c
@@ -0,0 +1,147 @@
+/* simple stand-alone test for parallel gather-scatter routines
+   assumes gather-scatter routines were compiled with default names
+   can compile to sequential version if MPI is not defined
+   
+   the test is as follows, where N is the number of procs:
+     there are N physical nodes (vertices)
+     each proc has 2 local/virtual nodes mapping to each physical node,
+       for a total of 2*N*N virtual nodes
+     virtual nodes are given values that correspond to a sequential ordering
+       (so that they range from 0 to 2*N*N-1)
+       the addition operation is performed and the result is checked,
+       the correct result being known a priori
+     the addition operation is also checked, in a similar manner, for
+       both the gs_op_vec and gs_op_many routines with vector dimension 3
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#ifdef MPI
+#  include <mpi.h>
+#else
+   typedef void MPI_Comm;
+#endif
+#include "name.h"
+#include "types.h"
+
+typedef long real;
+sint datatype = 4;
+
+#define fgs_setup     FORTRAN_NAME(gs_setup    ,GS_SETUP    )
+#define fgs_op        FORTRAN_NAME(gs_op       ,GS_OP       )
+#define fgs_op_vec    FORTRAN_NAME(gs_op_vec   ,GS_OP_VEC   )
+#define fgs_op_many   FORTRAN_NAME(gs_op_many  ,GS_OP_MANY  )
+#define fgs_op_fields FORTRAN_NAME(gs_op_fields,GS_OP_FIELDS)
+#define fgs_free      FORTRAN_NAME(gs_free     ,GS_FREE     )
+
+void fgs_setup(sint *handle, const slong id[], const sint *n,
+               const MPI_Comm *comm, const sint *np);
+void fgs_op(const sint *handle, void *u, const sint *dom, const sint *op,
+            const sint *transpose);
+void fgs_op_vec(const sint *handle, void *u, const sint *n,
+                const sint *dom, const sint *op, const sint *transpose);
+void fgs_op_many(const sint *handle, void *u1, void *u2, void *u3,
+                 void *u4, void *u5, void *u6, const sint *n,
+                 const sint *dom, const sint *op, const sint *transpose);
+void fgs_free(const sint *handle);
+
+void assert_is_zero(real v)
+{
+  if(fabs(v) < 1e-20) return;
+  printf("test failed\n");
+  exit(1);
+}
+
+int main(int narg, char* arg[])
+{
+  sint transpose=0;
+  sint id=0,np=1;
+  sint i,handle,maxv=3;
+  real *u;
+  slong *glindex;
+#ifndef MPI
+  int comm;
+#else
+  MPI_Comm comm;
+  MPI_Init(&narg,&arg);
+  MPI_Comm_dup(MPI_COMM_WORLD,&comm);
+  { int i;
+    MPI_Comm_rank(comm,&i); id=i;
+    MPI_Comm_size(comm,&i); np=i;
+  }
+#endif
+
+  glindex = malloc(np*2*sizeof(slong));
+  for(i=0;i<np;++i) glindex[2*i+1] = glindex[2*i] = i+1;
+  i=np*2;
+  fgs_setup(&handle,glindex,&i,&comm,&np);
+  free(glindex);
+  
+  u = malloc(np*2*sizeof(real));
+  for(i=0;i<np;++i) u[2*i  ] = (real)( 2*np*id + 2*i ),
+                    u[2*i+1] = (real)( 2*np*id + 2*i+1 );
+  /*for(i=0;i<np;++i) printf(" (%g %g)", u[2*i], u[2*i+1]); printf("\n");*/
+  i=1, fgs_op(&handle,u,&datatype,&i,&transpose);
+  /*for(i=0;i<np;++i) printf(" (%g %g)", u[2*i], u[2*i+1]); printf("\n");*/
+  for(i=0;i<np;++i) assert_is_zero( np*(2*np*(np-1)+4*i+1) - u[2*i] ),
+                    assert_is_zero( np*(2*np*(np-1)+4*i+1) - u[2*i+1]  );
+  free(u);
+
+  u = malloc(np*2*3*sizeof(real));
+  for(i=0;i<np;++i)
+    u[3*(2*i  )+0] = (real)( 3*(2*np*id + 2*i  ) + 0 ),
+    u[3*(2*i  )+1] = (real)( 3*(2*np*id + 2*i  ) + 1 ),
+    u[3*(2*i  )+2] = (real)( 3*(2*np*id + 2*i  ) + 2 ),
+    u[3*(2*i+1)+0] = (real)( 3*(2*np*id + 2*i+1) + 0 ),
+    u[3*(2*i+1)+1] = (real)( 3*(2*np*id + 2*i+1) + 1 ),
+    u[3*(2*i+1)+2] = (real)( 3*(2*np*id + 2*i+1) + 2 );
+  /*for(i=0;i<np;++i) {
+    int j;
+    printf("%d: ( ", id);
+    for(j=3*(2*i);j<=3*(2*i+1)+2;++j) printf("%g ",u[j]);
+    printf(")\n");
+  }*/
+  i=1, maxv=3, fgs_op_vec(&handle,u,&maxv,&datatype,&i,&transpose);
+  /*for(i=0;i<np;++i) {
+    int j;
+    printf("%d: ( ", id);
+    for(j=3*(2*i);j<=3*(2*i+1)+2;++j) printf("%g ",u[j]);
+    printf(")\n");
+  }*/
+  for(i=0;i<np;++i)
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*0) - u[3*(2*i  )+0] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*1) - u[3*(2*i  )+1] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*2) - u[3*(2*i  )+2] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*0) - u[3*(2*i+1)+0] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*1) - u[3*(2*i+1)+1] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*2) - u[3*(2*i+1)+2] );
+  free(u);
+
+  u = malloc(np*2*3*sizeof(real));
+  for(i=0;i<np;++i)
+    u[2*np*0+(2*i  )] = (real)( 3*(2*np*id + 2*i  ) + 0 ),
+    u[2*np*1+(2*i  )] = (real)( 3*(2*np*id + 2*i  ) + 1 ),
+    u[2*np*2+(2*i  )] = (real)( 3*(2*np*id + 2*i  ) + 2 ),
+    u[2*np*0+(2*i+1)] = (real)( 3*(2*np*id + 2*i+1) + 0 ),
+    u[2*np*1+(2*i+1)] = (real)( 3*(2*np*id + 2*i+1) + 1 ),
+    u[2*np*2+(2*i+1)] = (real)( 3*(2*np*id + 2*i+1) + 2 );
+  i=1, maxv=3, fgs_op_many(&handle,u,u+2*np,u+4*np,0,0,0,&maxv,
+                           &datatype,&i,&transpose);
+  for(i=0;i<np;++i)
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*0) - u[2*np*0+(2*i  )] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*1) - u[2*np*1+(2*i  )] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*2) - u[2*np*2+(2*i  )] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*0) - u[2*np*0+(2*i+1)] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*1) - u[2*np*1+(2*i+1)] ),
+    assert_is_zero( np*(6*np*(np-1)+12*i+3+2*2) - u[2*np*2+(2*i+1)] );
+  free(u);
+  
+  fgs_free(&handle);
+  printf("test on node %d/%d succeeded\n", (int)id+1, (int)np);
+#ifdef MPI  
+  MPI_Comm_free(&comm);
+  MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/src/jl/gs_unique_test.c b/src/jl/gs_unique_test.c
new file mode 100644
index 0000000..ea416dd
--- /dev/null
+++ b/src/jl/gs_unique_test.c
@@ -0,0 +1,72 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+#include "gs_defs.h"
+#include "gs.h"
+
+static void test(const struct comm *comm)
+{
+  uint i,np=comm->np,id=comm->id;
+  slong *glindex = tmalloc(slong,np*2);
+  char *out, *buf = tmalloc(char,80+np*2*30);
+  struct gs_data *gsh;
+  
+  for(i=0;i<np;++i) glindex[2*i+1]=glindex[2*i]=i+1;
+  
+  out = buf+sprintf(buf, "%03d bgn : [", (int)comm->id);
+  for(i=0;i<np*2;++i) out += sprintf(out, " %+d", (int)glindex[i]);
+  sprintf(out," ]"), puts(buf);
+  
+  gs_unique(glindex,np*2,comm);
+
+  out = buf+sprintf(buf, "%03d end : [", (int)comm->id);
+  for(i=0;i<np*2;++i) out += sprintf(out, " %+d", (int)glindex[i]);
+  sprintf(out," ]"), puts(buf);
+
+
+  for(i=0;i<np;++i) glindex[2*i+1]=glindex[2*i]=i+1;
+  gsh=gs_setup(glindex,np*2,comm,1,gs_auto,1);
+  for(i=0;i<np;++i) glindex[2*i+1]=glindex[2*i]=id;
+  gs(glindex,gs_slong,gs_add,0,gsh,0);
+  gs_free(gsh);
+
+  out = buf+sprintf(buf, "%03d own : [", (int)comm->id);
+  for(i=0;i<np*2;++i) out += sprintf(out, " %+d", (int)glindex[i]);
+  sprintf(out," ]"), puts(buf);
+
+  free(buf);
+  free(glindex);
+}
+
+int main(int narg, char *arg[])
+{
+  comm_ext world; int np;
+  struct comm comm;
+  
+#ifdef MPI
+  MPI_Init(&narg,&arg);
+  world = MPI_COMM_WORLD;
+  MPI_Comm_size(world,&np);
+#else
+  world=0, np=1;
+#endif
+
+  comm_init(&comm,world);
+
+  test(&comm);
+  
+  comm_free(&comm);
+
+#ifdef MPI
+  MPI_Finalize();
+#endif
+
+  return 0;
+}
diff --git a/src/jl/makefile.cdep b/src/jl/makefile.cdep
new file mode 100644
index 0000000..ae6672b
--- /dev/null
+++ b/src/jl/makefile.cdep
@@ -0,0 +1,48 @@
+amg.o: amg.c gs.h sarray_transfer.h crystal.h comm.h gs_defs.h sarray_sort.h sort.h mem.h fail.h types.h name.h c99.h
+comm.o: comm.c comm.h gs_local.h gs_defs.h tensor.h types.h fail.h name.h
+comm_test.o: comm_test.c comm.h gs_defs.h types.h fail.h name.h
+crs_test.o: crs_test.c crs.h gs.h comm.h gs_defs.h mem.h types.h fail.h name.h c99.h
+crystal.o: crystal.c mem.h comm.h types.h fail.h name.h c99.h
+crystal_test.o: crystal_test.c crystal.h mem.h comm.h types.h fail.h name.h c99.h
+fail.o: fail.c comm.h types.h fail.h name.h
+fcrs.o: fcrs.c crs.h comm.h mem.h types.h fail.h name.h c99.h
+fcrystal.o: fcrystal.c sarray_transfer.h sarray_sort.h sort.h crystal.h comm.h mem.h types.h fail.h name.h c99.h
+findpts.o: findpts.c findpts_imp.h findpts_imp.h sarray_sort.h sort.h sarray_transfer.h crystal.h comm.h gs_defs.h findpts_local.h findpts_el.h obbox.h poly.h mem.h fail.h types.h name.h c99.h
+findpts_el_2.o: findpts_el_2.c poly.h tensor.h mem.h types.h fail.h name.h c99.h
+findpts_el_2_test2.o: findpts_el_2_test2.c rdtsc.h rand_elt_test.h findpts_el.h obbox.h lob_bnd.h poly.h tensor.h mem.h fail.h name.h types.h c99.h
+findpts_el_2_test.o: findpts_el_2_test.c findpts_el.h poly.h mem.h fail.h types.h name.h c99.h
+findpts_el_3.o: findpts_el_3.c poly.h tensor.h mem.h types.h fail.h name.h c99.h
+findpts_el_3_test2.o: findpts_el_3_test2.c rdtsc.h rand_elt_test.h findpts_el.h obbox.h lob_bnd.h poly.h tensor.h mem.h fail.h name.h types.h c99.h
+findpts_el_3_test.o: findpts_el_3_test.c findpts_el.h poly.h mem.h fail.h types.h name.h c99.h
+findpts_local.o: findpts_local.c findpts_local_imp.h findpts_local_imp.h findpts_el.h sarray_sort.h sort.h poly.h obbox.h mem.h fail.h name.h types.h c99.h
+findpts_local_test.o: findpts_local_test.c rand_elt_test.h findpts_local.h findpts_el.h obbox.h poly.h types.h mem.h fail.h name.h c99.h
+findpts_test.o: findpts_test.c sarray_transfer.h crystal.h findpts.h rand_elt_test.h comm.h gs_defs.h poly.h mem.h types.h fail.h name.h c99.h
+gen_poly_imp.o: gen_poly_imp.c
+gs.o: gs.c sarray_transfer.h sarray_sort.h crystal.h sort.h mem.h comm.h gs_local.h gs_defs.h types.h fail.h name.h c99.h
+gs_local.o: gs_local.c gs_defs.h types.h name.h c99.h
+gs_test.o: gs_test.c gs.h gs_defs.h mem.h comm.h types.h fail.h name.h c99.h
+gs_test_old.o: gs_test_old.c types.h name.h
+gs_unique_test.o: gs_unique_test.c gs.h gs_defs.h mem.h comm.h types.h fail.h name.h c99.h
+lob_bnd.o: lob_bnd.c poly.h mem.h fail.h types.h name.h c99.h
+lob_bnd_test.o: lob_bnd_test.c lob_bnd.h poly.h tensor.h mem.h fail.h name.h types.h c99.h
+obbox.o: obbox.c lob_bnd.h poly.h tensor.h mem.h types.h fail.h name.h c99.h
+obbox_test.o: obbox_test.c rand_elt_test.h obbox.h lob_bnd.h poly.h mem.h fail.h name.h types.h c99.h
+poly.o: poly.c poly_imp.h mem.h types.h fail.h name.h c99.h
+poly_test2.o: poly_test2.c rdtsc.h poly.h mem.h fail.h name.h types.h c99.h
+poly_test.o: poly_test.c poly.h types.h name.h c99.h
+rand_elt_test.o: rand_elt_test.c lob_bnd.h poly.h name.h types.h c99.h
+sarray_sort.o: sarray_sort.c sort.h mem.h fail.h types.h name.h c99.h
+sarray_sort_test.o: sarray_sort_test.c sarray_sort.h sort.h mem.h types.h fail.h name.h c99.h
+sarray_transfer.o: sarray_transfer.c sort.h crystal.h mem.h comm.h types.h fail.h name.h c99.h
+sarray_transfer_test.o: sarray_transfer_test.c sarray_transfer.h crystal.h sarray_sort.h sort.h mem.h comm.h types.h fail.h name.h c99.h
+sort.o: sort.c sort_imp.h sort_imp.h sort_imp.h mem.h types.h fail.h name.h c99.h
+sort_test2.o: sort_test2.c rdtsc.h sort.h mem.h types.h fail.h name.h c99.h
+sort_test.o: sort_test.c sort.h mem.h types.h fail.h name.h c99.h
+sparse_cholesky.o: sparse_cholesky.c sort.h mem.h types.h fail.h name.h c99.h
+spchol_test.o: spchol_test.c sparse_cholesky.h mem.h types.h fail.h name.h c99.h
+tensor.o: tensor.c types.h name.h c99.h
+xxt.o: xxt.c gs.h sparse_cholesky.h sarray_sort.h sort.h mem.h comm.h gs_defs.h tensor.h types.h fail.h name.h c99.h
+xxt_test2.o: xxt_test2.c crs.h mem.h comm.h types.h fail.h name.h c99.h
+xxt_test.o: xxt_test.c crs.h comm.h types.h fail.h name.h
+
+OBJECTS= amg.o comm.o comm_test.o crs_test.o crystal.o crystal_test.o fail.o fcrs.o fcrystal.o findpts.o findpts_el_2.o findpts_el_2_test2.o findpts_el_2_test.o findpts_el_3.o findpts_el_3_test2.o findpts_el_3_test.o findpts_local.o findpts_local_test.o findpts_test.o gen_poly_imp.o gs.o gs_local.o gs_test.o gs_test_old.o gs_unique_test.o lob_bnd.o lob_bnd_test.o obbox.o obbox_test.o poly.o poly_test2.o poly_test.o rand_elt_test.o sarray_sort.o sarray_sort_test.o sarray_transfer.o sarray_transfer_test.o sort.o sort_test2.o sort_test.o sparse_cholesky.o spchol_test.o tensor.o xxt.o xxt_test2.o xxt_test.o
diff --git a/src/jl/mem.h b/src/jl/mem.h
new file mode 100644
index 0000000..e55b81a
--- /dev/null
+++ b/src/jl/mem.h
@@ -0,0 +1,168 @@
+#ifndef MEM_H
+#define MEM_H
+
+/* requires:
+     <stddef.h> for size_t, offsetof
+     <stdlib.h> for malloc, calloc, realloc, free
+     <string.h> for memcpy
+     "c99.h"
+     "fail.h"
+*/
+
+#if !defined(C99_H) || !defined(FAIL_H)
+#error "mem.h" requires "c99.h" and "fail.h"
+#endif
+
+/* 
+   All memory management goes through the wrappers defined in this
+   header. Diagnostics can be turned on with
+     -DPRINT_MALLOCS=1
+   Then all memory management operations will be printed to stdout.
+   
+   Most memory management occurs through use of the "array" type,
+   defined below, which defines a generic dynamically-sized array
+   that grows in bursts. The "buffer" type is a "char" array and
+   is often passed around by code to provide a common area for
+   scratch work.
+*/
+
+#ifndef PRINT_MALLOCS
+#  define PRINT_MALLOCS 0
+#else
+#  include <stdio.h>
+#  ifndef comm_gbl_id
+#    define comm_gbl_id PREFIXED_NAME(comm_gbl_id)
+#    define comm_gbl_np PREFIXED_NAME(comm_gbl_np)
+#    include "types.h"
+     extern uint comm_gbl_id, comm_gbl_np;
+#  endif
+#endif
+
+/*--------------------------------------------------------------------------
+   Memory Allocation Wrappers to Catch Out-of-memory
+  --------------------------------------------------------------------------*/
+
+static inline void *smalloc(size_t size, const char *file, unsigned line)
+{
+  void *restrict res = malloc(size);
+  #if PRINT_MALLOCS
+  fprintf(stdout,"MEM: proc %04d: %p = malloc(%ld) @ %s(%u)\n",
+          (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout);
+  #endif
+  if(!res && size)
+    fail(1,file,line,"allocation of %ld bytes failed\n",(long)size);
+  return res;
+}
+
+/* calloc wrapper; fail()s on NULL unless zero bytes were requested */
+static inline void *scalloc(
+  size_t nmemb, size_t size, const char *file, unsigned line)
+{
+  void *restrict res = calloc(nmemb, size);
+  #if PRINT_MALLOCS
+  fprintf(stdout,"MEM: proc %04d: %p = calloc(%ld) @ %s(%u)\n",
+          (int)comm_gbl_id,res,(long)(size*nmemb),file,line), fflush(stdout);
+  #endif
+  if(!res && nmemb && size)
+    fail(1,file,line,"allocation of %ld bytes failed\n",(long)(size*nmemb));
+  return res;
+}
+/* realloc wrapper; fail()s on NULL unless zero bytes were requested */
+static inline void *srealloc(
+  void *restrict ptr, size_t size, const char *file, unsigned line)
+{
+  void *restrict res = realloc(ptr, size);
+  #if PRINT_MALLOCS
+  if(res!=ptr) {
+    if(ptr)
+      fprintf(stdout,"MEM: proc %04d: %p freed by realloc @ %s(%u)\n",
+              (int)comm_gbl_id,ptr,file,line), fflush(stdout);
+    fprintf(stdout,"MEM: proc %04d: %p = realloc of %p to %ld @ %s(%u)\n",
+            (int)comm_gbl_id,res,ptr,(long)size,file,line), fflush(stdout);
+  } else
+    fprintf(stdout,"MEM: proc %04d: %p realloc'd to %ld @ %s(%u)\n",
+            (int)comm_gbl_id,res,(long)size,file,line), fflush(stdout);
+  #endif
+  if(!res && size)
+    fail(1,file,line,"allocation of %ld bytes failed\n",(long)size);
+  return res;
+}
+
+#define tmalloc(type, count) \
+  ((type*) smalloc((count)*sizeof(type),__FILE__,__LINE__) )
+#define tcalloc(type, count) \
+  ((type*) scalloc((count),sizeof(type),__FILE__,__LINE__) )
+#define trealloc(type, ptr, count) \
+  ((type*) srealloc((ptr),(count)*sizeof(type),__FILE__,__LINE__) )
+
+#if PRINT_MALLOCS
+static inline void sfree(void *restrict ptr, const char *file, unsigned line)
+{
+  free(ptr);
+  fprintf(stdout,"MEM: proc %04d: %p freed @ %s(%u)\n",
+          (int)comm_gbl_id,ptr,file,line), fflush(stdout);
+}
+#define free(x) sfree(x,__FILE__,__LINE__)
+#endif
+
+/*--------------------------------------------------------------------------
+   A dynamic array
+  --------------------------------------------------------------------------*/
+struct array { void *ptr; size_t n,max; };
+#define null_array {0,0,0}
+static void array_init_(struct array *a, size_t max, size_t size,
+                        const char *file, unsigned line)
+{
+  a->n=0, a->max=max, a->ptr=smalloc(max*size,file,line);
+}
+static void array_resize_(struct array *a, size_t max, size_t size,
+                          const char *file, unsigned line)
+{
+  a->max=max, a->ptr=srealloc(a->ptr,max*size,file,line);
+}
+static void *array_reserve_(struct array *a, size_t min, size_t size,
+                            const char *file, unsigned line)
+{
+  size_t max = a->max;
+  if(max<min) {
+    max+=max/2+1;
+    if(max<min) max=min;
+    array_resize_(a,max,size,file,line);
+  }
+  return a->ptr;
+}
+
+#define array_free(a) (free((a)->ptr))
+#define array_init(T,a,max) array_init_(a,max,sizeof(T),__FILE__,__LINE__)
+#define array_resize(T,a,max) array_resize_(a,max,sizeof(T),__FILE__,__LINE__)
+#define array_reserve(T,a,min) array_reserve_(a,min,sizeof(T),__FILE__,__LINE__)
+
+static void array_cat_(size_t size, struct array *d, const void *s, size_t n,
+                       const char *file, unsigned line)
+{
+  char *out = array_reserve_(d,d->n+n,size, file,line);
+  memcpy(out+d->n*size, s, n*size);
+  d->n+=n;
+}
+
+#define array_cat(T,d,s,n) array_cat_(sizeof(T),d,s,n,__FILE__,__LINE__)
+
+/*--------------------------------------------------------------------------
+   Buffer = char array
+  --------------------------------------------------------------------------*/
+typedef struct array buffer;
+#define null_buffer null_array
+#define buffer_init(b,max) array_init(char,b,max)
+#define buffer_resize(b,max) array_resize(char,b,max)
+#define buffer_reserve(b,max) array_reserve(char,b,max)
+#define buffer_free(b) array_free(b)
+
+/*--------------------------------------------------------------------------
+   Alignment routines
+  --------------------------------------------------------------------------*/
+#define ALIGNOF(T) offsetof(struct { char c; T x; }, x)
+static size_t align_as_(size_t a, size_t n) { return (n+a-1)/a*a; }
+#define align_as(T,n) align_as_(ALIGNOF(T),n)
+#define align_ptr(T,base,offset) ((T*)((char*)(base)+align_as(T,offset)))
+#endif
+
diff --git a/src/jl/name.h b/src/jl/name.h
new file mode 100644
index 0000000..b4bcd91
--- /dev/null
+++ b/src/jl/name.h
@@ -0,0 +1,44 @@
+#ifndef NAME_H
+#define NAME_H
+
+/* defines macros that establish
+   * the FORTRAN naming convention
+     default      gs_setup, etc.
+     -DUPCASE     GS_SETUP, etc.
+     -DUNDERSCORE gs_setup_, etc.
+   * a prefix for all external (non-FORTRAN) function names
+     for example, -DPREFIX=jl_   transforms fail -> jl_fail
+   * a prefix for all external FORTRAN function names     
+     for example, -DFPREFIX=jlf_ transforms gs_setup_ -> jlf_gs_setup_
+*/
+
+/* the following macro functions like a##b,
+   but will expand a and/or b if they are themselves macros */
+#define TOKEN_PASTE_(a,b) a##b
+#define TOKEN_PASTE(a,b) TOKEN_PASTE_(a,b)
+
+#ifdef PREFIX
+#  define PREFIXED_NAME(x) TOKEN_PASTE(PREFIX,x)
+#else
+#  define PREFIXED_NAME(x) x
+#endif
+
+#ifdef FPREFIX
+#  define FPREFIXED_NAME(x) TOKEN_PASTE(FPREFIX,x)
+#else
+#  define FPREFIXED_NAME(x) x
+#endif
+
+#if defined(UPCASE)
+#  define FORTRAN_NAME(low,up) FPREFIXED_NAME(up)
+#  define FORTRAN_UNPREFIXED(low,up) up
+#elif defined(UNDERSCORE)
+#  define FORTRAN_NAME(low,up) FPREFIXED_NAME(TOKEN_PASTE(low,_))
+#  define FORTRAN_UNPREFIXED(low,up) TOKEN_PASTE(low,_)
+#else
+#  define FORTRAN_NAME(low,up) FPREFIXED_NAME(low)
+#  define FORTRAN_UNPREFIXED(low,up) low
+#endif
+
+#endif
+
diff --git a/src/jl/odep_info.py b/src/jl/odep_info.py
new file mode 100755
index 0000000..620d0ec
--- /dev/null
+++ b/src/jl/odep_info.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+# From "nm -g" of the given object files, print dependencies and link rules.
+import sys, os, re
+
+obj_files = sys.argv[1:]
+
+defined = dict((x,set()) for x in obj_files)    # non-'U' symbols per object
+undefined = dict((x,set()) for x in obj_files)  # 'U' symbols per object
+nm_re = re.compile(r"[0-9a-fA-F]*\s*([BCDRTU])\s+([A-Za-z_][A-Za-z_0-9]*)\s*")
+def nm_match(x): return ( nm_re.match(line) for line in os.popen('nm -g '+x) )
+def nm_line(x,m):
+	if m.group(1)=='U': undefined[x].add(m.group(2))
+	else: defined[x].add(m.group(2))
+[ [ nm_line(x,m) for m in nm_match(x) if m is not None ] for x in obj_files ]
+
+def closure(seq,f):
+	v = [], [x for x in seq], set(x for x in seq)
+	while len(v[1]): [(v[1].append(y),v[2].add(y)) for y in 
+	  f((lambda x: (v[0].append(x),x)[1])(v[1].pop())) if not y in v[2]]
+	return v[0]
+
+needs={}  # memo: object -> objects defining a symbol it leaves undefined
+def get_needs(x):
+	if x not in needs:
+		needs[x]=[y for y in obj_files if len(defined[y]&undefined[x])]
+	return needs[x]
+deps = dict((x,closure(get_needs(x),get_needs)) for x in obj_files)
+
+for x in deps:
+	print("%s depends on %s" % (x, "".join(" "+y for y in deps[x])))
+print("")
+
+results = [ os.path.splitext(x)[0] for x in obj_files if 'main' in defined[x] ]
+print("RESULTS="+"".join(" "+y for y in results))
+print("")
+
+def need_X(objs):
+	for x in objs:
+		if "XOpenDisplay" in undefined[x]: return True
+	return False
+
+for x in results:
+	objs = deps[x+'.o']
+	if not (x+'.o') in objs: objs.append(x+'.o')
+	sobjs = "".join(" "+y for y in objs)
+	if need_X(objs):
+		print(x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -lX11 -o $@")
+	else:
+		print(x+":"+sobjs+" ; @echo LINK $@; $(LINKCMD) $^ -o $@")
+
diff --git a/src/jl/rand_elt_test.c b/src/jl/rand_elt_test.c
new file mode 100644
index 0000000..1e11dae
--- /dev/null
+++ b/src/jl/rand_elt_test.c
@@ -0,0 +1,169 @@
+#include <stdlib.h>
+#include <math.h>
+#include "c99.h"
+#include "types.h"
+#include "name.h"
+#include "poly.h"
+#include "lob_bnd.h"
+
+static double det_2(const double A[4]) { return A[0]*A[3]-A[1]*A[2]; }
+
+static double quad_2(const double x0, const double g[2], const double H[3],
+                     const double r[2])
+{
+  return x0 + (g[0]*r[0]+g[1]*r[1])
+            + (  r[0] * (H[0]*r[0]+H[1]*r[1])
+               + r[1] * (H[1]*r[0]+H[2]*r[1]) )/2;
+}
+
+static void quad_2_grad(double grad[2], const double g[2], const double H[3],
+                        const double r[2])
+{
+  grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]);
+  grad[1] = g[1] + (H[1]*r[0]+H[2]*r[1]);
+}
+
+static double quad_2_jac(const double g[4], const double H[6],
+                         const double r[2])
+{
+  double J[4];
+  quad_2_grad(J  ,g  ,H  ,r);
+  quad_2_grad(J+2,g+2,H+3,r);
+  return det_2(J);
+}
+
+static double det_3(const double A[9])
+{
+  const double a = A[4]*A[8]-A[5]*A[7],
+               b = A[5]*A[6]-A[3]*A[8],
+               c = A[3]*A[7]-A[4]*A[6];
+  return A[0]*a+A[1]*b+A[2]*c;
+}
+
+static double quad_3(const double x0, const double g[3], const double H[6],
+                     const double r[3])
+{
+  return x0 + (g[0]*r[0]+g[1]*r[1]+g[2]*r[2])
+            + (  r[0] * (H[0]*r[0]+H[1]*r[1]+H[2]*r[2])
+               + r[1] * (H[1]*r[0]+H[3]*r[1]+H[4]*r[2])
+               + r[2] * (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]) )/2;
+}
+
+static void quad_3_grad(double grad[3], const double g[3], const double H[6],
+                        const double r[3])
+{
+  grad[0] = g[0] + (H[0]*r[0]+H[1]*r[1]+H[2]*r[2]);
+  grad[1] = g[1] + (H[1]*r[0]+H[3]*r[1]+H[4]*r[2]);
+  grad[2] = g[2] + (H[2]*r[0]+H[4]*r[1]+H[5]*r[2]);
+}
+
+static double quad_3_jac(const double g[9], const double H[18],
+                         const double r[3])
+{
+  double J[9];
+  quad_3_grad(J  ,g  ,H   ,r);
+  quad_3_grad(J+3,g+3,H+ 6,r);
+  quad_3_grad(J+6,g+6,H+12,r);
+  return det_3(J);
+}
+
+void rand_elt_2(double *x, double *y,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns)
+{
+  static int init=0;
+  static double z4[4], lob_bnd_data[16+3*4*(2*16+1)],
+                work[2*16*(4+16+1)];
+  unsigned i,j;
+  double x0[2], g[4], H[6], jac[4*4], r[2];
+  struct dbl_range jr;
+  if(!init) {
+    init=1;
+    lobatto_nodes(z4,4);
+    lob_bnd_setup(lob_bnd_data,4,16);
+  }
+  do {
+    for(i=0;i<4;++i) g[i] = -1+2*(rand()/(double)RAND_MAX);
+    for(i=0;i<6;++i) H[i] =.5*(-1+2*(rand()/(double)RAND_MAX));
+    for(j=0;j<4;++j) { r[1] = z4[j];
+      for(i=0;i<4;++i) { r[0] = z4[i];
+        jac[j*4+i] = quad_2_jac(g,H,r);
+      }
+    }
+    jr = lob_bnd_2(lob_bnd_data,4,16, lob_bnd_data,4,16, jac, work);
+    /*printf("Jacobian range %g, %g\n", jr.min, jr.max);*/
+  } while(jr.max*jr.min<=0);
+  for(i=0;i< 2;++i) x0[i] = -1+2*(rand()/(double)RAND_MAX);
+  for(j=0;j<ns;++j) {   r[1] = zs[j];
+    for(i=0;i<nr;++i) { r[0] = zr[i];
+      x[j*nr+i] = quad_2(x0[0],g  ,H  ,r);
+      y[j*nr+i] = quad_2(x0[1],g+2,H+3,r);
+    }
+  }
+}
+
+void rand_elt_3(double *x, double *y, double *z,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns,
+                const double *zt, unsigned nt)
+{
+  static int init=0;
+  static double z4[4], lob_bnd_data[16+3*4*(2*16+1)],
+                work[2*16*16*(4+16+1)];
+  unsigned i,j,k;
+  double x0[3], g[9], H[18], jac[4*4*4], r[3];
+  struct dbl_range jr;
+  if(!init) {
+    init=1;
+    lobatto_nodes(z4,4);
+    lob_bnd_setup(lob_bnd_data,4,16);
+  }
+  do {
+    for(i=0;i< 9;++i) g[i] = -1+2*(rand()/(double)RAND_MAX);
+    for(i=0;i<18;++i) H[i] =.5*(-1+2*(rand()/(double)RAND_MAX));
+    for(k=0;k<4;++k) { r[2] = z4[k];
+      for(j=0;j<4;++j) { r[1] = z4[j];
+        for(i=0;i<4;++i) { r[0] = z4[i];
+          jac[(k*4+j)*4+i] = quad_3_jac(g,H,r);
+        }
+      }
+    }
+    jr = lob_bnd_3(lob_bnd_data,4,16, lob_bnd_data,4,16, lob_bnd_data,4,16,
+                   jac, work);
+    /*printf("Jacobian range %g, %g\n", jr.min, jr.max);*/
+  } while(jr.max*jr.min<=0);
+  for(i=0;i< 3;++i) x0[i] = -1+2*(rand()/(double)RAND_MAX);
+  for(k=0;k<nt;++k) {     r[2] = zt[k];
+    for(j=0;j<ns;++j) {   r[1] = zs[j];
+      for(i=0;i<nr;++i) { r[0] = zr[i];
+        x[(k*ns+j)*nr+i] = quad_3(x0[0],g  ,H   ,r);
+        y[(k*ns+j)*nr+i] = quad_3(x0[1],g+3,H+ 6,r);
+        z[(k*ns+j)*nr+i] = quad_3(x0[2],g+6,H+12,r);
+      }
+    }
+  }
+}
+
+#define PI 3.1415926535897932384626433832795028841971693993751058209749445923
+
+void bubble_elt(double *x, double *y, double *z,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns,
+                const double *zt, unsigned nt, int type)
+{
+  unsigned i,j,k;
+  for(k=0;k<nt;++k) for(j=0;j<ns;++j) for(i=0;i<nr;++i) {
+    double dx=0,dy=0,dz=0;
+    switch(type) {
+      case 0: dx =  cos(PI*zs[j]/2)*cos(PI*zt[k]/2); break;
+      case 1: dx = -cos(PI*zs[j]/2)*cos(PI*zt[k]/2); break;
+      case 2: dy =  cos(PI*zt[k]/2)*cos(PI*zr[i]/2); break;
+      case 3: dy = -cos(PI*zt[k]/2)*cos(PI*zr[i]/2); break;
+      case 4: dz =  cos(PI*zr[i]/2)*cos(PI*zs[j]/2); break;
+      case 5: dz = -cos(PI*zr[i]/2)*cos(PI*zs[j]/2); break;
+    }
+    x[(k*ns+j)*nr+i] = zr[i] + dx;
+    y[(k*ns+j)*nr+i] = zs[j] + dy;
+    z[(k*ns+j)*nr+i] = zt[k] + dz;
+  }
+}
diff --git a/src/jl/rand_elt_test.h b/src/jl/rand_elt_test.h
new file mode 100644
index 0000000..40c5325
--- /dev/null
+++ b/src/jl/rand_elt_test.h
@@ -0,0 +1,18 @@
+#ifndef RAND_ELT_TEST_H
+#define RAND_ELT_TEST_H
+
+void rand_elt_2(double *x, double *y,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns);
+
+void rand_elt_3(double *x, double *y, double *z,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns,
+                const double *zt, unsigned nt);
+
+void bubble_elt(double *x, double *y, double *z,
+                const double *zr, unsigned nr,
+                const double *zs, unsigned ns,
+                const double *zt, unsigned nt, int type);
+
+#endif
diff --git a/src/jl/rdtsc.h b/src/jl/rdtsc.h
new file mode 100644
index 0000000..b39663e
--- /dev/null
+++ b/src/jl/rdtsc.h
@@ -0,0 +1,12 @@
+#ifndef RDTSC_H
+#define RDTSC_H
+
+#define DEFINE_HW_COUNTER() \
+static __inline__ unsigned long long getticks(void) \
+{ \
+   volatile unsigned low, high; \
+   __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)); \
+   return ((unsigned long long)high)<<32 | low; \
+}
+
+#endif
diff --git a/src/jl/sarray_sort.c b/src/jl/sarray_sort.c
new file mode 100644
index 0000000..0ec26d1
--- /dev/null
+++ b/src/jl/sarray_sort.c
@@ -0,0 +1,45 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "types.h"
+#include "fail.h"
+#include "mem.h"
+#include "sort.h"
+
+#define sarray_permute_     PREFIXED_NAME(sarray_permute_)
+#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_)
+/* In-place A[i] <- A[perm[i]] for n items of size bytes; work holds 1 item. */
+void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work)
+{
+  char *const ar = A, *const item = work;
+  sint *const fperm = (sint*)perm; /* signed view: negative marks "moved" */
+  uint i;
+  for(i=0;i<n;++i) {
+    sint pi = fperm[i];
+    if(pi<0) { fperm[i] = -pi-1; continue; } /* already moved: undo the mark */
+    else if((uint)pi==i) continue;           /* fixed point */
+    else {
+      char *dst = ar+i*size, *src = ar+pi*size;
+      memcpy(item, dst, size); /* save A[i]; it closes the cycle below */
+      for(;;) { /* follow the cycle that starts at i */
+        sint ppi;
+        memcpy(dst, src, size);
+        dst=src;
+        ppi=fperm[pi], fperm[pi]=-ppi-1, pi=ppi; /* mark pi moved, advance */
+        if((uint)pi==i) break;
+        src=ar+pi*size;
+      }
+      memcpy(dst, item, size);
+    }
+  }
+}
+/* Permute A by the uint perm at buf->ptr; the scratch item sits after it. */
+void sarray_permute_buf_(size_t align, size_t size, void *A, size_t n,
+                         buffer *buf)
+{
+  buffer_reserve(buf,align_as_(align,n*sizeof(uint)+size));
+  sarray_permute_(size,A,n, buf->ptr,
+                 (char*)buf->ptr + align_as_(align,n*sizeof(uint)));
+}
diff --git a/src/jl/sarray_sort.h b/src/jl/sarray_sort.h
new file mode 100644
index 0000000..77dc653
--- /dev/null
+++ b/src/jl/sarray_sort.h
@@ -0,0 +1,89 @@
+#ifndef SARRAY_SORT_H
+#define SARRAY_SORT_H
+
+#if !defined(SORT_H)
+#warning "sarray_sort.h" requires "sort.h"
+#endif
+
+/*------------------------------------------------------------------------------
+  
+  Array of Structs Sort
+  
+  buffer *buf;
+  typedef struct { ... } T;
+  T A[n];
+
+  sarray_sort(T,A,n, field_name,is_long, buf)
+    - sort A according to the struct field "field_name",
+      which is a ulong/uint field according as is_long is true/false
+
+  sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf)
+    - sort A by field1 then field2
+
+  sarray_permute(T,A,n, perm, work)
+    - permute A  (in-place)
+      A[0] <- A[perm[0]], etc.
+      work needs to hold sizeof(T) bytes  (i.e., 1 T)
+
+  sarray_permute_buf(T,A,n, buf);
+    - permute A according to the permutation in buf
+      A[0] <- A[perm[0]], etc.
+      where uint *perm = buf->ptr   (see "sort.h")
+
+  ----------------------------------------------------------------------------*/
+
+
+#define sarray_permute_     PREFIXED_NAME(sarray_permute_)
+#define sarray_permute_buf_ PREFIXED_NAME(sarray_permute_buf_)
+
+void sarray_permute_(size_t size, void *A, size_t n, uint *perm, void *work);
+void sarray_permute_buf_(
+  size_t align, size_t size, void *A, size_t n, buffer *buf);
+
+#define sarray_permute(T,A,n, perm, work) \
+  sarray_permute_(sizeof(T),A,n, perm, work)
+#define sarray_permute_buf(T,A,n, buf) \
+  sarray_permute_buf_(ALIGNOF(T),sizeof(T),A,n,buf)
+/* One sortp/sortp_long pass keyed on A[].field; keep is the start_perm flag. */
+#define sarray_sort_field(T,A,n, field,is_long, buf,keep) do { \
+  if(is_long) \
+    sortp_long(buf,keep, (ulong*)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \
+  else \
+    sortp     (buf,keep, (uint *)((char*)(A)+offsetof(T,field)),n,sizeof(T)); \
+} while (0)
+/* Single-key sort: compute the permutation in buf, then apply it to A. */
+#define sarray_sort(T,A,n, field,is_long, buf) do { \
+  sarray_sort_field(T,A,n, field,is_long, buf,0); \
+  sarray_permute_buf(T,A,n, buf); \
+} while (0)
+/* Two keys: field2 is sorted first, then field1 stably, so field1 dominates. */
+#define sarray_sort_2(T,A,n, field1,is_long1, field2,is_long2, buf) do { \
+  sarray_sort_field(T,A,n, field2,is_long2, buf,0); \
+  sarray_sort_field(T,A,n, field1,is_long1, buf,1); \
+  sarray_permute_buf(T,A,n, buf); \
+} while (0)
+/* Three keys: passes run field3, field2, field1; field1 is most significant. */
+#define sarray_sort_3(T,A,n, field1,is_long1, field2,is_long2, \
+                             field3,is_long3, buf) do { \
+  sarray_sort_field(T,A,n, field3,is_long3, buf,0); \
+  sarray_sort_field(T,A,n, field2,is_long2, buf,1); \
+  sarray_sort_field(T,A,n, field1,is_long1, buf,1); \
+  sarray_permute_buf(T,A,n, buf); \
+} while (0)
+/* Four keys: passes run field4 down to field1; field1 is most significant. */
+#define sarray_sort_4(T,A,n, field1,is_long1, field2,is_long2, \
+                             field3,is_long3, field4,is_long4, buf) do { \
+  sarray_sort_field(T,A,n, field4,is_long4, buf,0); \
+  sarray_sort_field(T,A,n, field3,is_long3, buf,1); \
+  sarray_sort_field(T,A,n, field2,is_long2, buf,1); \
+  sarray_sort_field(T,A,n, field1,is_long1, buf,1); \
+  sarray_permute_buf(T,A,n, buf); \
+} while (0)
+/* Writes the inverse of perm into pinv: pinv[perm[k]] = k for 0 <= k < n. */
+static void sarray_perm_invert(
+  uint *const pinv, const uint *const perm, const uint n)
+{
+  uint k=0; while(k<n) { pinv[perm[k]] = k; ++k; }
+}
+
+#endif
diff --git a/src/jl/sarray_sort_test.c b/src/jl/sarray_sort_test.c
new file mode 100644
index 0000000..15fa780
--- /dev/null
+++ b/src/jl/sarray_sort_test.c
@@ -0,0 +1,47 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "sort.h"
+#include "sarray_sort.h"
+/* Fills 500 records with random keys, sorts them two ways, prints each run. */
+int main()
+{
+  struct rec { double d; slong l; sint i; float f; };
+  buffer buf = {0,0,0};
+  struct rec rec[500];
+  uint i;
+  
+  for(i=0;i<500;++i) {
+    sint num1 = rand() & 0xff; /* small key in 0..255 */
+    slong num2 = rand();       /* wide key spliced from three rand() calls */
+    num2<<=(CHAR_BIT)*sizeof(int)-1;
+    num2|=rand();
+    num2<<=(CHAR_BIT)*sizeof(int)-1;
+    num2|=rand();
+    num2= num2<0?-num2:num2;   /* flip negative results to positive */
+    rec[i].d = num2;
+    rec[i].f = num2;
+    rec[i].l = num2;
+    rec[i].i = num1;
+  }
+  sarray_sort_2(struct rec,rec,500, i,0, l,1, &buf); /* by i, ties by l */
+  for(i=0;i<500;++i)
+    printf("%g\t%g\t%ld\t%d\n",
+      rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i);
+
+  printf("\n");
+  sarray_sort(struct rec,rec,500, l,1, &buf); /* by l only */
+  for(i=0;i<500;++i)
+    printf("%g\t%g\t%ld\t%d\n",
+      rec[i].d,rec[i].f,(long)rec[i].l,(int)rec[i].i);
+  buffer_free(&buf);
+  return 0;
+}
+
diff --git a/src/jl/sarray_transfer.c b/src/jl/sarray_transfer.c
new file mode 100644
index 0000000..9eed6ba
--- /dev/null
+++ b/src/jl/sarray_transfer.c
@@ -0,0 +1,197 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+#include "crystal.h"
+#include "sort.h"
+
+#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many)
+#define sarray_transfer_     PREFIXED_NAME(sarray_transfer_    )
+#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_)
+/* Pack rows in perm order into data as messages, omitting each row's proc. */
+static void pack_int(
+  buffer *const data, const unsigned row_size, const uint id,
+  const char *const restrict input, const uint n, const unsigned size,
+  const unsigned p_off, const uint *const restrict perm)
+{
+  const unsigned after = p_off + sizeof(uint), after_len = size-after;
+/* GET_P reads the row's target; COPY_ROW copies the row minus that uint. */
+#define GET_P() memcpy(&p,row+p_off,sizeof(uint))
+#define COPY_ROW() memcpy(out,row,p_off), \
+                   memcpy((char*)out + p_off,row+after,after_len)
+/* Also used by pack_ext: per target, [target, source, length] then rows. */
+#define PACK_BODY() do {                                                  \
+  uint dummy, *len_ptr=&dummy;                                            \
+  uint i, p,lp = -(uint)1, len=0;                                         \
+  uint *restrict out = buffer_reserve(data, n*(row_size+3)*sizeof(uint)); \
+  for(i=0;i<n;++i) {                                                      \
+    const char *row = input + size*perm[i];                               \
+    GET_P();                                                              \
+    if(p!=lp) {                                                           \
+      lp = p;                                                             \
+      *len_ptr = len;       /* previous message length */                 \
+      *out++ = p;           /* target */                                  \
+      *out++ = id;          /* source */                                  \
+      len_ptr=out++; len=0; /* length (t.b.d.) */                         \
+    }                                                                     \
+    COPY_ROW();                                                           \
+    out += row_size, len += row_size;                                     \
+  }                                                                       \
+  *len_ptr = len; /* last message length */                               \
+  data->n = out - (uint*)data->ptr;                                       \
+} while(0)
+  PACK_BODY();
+#undef COPY_ROW
+#undef GET_P
+}
+/* Pack whole rows in perm order; each row's target is read from proc[]. */
+static void pack_ext(
+  buffer *const data, const unsigned row_size, const uint id,
+  const char *const restrict input, const uint n, const unsigned size,
+  const uint *const restrict proc, const unsigned proc_stride,
+  const uint *const restrict perm)
+{
+  #define GET_P() p=*(const uint*)((const char*)proc+proc_stride*perm[i])
+  #define COPY_ROW() memcpy(out,row,size)
+  PACK_BODY();
+  #undef PACK_BODY
+  #undef COPY_ROW
+  #undef GET_P
+}
+/* Fill bytes [off,off+size) of every packed row from input, in perm order. */
+static void pack_more(
+  buffer *const data, const unsigned off, const unsigned row_size,
+  const char *const restrict input, const unsigned size,
+  const uint *restrict perm)
+{
+  uint *restrict buf = data->ptr, *buf_end = buf+data->n;
+  while(buf!=buf_end) {
+    uint *msg_end = buf+3+buf[2]; buf+=3; /* step over the 3-word header */
+    while(buf!=msg_end)
+      memcpy((char*)buf+off, input+size*(*perm++), size), buf+=row_size;
+  }
+}
+/* Copy size bytes at byte offset off of every packed row to out, in order. */
+static void unpack_more(
+  char *restrict out, const unsigned size,
+  const buffer *const data, const unsigned off, const unsigned row_size)
+{
+  const uint *restrict buf = data->ptr, *buf_end = buf+data->n;
+  while(buf!=buf_end) {
+    const uint *msg_end = buf+3+buf[2]; buf+=3; /* step over the header */
+    while(buf!=msg_end)
+      memcpy(out, (char*)buf+off, size), out+=size, buf+=row_size;
+  }
+}
+/* Rebuild rows in out, re-inserting a uint at p_off taken from the header. */
+static void unpack_int(
+  char *restrict out, const unsigned size, const unsigned p_off,
+  const buffer *const data, const unsigned row_size, int set_src)
+{
+  const unsigned after = p_off + sizeof(uint), after_len = size-after;
+  const uint *restrict buf = data->ptr, *buf_end = buf+data->n;
+  const unsigned pi = set_src ? 1:0; /* PACK_BODY: word 0 target, 1 source */
+  while(buf!=buf_end) {
+    const uint p=buf[pi], *msg_end = buf+3+buf[2]; buf+=3;
+    while(buf!=msg_end) {
+      memcpy(out,buf,p_off);
+      memcpy(out+p_off,&p,sizeof(uint));
+      memcpy(out+after,(const char *)buf+p_off,after_len);
+      out+=size, buf+=row_size;
+    }
+  }
+}
+/* Rows held in data: the summed message lengths divided by row_size. */
+static uint num_rows(const buffer *const data, const unsigned row_size)
+{
+  const uint *msg = data->ptr, *const stop = msg + data->n;
+  uint words;
+  for(words=0; msg!=stop; msg+=msg[2]+3) words+=msg[2];
+  return words/row_size;
+}
+/* Truncate data to at most max rows; returns the row count before capping. */
+static uint cap_rows(buffer *const data, const unsigned row_size,const uint max)
+{
+  uint *buf = data->ptr, *buf_end = buf + data->n;
+  const uint maxn = max*row_size; /* capacity in uint words */
+  uint n=0;                       /* payload words seen so far */
+  while(buf!=buf_end) {
+    uint len=buf[2]; n+=len;
+    if(n<maxn) buf+=len+3;
+    else { /* n>=maxn: trim the n-maxn excess words off this message */
+      buf[2]-=(n-maxn); data->n = (buf-(uint*)data->ptr)+3+buf[2];
+      buf+=len+3; /* len is still the untrimmed length */
+      while(buf!=buf_end) { uint len=buf[2]; n+=len, buf+=len+3; }
+      break;
+    }
+  }
+  return n/row_size;
+}
+
+/* Send rows of An parallel arrays to their target procs; An must be >= 1 */
+uint sarray_transfer_many(
+  struct array *const *const A, const unsigned *const size, const unsigned An,
+  const int fixed, const int ext, const int set_src, const unsigned p_off,
+  const uint *const restrict proc, const unsigned proc_stride,
+  struct crystal *const cr)
+{
+  uint n, *perm;
+  unsigned i,row_size,off,off1;
+  /* row_size: bytes of one combined row (no proc field if !ext), in uints */
+  off1 = size[0];
+  if(!ext) off1 -= sizeof(uint);
+  row_size=off1; for(i=1;i<An;++i) row_size += size[i];
+  row_size = (row_size+sizeof(uint)-1)/sizeof(uint);
+  /* perm: row indices ordered by target proc */
+  perm = sortp(&cr->work,0, proc,A[0]->n,proc_stride);
+
+  if(!ext) pack_int(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0],
+                    p_off, perm);
+  else     pack_ext(&cr->data, row_size, cr->comm.id, A[0]->ptr,A[0]->n,size[0],
+                    proc,proc_stride, perm);
+  for(off=off1,i=1;i<An;++i) if(size[i])
+    pack_more(&cr->data,off,row_size, A[i]->ptr,size[i], perm),off+=size[i];
+    
+  crystal_router(cr);
+  /* fixed==0: grow the arrays to fit; else keep at most min(A[i]->max) rows */
+  if(!fixed) {
+    n = num_rows(&cr->data,row_size);
+    for(i=0;i<An;++i)
+      array_reserve_(A[i],n,size[i],__FILE__,__LINE__), A[i]->n=n;
+  } else {
+    uint max=A[0]->max, an;
+    for(i=1;i<An;++i) if(A[i]->max<max) max=A[i]->max;
+    n = cap_rows(&cr->data,row_size, max);
+    an = n>max?max:n;
+    for(i=0;i<An;++i) A[i]->n=an;
+  }
+  /* copy the rows now in cr->data back out, one array at a time */
+  if(!ext) unpack_int (A[0]->ptr,size[0],p_off, &cr->data,  row_size, set_src);
+  else     unpack_more(A[0]->ptr,size[0],       &cr->data,0,row_size);
+  for(off=off1,i=1;i<An;++i) if(size[i])
+    unpack_more(A[i]->ptr,size[i], &cr->data,off,row_size),off+=size[i];
+    
+  return n; /* when fixed, this is the count before capping */
+}
+  
+/* One-array transfer; each row's target is the uint at byte offset p_off. */
+void sarray_transfer_(struct array *const A, const unsigned size,
+                      const unsigned p_off, const int set_src,
+                      struct crystal *const cr)
+{
+  sarray_transfer_many(&A,&size,1, 0,0,set_src,p_off,
+                       (uint*)((char*)A->ptr+p_off),size, cr);
+}
+/* One-array transfer; targets come from proc[] (proc_stride is in bytes). */
+void sarray_transfer_ext_(struct array *const A, const unsigned size,
+                          const uint *const proc, const unsigned proc_stride,
+                          struct crystal *const cr)
+{
+  sarray_transfer_many(&A,&size,1, 0,1,0,0, proc,proc_stride, cr);
+}
+
diff --git a/src/jl/sarray_transfer.h b/src/jl/sarray_transfer.h
new file mode 100644
index 0000000..c195e21
--- /dev/null
+++ b/src/jl/sarray_transfer.h
@@ -0,0 +1,95 @@
+#ifndef SARRAY_TRANSFER_H
+#define SARRAY_TRANSFER_H
+
+#if !defined(CRYSTAL_H)
+#warning "sarray_transfer.h" requires "crystal.h"
+#endif
+
+/*
+  High-level interface for the crystal router.
+  Given an array of structs, transfers each to the process indicated
+  by a field of the struct, which gets set to the source process on output.
+  
+  For the dynamic "array" type, see "mem.h".
+  
+  Requires a "crystal router" object:
+  
+    struct comm c;
+    struct crystal cr;
+    
+    comm_init(&c, MPI_COMM_WORLD);
+    crystal_init(&cr, &c);
+    
+  Example sarray_transfer usage:
+  
+    struct T { ...; uint proc; ...; };
+    struct array A = null_array;
+    struct T *p, *e;
+    
+    // resize A to 100 struct T's, fill up with data
+    p = array_reserve(struct T, &A, 100), A.n=100;
+    for(e=p+A.n;p!=e;++p) {
+      ...
+      p->proc = ...;
+      ...
+    }
+    
+    // array A represents the array
+    //   struct T ar[A.n]    where &ar[0] == A.ptr
+    // transfer ar[i] to processor ar[i].proc  for each i=0,...,A.n-1:
+    
+    sarray_transfer(struct T, &A, proc,set_src, &cr);
+    
+    // now array A represents a different array with a different size
+    //   struct T ar[A.n]    where &ar[0] == A.ptr
+    // the ordering is arbitrary
+    // if set_src != 0, ar[i].proc is set to the proc where ar[i] came from
+    // otherwise ar[i].proc is unchanged (and == this proc id)
+    
+    // note: two calls of
+    sarray_transfer(struct T, &A, proc,1, &cr);
+    // in a row should return A to its original state, up to ordering
+ 
+  Cleanup:
+    array_free(&A);
+    crystal_free(&cr);
+    comm_free(&c);
+
+  Example sarray_transfer_ext usage:
+  
+    struct T { ... };
+    struct array A;
+    uint proc[A.n];
+    
+    // array A represents the array
+    //   struct T ar[A.n]    where &ar[0] == A.ptr
+    // transfer ar[i] to processor proc[i]  for each i=0,...,A.n-1:
+    sarray_transfer_ext(struct T, &A, proc,sizeof(uint), &cr);
+    
+    // no information is available now on where each struct came from
+
+*/
+
+#define sarray_transfer_many PREFIXED_NAME(sarray_transfer_many)
+#define sarray_transfer_     PREFIXED_NAME(sarray_transfer_    )
+#define sarray_transfer_ext_ PREFIXED_NAME(sarray_transfer_ext_)
+
+uint sarray_transfer_many(
+  struct array *const *const A, const unsigned *const size, const unsigned An,
+  const int fixed, const int ext, const int set_src, const unsigned p_off,
+  const uint *const restrict proc, const unsigned proc_stride,
+  struct crystal *const cr);
+void sarray_transfer_(struct array *const A, const unsigned size,
+                      const unsigned p_off, const int set_src,
+                      struct crystal *const cr);
+void sarray_transfer_ext_(struct array *const A, const unsigned size,
+                          const uint *const proc, const unsigned proc_stride,
+                          struct crystal *const cr);
+
+#define sarray_transfer(T,A,proc_field,set_src,cr) \
+  sarray_transfer_(A,sizeof(T),offsetof(T,proc_field),set_src,cr)
+
+#define sarray_transfer_ext(T,A,proc,proc_stride,cr) \
+  sarray_transfer_ext_(A,sizeof(T),proc,proc_stride,cr)
+
+#endif
diff --git a/src/jl/sarray_transfer_test.c b/src/jl/sarray_transfer_test.c
new file mode 100644
index 0000000..aaf3b7f
--- /dev/null
+++ b/src/jl/sarray_transfer_test.c
@@ -0,0 +1,93 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "comm.h"
+#include "mem.h"
+#include "sort.h"
+#include "sarray_sort.h"
+#include "crystal.h"
+#include "sarray_transfer.h"
+
+typedef struct {
+  double d;
+  ulong l,l2;
+  uint i;
+  uint p;
+} r_work;
+/* Round-trip test: send random rows to random procs and back, then compare. */
+int main(int narg, char *arg[])
+{
+  comm_ext world; int np;
+  struct comm comm;
+  struct crystal crystal;
+  struct array A, A0=null_array; r_work *row, *row_0;
+  uint i;
+#ifdef MPI
+  MPI_Init(&narg,&arg);
+  world = MPI_COMM_WORLD;
+  MPI_Comm_size(world,&np);
+#else
+  world=0, np=1;
+#endif
+
+  comm_init(&comm,world);
+  crystal_init(&crystal,&comm);
+
+  array_init(r_work,&A,np*3), A.n=np*3, row=A.ptr;
+  for(i=0;i<A.n;++i) {
+    row[i].i = rand();
+    row[i].l = row[i].l2 = rand();
+    row[i].p = rand()%np; /* destination process */
+    row[i].d = rand()/(double)rand();
+  }
+  
+  sarray_sort_3(r_work,row,A.n, i,0, l,1, p,0, &crystal.data);
+  
+  for(i=0;i<A.n;++i)
+    printf("%02d send -> %02d: %08x %08x %d %g\n",
+      (int)comm.id,(int)row[i].p,(int)row[i].i,
+      (int)row[i].l,(int)row[i].p,row[i].d);
+  
+  array_cat(r_work,&A0, row,A.n); /* keep a sorted copy for the final check */
+  
+  sarray_transfer(r_work,&A, p,1, &crystal);
+
+  row=A.ptr;
+  for(i=0;i<A.n;++i)
+    printf("%02d recv <- %02d: %08x %08x %d %g\n",
+      (int)comm.id,(int)row[i].p,(int)row[i].i,
+      (int)row[i].l,(int)row[i].p,row[i].d);
+  /* the transfer may resize A, so re-read A.ptr before touching rows again */
+  sarray_transfer(r_work,&A, p,1, &crystal);
+  row=A.ptr; sarray_sort_3(r_work,row,A.n, i,0, l,1, p,0, &crystal.data);
+  if(A.n!=A0.n)
+    fail(1,__FILE__,__LINE__,"final array has different length than original");
+  row=A.ptr, row_0=A0.ptr;
+  for(i=0;i<A.n;++i)
+    if(   row[i].d != row_0[i].d
+       || row[i].l != row_0[i].l
+       || row[i].l2!= row_0[i].l2
+       || row[i].i != row_0[i].i
+       || row[i].p != row_0[i].p)
+      fail(1,__FILE__,__LINE__,"final array differs from original");
+      
+  array_free(&A0);
+  array_free(&A);
+  crystal_free(&crystal);
+
+  fflush(stdout); comm_barrier(&comm);
+  if(comm.id==0) printf("tests passed\n"), fflush(stdout);
+  
+  comm_free(&comm);
+  
+#ifdef MPI
+  MPI_Finalize();
+#endif
+
+  return 0;
+}
diff --git a/src/jl/sort.c b/src/jl/sort.c
new file mode 100644
index 0000000..5b25f42
--- /dev/null
+++ b/src/jl/sort.c
@@ -0,0 +1,31 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+
+#define T unsigned int
+#define SORT_SUFFIX _ui
+#include "sort_imp.h"
+#undef SORT_SUFFIX
+#undef T
+
+#if defined(USE_LONG) || defined(GLOBAL_LONG)
+#  define T unsigned long
+#  define SORT_SUFFIX _ul
+#  include "sort_imp.h"
+#  undef SORT_SUFFIX
+#  undef T
+#endif
+
+#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG)
+#  define T unsigned long long
+#  define SORT_SUFFIX _ull
+#  include "sort_imp.h"
+#  undef SORT_SUFFIX
+#  undef T
+#endif
diff --git a/src/jl/sort.h b/src/jl/sort.h
new file mode 100644
index 0000000..eaeeb95
--- /dev/null
+++ b/src/jl/sort.h
@@ -0,0 +1,76 @@
+#ifndef SORT_H
+#define SORT_H
+
+#if !defined(TYPES_H) || !defined(MEM_H)
+#warning "sort.h" requires "types.h" and "mem.h"
+/* types.h defines uint, ulong
+   mem.h   defines buffer */
+#endif
+
+/*------------------------------------------------------------------------------
+  
+  Sort
+  
+  O(n) stable sort with good performance for all n
+
+  sortv     (uint  *out,  const uint  *A, uint n, uint stride,  buffer *buf)
+  sortv_long(ulong *out,  const ulong *A, uint n, uint stride,  buffer *buf)
+
+  sortp     (buffer *buf, int start_perm,  const uint  *A, uint n, uint stride)
+  sortp_long(buffer *buf, int start_perm,  const ulong *A, uint n, uint stride)
+
+  A, n, stride : specify the input (stride is in bytes!)
+  out : the sorted values on output
+
+  For the value sort, (sortv*)
+    A and out may alias (A == out) exactly when stride == sizeof(T)
+
+  For the permutation sort, (sortp*)
+    The permutation can be both input (when start_perm!=0) and output,
+    following the convention that it is always at the start of the buffer buf:
+      uint *perm = buf->ptr;
+    The permutation denotes the ordering
+      A[perm[0]], A[perm[1]], ..., A[perm[n-1]]
+    (assuming stride == sizeof(uint) or sizeof(ulong) as appropriate)
+    and is re-arranged stably to give a sorted ordering.
+    Specifying start_perm==0 is equivalent to specifying
+      perm[i] = i,   i=0,...,n-1
+    for an initial permutation (but may be faster).
+    The buffer will be expanded as necessary to accommodate the permutation
+    and the required scratch space.
+  
+  Most code calls these routines indirectly via the higher-level routine
+    sarray_sort for sorting arrays of structs (see "sarray_sort.h").
+
+  ----------------------------------------------------------------------------*/
+
+#define sortv_ui  PREFIXED_NAME(sortv_ui)
+#define sortv_ul  PREFIXED_NAME(sortv_ul)
+#define sortv_ull PREFIXED_NAME(sortv_ull)
+#define sortp_ui  PREFIXED_NAME(sortp_ui)
+#define sortp_ul  PREFIXED_NAME(sortp_ul)
+#define sortp_ull PREFIXED_NAME(sortp_ull)
+
+#define sortv TYPE_LOCAL(sortv_ui,sortv_ul,sortv_ull)
+#define sortp TYPE_LOCAL(sortp_ui,sortp_ul,sortp_ull)
+#define sortv_long TYPE_GLOBAL(sortv_ui,sortv_ul,sortv_ull)
+#define sortp_long TYPE_GLOBAL(sortp_ui,sortp_ul,sortp_ull)
+
+void sortv_ui(unsigned *out, const unsigned *A, uint n, unsigned stride,
+              buffer *restrict buf);
+void sortv_ul(unsigned long *out,
+              const unsigned long *A, uint n, unsigned stride,
+              buffer *restrict buf);
+uint *sortp_ui(buffer *restrict buf, int start_perm,
+               const unsigned *restrict A, uint n, unsigned stride);
+uint *sortp_ul(buffer *restrict buf, int start_perm,
+               const unsigned long *restrict A, uint n, unsigned stride);
+#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG)
+void sortv_ull(unsigned long long *out,
+               const unsigned long long *A, uint n, unsigned stride,
+               buffer *restrict buf);
+uint *sortp_ull(buffer *restrict buf, int start_perm,
+                const unsigned long long *restrict A, uint n, unsigned stride);
+#endif
+
+#endif
diff --git a/src/jl/sort_imp.h b/src/jl/sort_imp.h
new file mode 100644
index 0000000..08b05d1
--- /dev/null
+++ b/src/jl/sort_imp.h
@@ -0,0 +1,543 @@
+#if !defined(T) || !defined(SORT_SUFFIX)
+#error sort_imp.h not meant to be compiled by itself
+#endif
+
+#define sort_data       TOKEN_PASTE(sort_data      ,SORT_SUFFIX)
+#define radix_count     TOKEN_PASTE(radix_count    ,SORT_SUFFIX)
+#define radix_offsets   TOKEN_PASTE(radix_offsets  ,SORT_SUFFIX)
+#define radix_zeros     TOKEN_PASTE(radix_zeros    ,SORT_SUFFIX)
+#define radix_passv     TOKEN_PASTE(radix_passv    ,SORT_SUFFIX)
+#define radix_sortv     TOKEN_PASTE(radix_sortv    ,SORT_SUFFIX)
+#define radix_passp0_b  TOKEN_PASTE(radix_passp0_b ,SORT_SUFFIX)
+#define radix_passp_b   TOKEN_PASTE(radix_passp_b  ,SORT_SUFFIX)
+#define radix_passp_m   TOKEN_PASTE(radix_passp_m  ,SORT_SUFFIX)
+#define radix_passp_e   TOKEN_PASTE(radix_passp_e  ,SORT_SUFFIX)
+#define radix_passp0_be TOKEN_PASTE(radix_passp0_be,SORT_SUFFIX)
+#define radix_passp_be  TOKEN_PASTE(radix_passp_be, SORT_SUFFIX)
+#define radix_sortp     TOKEN_PASTE(radix_sortp    ,SORT_SUFFIX)
+#define merge_sortv     TOKEN_PASTE(merge_sortv    ,SORT_SUFFIX)
+#define merge_copy_perm TOKEN_PASTE(merge_copy_perm,SORT_SUFFIX)
+#define merge_sortp0    TOKEN_PASTE(merge_sortp0   ,SORT_SUFFIX)
+#define merge_sortp     TOKEN_PASTE(merge_sortp    ,SORT_SUFFIX)
+#define heap_sortv      TOKEN_PASTE(heap_sortv     ,SORT_SUFFIX)
+
+#define sortv PREFIXED_NAME(TOKEN_PASTE(sortv,SORT_SUFFIX))
+#define sortp PREFIXED_NAME(TOKEN_PASTE(sortp,SORT_SUFFIX))
+
+typedef struct { T v; uint i; } sort_data;
+
+#define INC_PTR(A,stride) ((A)=(T*)((char*)(A)+(stride)))
+#define INDEX_PTR(A,stride,i) (*(T*)((char*)(A)+(i)*(stride)))
+
+/*------------------------------------------------------------------------------
+  
+  Radix Sort
+  
+  stable; O(n+k) time and extra storage
+    where k = (digits in an int) * 2^(bits per digit)
+    (e.g. k = 4 * 256 = 1024 for 32-bit ints with 8-bit digits)
+
+  brief description:
+    input sorted stably on each digit, starting with the least significant
+    counting sort is used for each digit:
+      a pass through the input counts the occurrences of each digit value
+      on a second pass, each input has a known destination
+  
+  tricks:
+    all counting passes are combined into one
+    the counting pass also computes the inclusive bit-wise or of all inputs,
+      which is used to skip digit positions for which all inputs have zeros
+
+  ----------------------------------------------------------------------------*/
+
+#define STATIC_DIGIT_BUCKETS 1
+
+#define DIGIT_BITS   8
+#define DIGIT_VALUES (1<<DIGIT_BITS)
+#define DIGIT_MASK   ((T)(DIGIT_VALUES-1))
+#define CEILDIV(a,b) (((a)+(b)-1)/(b))
+#define DIGITS       CEILDIV(CHAR_BIT*sizeof(T),DIGIT_BITS)
+#define VALUE_BITS   (DIGIT_BITS*DIGITS)
+#define COUNT_SIZE   (DIGITS*DIGIT_VALUES)
+
+/* used to unroll a tiny loop: */
+#define COUNT_DIGIT_01(n,i) \
+    if(n>i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS
+#define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1)
+#define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2)
+#define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4)
+#define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8)
+#define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16)
+#define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32)
+/* Histogram all digits of all keys in one pass; returns the OR of the keys. */
+static T radix_count(
+  uint (*restrict count)[DIGIT_VALUES],
+  const T *restrict A, const T *const end, const unsigned stride)
+{
+  T bitorkey = 0;
+  memset(count,0,COUNT_SIZE*sizeof(uint));
+  do { /* do-while: *A is read before the end test, so A must not be empty */
+    T val=*A;
+    bitorkey|=val;
+    COUNT_DIGIT_64(DIGITS,0);
+    /* above macro expands to:
+    if(DIGITS> 0) count[ 0][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
+    if(DIGITS> 1) count[ 1][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
+      ...
+    if(DIGITS>63) count[63][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
+    */
+  } while(INC_PTR(A,stride),A!=end);
+  return bitorkey;
+}
+
+#undef COUNT_DIGIT_01
+#undef COUNT_DIGIT_02
+#undef COUNT_DIGIT_04
+#undef COUNT_DIGIT_08
+#undef COUNT_DIGIT_16
+#undef COUNT_DIGIT_32
+#undef COUNT_DIGIT_64
+/* Replace the DIGIT_VALUES counts at c by their exclusive running sums. */
+static void radix_offsets(uint *restrict c)
+{
+  uint *const stop = c+DIGIT_VALUES;
+  uint running = 0;
+  for(; c!=stop; c+=4) { /* four buckets per step */
+    const uint n0=c[0], n1=c[1], n2=c[2], n3=c[3];
+    c[0]=running; running+=n0;
+    c[1]=running; running+=n1;
+    c[2]=running; running+=n2;
+    c[3]=running; running+=n3;
+  }
+}
+/* Keep only digits with a bit set in bitorkey; returns how many were kept. */
+static unsigned radix_zeros(
+  T bitorkey, uint (*restrict count)[DIGIT_VALUES],
+  unsigned *restrict shift, uint **restrict offsets)
+{
+  unsigned digits=0, sh=0; uint *c = &count[0][0];
+  do { /* for a kept digit: record its shift and turn counts into offsets */
+    if(bitorkey&DIGIT_MASK) *shift++ = sh, *offsets++ = c, ++digits,
+                            radix_offsets(c);
+  } while(bitorkey>>=DIGIT_BITS,sh+=DIGIT_BITS,c+=DIGIT_VALUES,sh!=VALUE_BITS);
+  return digits;
+}
+/* Scatter keys into out by their digit at shift sh, advancing off[digit]. */
+static void radix_passv(
+  const T *restrict A, const T *const end, const unsigned stride,
+  const unsigned sh, uint *const restrict off, T *const restrict out)
+{
+  do out[off[(*A>>sh)&DIGIT_MASK]++] = *A; while(INC_PTR(A,stride),A!=end);
+}
+/* Radix-sort the n strided keys of A into out; work is n-key scratch. */
+static void radix_sortv(
+  T *out, const T *A, const uint n, const unsigned stride,
+  T *work, uint (*restrict count)[DIGIT_VALUES])
+{
+  const T *const end = &INDEX_PTR(A,stride,n);
+  T bitorkey = radix_count(count, A,end,stride);
+  unsigned shift[DIGITS]; uint *offsets[DIGITS];
+  const unsigned digits = radix_zeros(bitorkey,count,shift,offsets);
+  if(digits==0) {
+    memset(out,0,n*sizeof(T)); /* every key is 0 */
+  } else {
+    T *src, *dst; unsigned d;
+    if(out==A || (digits&1)==0) dst=out,src=work; /* 1st pass goes to src; */
+                           else src=out,dst=work; /* never into out if ==A */
+    radix_passv(A,end,stride,shift[0],offsets[0],src);
+    for(d=1;d!=digits;++d) {
+      T *t;
+      radix_passv(src,src+n,sizeof(T),shift[d],offsets[d],dst);
+      t=src,src=dst,dst=t; /* ping-pong the two buffers */
+    }
+    if(src!=out) memcpy(out,src,n*sizeof(T)); /* result ended up in work */
+  }
+}
+/* First pass, no start permutation: scatter (key, index i) pairs by digit. */
+static void radix_passp0_b(
+  const T *restrict A, const uint n, const unsigned stride,
+  const unsigned sh, uint *const restrict off,
+  sort_data *const restrict out)
+{
+  uint i=0;
+  do {
+    T v = *A;
+    sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++];
+    d->v=v, d->i=i++;
+  } while(INC_PTR(A,stride),i!=n);
+}
+/* First pass with start permutation p: scatter (A[p[k]], p[k]) pairs. */
+static void radix_passp_b(
+  const uint *restrict p,
+  const T *const restrict A, const uint n, const unsigned stride,
+  const unsigned sh, uint *const restrict off,
+  sort_data *const out)
+{
+  const uint *const pe = p+n;
+  do {
+    uint j = *p++;
+    T v = INDEX_PTR(A,stride,j);
+    sort_data *d = &out[off[(v>>sh)&DIGIT_MASK]++];
+    d->v=v, d->i=j;
+  } while(p!=pe);
+}
+/* Middle pass: scatter (key, index) pairs from src into out by digit. */
+static void radix_passp_m(
+  const sort_data *restrict src, const sort_data *const end,
+  const unsigned sh, uint *const restrict off,
+  sort_data *const restrict out)
+{
+  do {
+    sort_data *d = &out[off[(src->v>>sh)&DIGIT_MASK]++];
+    d->v=src->v,d->i=src->i;
+  } while(++src!=end);
+}
+/* Last pass: scatter only the indices by digit, so out holds just the perm. */
+static void radix_passp_e(
+  const sort_data *restrict src, const sort_data *const end,
+  const unsigned sh, uint *const restrict off,
+  uint *const restrict out)
+{
+  do out[off[(src->v>>sh)&DIGIT_MASK]++]=src->i; while(++src!=end);
+}
+/* Only pass, no start permutation: scatter indices 0..n-1 directly to out. */
+static void radix_passp0_be(
+  uint *const restrict out,
+  const T *restrict A, const uint n, const unsigned stride,
+  const unsigned sh, uint *const restrict off)
+{
+  uint i=0;
+  do out[off[(*A>>sh)&DIGIT_MASK]++]=i++; while(INC_PTR(A,stride),i!=n);
+}
+/* Only pass with start permutation p: scatter via work, then copy into p. */
+static void radix_passp_be(
+  uint *restrict p,
+  const T *restrict A, const uint n, const unsigned stride,
+  const unsigned sh, uint *const restrict off,
+  sort_data *restrict work)
+{
+  uint *q = p, *const qe = p+n;
+  uint *w = &work[0].i; /* work's storage is used as a plain uint array */
+  do {
+    uint j = *q++;
+    T v = INDEX_PTR(A,stride,j);
+    w[off[(v>>sh)&DIGIT_MASK]++]=j;
+  } while(q!=qe);
+  memcpy(p,w,n*sizeof(uint));
+}
+/* Radix permutation sort of A into idx; work is used as two n-entry halves. */
+static void radix_sortp(
+  uint *restrict idx, uint perm_start,
+  const T *restrict A, const uint n, const unsigned stride,
+  sort_data *restrict work,
+  uint (*restrict count)[DIGIT_VALUES])
+{
+  T bitorkey = radix_count(count, A,&INDEX_PTR(A,stride,n),stride);
+  unsigned shift[DIGITS]; uint *offsets[DIGITS];
+  unsigned digits = radix_zeros(bitorkey,count,shift,offsets);
+  if(digits==0) { /* all keys are 0: keep idx as given, or write identity */
+    if(!perm_start) { uint i=0; do *idx++=i++; while(i!=n); }
+  } else if(digits==1) { /* one pass suffices: write indices directly */
+    if(perm_start) radix_passp_be (idx,A,n,stride,shift[0],offsets[0],work);
+              else radix_passp0_be(idx,A,n,stride,shift[0],offsets[0]);
+  } else {
+    sort_data *src, *dst; unsigned d;
+    if((digits&1)==0) dst=work,src=dst+n;
+                 else src=work,dst=src+n;
+    if(perm_start) radix_passp_b (idx,A,n,stride,shift[0],offsets[0],src);
+              else radix_passp0_b(    A,n,stride,shift[0],offsets[0],src);
+    for(d=1;d!=digits-1;++d) { /* middle digits ping-pong between halves */
+      sort_data *t;
+      radix_passp_m(src,src+n,shift[d],offsets[d],dst);
+      t=src,src=dst,dst=t;
+    }
+    radix_passp_e(src,src+n,shift[d],offsets[d],idx); /* last digit -> idx */
+  }
+}
+
+/*------------------------------------------------------------------------------
+  
+  Merge Sort
+  
+  stable; O(n log n) time
+
+  ----------------------------------------------------------------------------*/
+
+#define MERGE_2(p,v)                           \
+  if(VAL(v[1])<VAL(v[0])) p[0]=v[1],p[1]=v[0]; \
+                     else p[0]=v[0],p[1]=v[1]
+#define MERGE_3(p,v) do /* p[0..2] = v[0..2] ordered by VAL; equal keys keep their v order */ \
+  if(VAL(v[1])<VAL(v[0])) {                                          \
+    if(VAL(v[2])<VAL(v[1]))        p[0]=v[2],p[1]=v[1],p[2]=v[0];    \
+    else { if(VAL(v[2])<VAL(v[0])) p[0]=v[1],p[1]=v[2],p[2]=v[0];    \
+                              else p[0]=v[1],p[1]=v[0],p[2]=v[2]; }  \
+  } else {                                                           \
+     if(VAL(v[2])<VAL(v[0]))        p[0]=v[2],p[1]=v[0],p[2]=v[1];   \
+     else { if(VAL(v[2])<VAL(v[1])) p[0]=v[0],p[1]=v[2],p[2]=v[1];   \
+                               else p[0]=v[0],p[1]=v[1],p[2]=v[2]; } \
+  } while(0)
+#define MERGE_SORT() /* iterative top-down merge sort of An items; expects DATA, VAL, SETVAL, buf[2] and An in scope; result ends in buf[0]; callers here pass An>=2 */ \
+  do {                                                                 \
+    uint i=0, n=An, base=-n, odd=0, c=0, b=1; /* i: next input index; n, base: length and offset of the current segment; odd, c: bit stacks; b: buffer written to */ \
+    for(;;) {                                                          \
+      DATA *restrict p;                                                \
+      if((c&1)==0) { /* low bit of c clear: advance to the next segment and split it */ \
+        base+=n, n+=(odd&1), c|=1, b^=1;                               \
+        while(n>3) odd<<=1,odd|=(n&1),n>>=1,c<<=1,b^=1; /* halve down to 2 or 3 items, pushing each n&1 onto odd */ \
+      } else /* low bit set: the finished segment was a second half (or the whole input); pop to its parent */ \
+        base-=n-(odd&1),n<<=1,n-=(odd&1),odd>>=1,c>>=1;                \
+      if(c==0) break; /* popped past the top level: sorting is complete */ \
+      p = buf[b]+base;                                                 \
+      if(n==2) { /* leaf of 2: load through SETVAL and order directly */ \
+        DATA v[2]; SETVAL(v[0],i), SETVAL(v[1],i+1);                   \
+        MERGE_2(p,v);                                                  \
+        i+=2;                                                          \
+      } else if(n==3) { /* leaf of 3 */ \
+        DATA v[3]; SETVAL(v[0],i), SETVAL(v[1],i+1), SETVAL(v[2],i+2); \
+        MERGE_3(p,v);                                                  \
+        i+=3;                                                          \
+      } else { /* merge: first half (na items) is in the other buffer, second half (nb items) already sits at p+na */ \
+        const uint na = n>>1, nb = (n+1)>>1;                           \
+        const DATA *restrict ap = buf[b^1]+base, *const ae = ap+na;    \
+        DATA *restrict bp = p+na, *const be = bp+nb;                   \
+        for(;;) {                                                      \
+          if(VAL((*bp))<VAL((*ap))) { /* strict <: on ties the first-half item goes first (stable) */ \
+            *p++=*bp++;                                                \
+            if(bp!=be) continue;                                       \
+            do *p++=*ap++; while(ap!=ae); /* second half used up: flush the rest of the first half */ \
+            break;                                                     \
+          } else {                                                     \
+            *p++=*ap++;                                                \
+            if(ap==ae) break; /* first half used up: the remaining second-half items are already in place */ \
+          }                                                            \
+        }                                                              \
+      }                                                                \
+    }                                                                  \
+  } while(0)
+
+static void merge_sortv( /* value merge sort: writes the An keys of A to out in ascending order */
+  T *restrict out,
+  const T *restrict A, const uint An, const unsigned stride,
+  T *restrict work) /* scratch; sortv reserves An*sizeof(T) bytes for it */
+{
+  T *buf[2]; buf[0]=out, buf[1]=work; /* MERGE_SORT leaves its result in buf[0] */
+#define DATA T
+#define VAL(x) x
+#define SETVAL(x,ai) x=*A,INC_PTR(A,stride) /* reads keys sequentially; the index argument ai is not used */
+  MERGE_SORT();
+#undef SETVAL
+#undef VAL
+#undef DATA
+}
+
+static void merge_copy_perm(
+  uint *restrict idx, const sort_data *restrict p, uint n)
+{
+  /* idx[k] = p[k].i for k<n, unrolled by 8; switch(n%8) enters the body part-way */
+  uint n_by_8 = (n+7)/8; /* number of trips through the unrolled body */
+  if(n==0) return; /* nothing to copy; without this --n_by_8 would wrap around */
+  switch(n%8) {
+    case 0: do { *idx++ = (p++)->i;
+    case 7:      *idx++ = (p++)->i;
+    case 6:      *idx++ = (p++)->i;
+    case 5:      *idx++ = (p++)->i;
+    case 4:      *idx++ = (p++)->i;
+    case 3:      *idx++ = (p++)->i;
+    case 2:      *idx++ = (p++)->i;
+    case 1:      *idx++ = (p++)->i;
+    } while (--n_by_8 > 0);
+  }
+}
+
+static void merge_sortp0( /* permutation merge sort from the identity: idx receives the element indices in key order */
+  uint *restrict idx,
+  const T *restrict A, const uint An, const unsigned stride,
+  sort_data *restrict work) /* scratch for 2*An records: work and work+An */
+{
+  sort_data *buf[2]; buf[0]=work+An,buf[1]=work; /* MERGE_SORT leaves its result in buf[0] */
+#define DATA sort_data
+#define VAL(x) x.v
+#define SETVAL(x,ai) x.v=*A,INC_PTR(A,stride),x.i=ai /* read keys sequentially and tag each with its input position */
+  MERGE_SORT();
+#undef SETVAL
+#undef VAL
+#undef DATA
+  merge_copy_perm(idx,buf[0],An); /* keep only the .i members */
+}
+
+static void merge_sortp( /* permutation merge sort starting from the index list already stored in idx */
+  uint *restrict idx, /* in: An element indices; out: the same indices ordered by key (stable) */
+  const T *const restrict A, const uint An, const unsigned stride,
+  sort_data *restrict work) /* scratch for 2*An records: work and work+An */
+{
+  sort_data *buf[2]; buf[0]=work+An,buf[1]=work; /* MERGE_SORT leaves its result in buf[0] */
+#define DATA sort_data
+#define VAL(x) x.v
+#define SETVAL(x,ai) x.i=idx[ai],x.v=INDEX_PTR(A,stride,x.i) /* take index idx[ai], then fetch that element's key */
+  MERGE_SORT();
+#undef SETVAL
+#undef VAL
+#undef DATA
+  merge_copy_perm(idx,buf[0],An); /* overwrite idx with the sorted indices */
+}
+
+#undef MERGE_SORT
+#undef MERGE_3
+#undef MERGE_2
+
+/*------------------------------------------------------------------------------
+  
+  Heap Sort
+  
+  in-place, stability unobservable; O(n log n) time
+
+  ----------------------------------------------------------------------------*/
+static void heap_sortv(T *const restrict A, unsigned n) /* sorts A[0..n-1] ascending in place using a max-heap */
+{
+  unsigned i; if(n<2) return; /* 0 or 1 items are already sorted; n==0 would wrap n-1 below */
+  /* build heap */
+  for(i=1;i<n;++i) {
+    T item = A[i];
+    unsigned h=i, p = (h-1)>>1; /* h: hole position, p: its parent */
+    if(A[p] >= item) continue;
+    do A[h]=A[p], h=p, p=(p-1)>>1; while(h && A[p] < item); /* sift up; A[p] is not read once h reaches 0 */
+    A[h] = item;
+  }
+  /* extract */
+  for(i=n-1;i;--i) {
+    T item = A[i];
+    unsigned h = 0;
+    A[i] = A[0]; /* current maximum goes to its final slot */
+    for(;;) { /* sift item down within the heap A[0..i-1] */
+      unsigned ch = 1+(h<<1), r = ch+1;
+      if(r<i && A[ch] < A[r]) ch=r; /* pick the larger child */
+      if(ch>=i || item >= A[ch]) break;
+      A[h]=A[ch], h=ch;
+    }
+    A[h] = item;
+  }
+}
+
+
+/*------------------------------------------------------------------------------
+  
+  Hybrid Stable Sort
+  
+  low-overhead merge sort when n is small,
+  otherwise asymptotically superior radix sort
+
+  result = O(n) sort with good performance for all n
+  
+  A, n, stride : specifies the input, stride in bytes
+  out : the sorted values on output
+
+  For the value sort,
+    A and out may alias (A == out) exactly when stride == sizeof(T),
+      in which case heap sort is used for small sizes
+
+  For the permutation sort,
+    the permutation can be both input (when start_perm!=0) and output,
+    following the convention that it is always at the start of the buffer buf;
+    the buffer will be expanded as necessary to accommodate the permutation
+    and the required scratch space
+
+  ----------------------------------------------------------------------------*/
+
+void sortv(T *out, const T *A, uint n, unsigned stride, buffer *restrict buf) /* value sort: out[0..n-1] = keys of A ascending; buf supplies scratch */
+{
+  if(n<DIGIT_VALUES) { /* small input: comparison sorts */
+    if(n<2) {
+      if(n==0) return;
+      *out = *A;
+    } else {
+      if(out==A) { /* in-place request: only valid for contiguous keys */
+        if(stride!=sizeof(T))
+          fail(1,__FILE__,__LINE__,"in-place sort with non-unit stride");
+        heap_sortv(out,n);
+      } else {
+        buffer_reserve(buf,n*sizeof(T));
+        merge_sortv(out, A,n,stride, (T*)buf->ptr);
+      }
+    }
+  } else if(STATIC_DIGIT_BUCKETS) { /* radix sort with a function-static count table */
+    static uint count[DIGITS][DIGIT_VALUES];
+    buffer_reserve(buf,n*sizeof(T));
+    radix_sortv(out, A,n,stride, (T*)buf->ptr,count);
+  } else { /* radix sort with the count table placed in buf after the work area */
+    T *restrict work;
+    uint (*restrict count)[DIGIT_VALUES];
+    const size_t count_off=align_as(uint,n*sizeof(T)); /* offset of the count table within buf */
+    buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES]));
+    work = buf->ptr;
+    count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off);
+    radix_sortv(out, A,n,stride, work,count);
+  }
+}
+
+uint *sortp(buffer *restrict buf, int start_perm, /* permutation sort; returns the permutation, stored at the start of buf */
+            const T *restrict A, uint n, unsigned stride)
+{
+  uint *restrict perm;
+  sort_data *restrict work;
+  size_t work_off=align_as(sort_data,n*sizeof(uint)); /* buf layout: n uints of permutation, then 2*n sort_data of scratch */
+  if(n<DIGIT_VALUES) { /* small input: merge sort */
+    buffer_reserve(buf,work_off+2*n*sizeof(sort_data));
+    perm = buf->ptr;
+    work = (sort_data*)((char*)buf->ptr+work_off);
+    if(n<2) {
+      if(n==1) *perm=0;
+    } else {
+      if(start_perm) merge_sortp (perm, A,n,stride, work); /* start from the permutation already in buf */
+      else           merge_sortp0(perm, A,n,stride, work); /* start from the identity */
+    }
+  } else if(STATIC_DIGIT_BUCKETS){ /* radix sort with a function-static count table */
+    static uint count[DIGITS][DIGIT_VALUES];
+    buffer_reserve(buf,work_off+2*n*sizeof(sort_data));
+    perm = buf->ptr;
+    work = (sort_data*)((char*)buf->ptr+work_off);
+    radix_sortp(perm,start_perm, A,n,stride, work,count);
+  } else { /* radix sort with the count table placed in buf after the scratch records */
+    uint (*restrict count)[DIGIT_VALUES];
+    const size_t count_off=align_as(uint,work_off+2*n*sizeof(sort_data));
+    buffer_reserve(buf,count_off+sizeof(uint[DIGITS][DIGIT_VALUES]));
+    perm = buf->ptr;
+    work = (sort_data*)((char*)buf->ptr+work_off);
+    count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off);
+    radix_sortp(perm,start_perm, A,n,stride, work,count);
+  }  
+  return perm;
+}
+
+#undef STATIC_DIGIT_BUCKETS
+
+#undef DIGIT_BITS
+#undef DIGIT_VALUES
+#undef DIGIT_MASK
+#undef CEILDIV
+#undef DIGITS
+#undef VALUE_BITS
+#undef COUNT_SIZE
+
+#undef INDEX_PTR
+#undef INC_PTR
+
+#undef sortp
+#undef sortv
+
+#undef merge_sortp
+#undef merge_sortp0
+#undef merge_sortv
+#undef radix_sortp
+#undef radix_passp_be
+#undef radix_passp0_be
+#undef radix_passp_e
+#undef radix_passp_m
+#undef radix_passp_b
+#undef radix_passp0_b
+#undef radix_sortv
+#undef radix_passv
+#undef radix_zeros
+#undef radix_offsets
+#undef radix_count
+#undef sort_data
+
diff --git a/src/jl/sort_test.c b/src/jl/sort_test.c
new file mode 100644
index 0000000..acd0bb3
--- /dev/null
+++ b/src/jl/sort_test.c
@@ -0,0 +1,113 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "sort.h"
+
+#define SMALL 22
+#define NUM   500
+#define SI 9
+
+ulong A[NUM][SI], Av[NUM];
+uint  B[NUM][SI], Bv[NUM];
+
+uint P[NUM], Q[NUM];
+
+int main() /* prints sorted permutations plus 0/1 consistency flags for SMALL and NUM items, for ulong and uint keys */
+{
+  buffer buf = {0,0,0}; /* starts empty; passed to every sort call as scratch */
+  uint i;
+
+  /*buffer_init(&buf, sortp_long_worksize(NUM,0));*/
+
+#if 0
+  printf("\nsource:\n");
+#endif
+  for(i=0;i!=NUM;++i) { /* only column 0 of each SI-wide row is filled and used as the key */
+    A[i][0]=rand();
+    A[i][0]<<=CHAR_BIT*sizeof(int)-1;
+    A[i][0]^=rand();
+    A[i][0]<<=CHAR_BIT*sizeof(int)-1;
+    A[i][0]^=rand();
+    if(0) A[i][0]&=0x000ff00;
+    B[i][0]=A[i][0]; /* uint copy of the same key */
+#if 0    
+    printf("%016lx\t%016lx\n",(unsigned long)A[i][0],(unsigned long)B[i][0]);
+#endif
+  }
+#if 0
+  printf("\n");
+#endif
+  printf("merge sort:\n");
+  for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i; /* reversed identity, used below as the starting permutation */
+  sortv_long(Av,  &A[0][0],SMALL,sizeof(ulong[SI]), &buf);
+  sortp_long(&buf,0, &A[0][0],SMALL,sizeof(ulong[SI]));
+    memcpy(P,buf.ptr,SMALL*sizeof(uint)); /* the permutation is read from the start of buf */
+  memcpy(buf.ptr,Q,SMALL*sizeof(uint)); /* preload buf with the starting permutation */
+  sortp_long(&buf,1, &A[0][0],SMALL,sizeof(ulong[SI]));
+    memcpy(Q,buf.ptr,SMALL*sizeof(uint));
+  for(i=0;i!=SMALL;++i) /* columns: P[i], Q[i], key, keys at P[i] and Q[i] equal, sortv output equals key at P[i] */
+    printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i],
+           (unsigned long)A[P[i]][0],
+           A[P[i]][0]==A[Q[i]][0],
+           Av[i]==A[P[i]][0]);
+  printf("\n");
+  printf("radix sort:\n");
+  for(i=0;i!=NUM;++i) Q[i]=NUM-1-i;
+  sortv_long(Av,  &A[0][0],NUM,sizeof(ulong[SI]), &buf);
+  sortp_long(&buf,0, &A[0][0],NUM,sizeof(ulong[SI]));
+    memcpy(P,buf.ptr,NUM*sizeof(uint));
+  memcpy(buf.ptr,Q,NUM*sizeof(uint));
+  sortp_long(&buf,1, &A[0][0],NUM,sizeof(ulong[SI]));
+    memcpy(Q,buf.ptr,NUM*sizeof(uint));
+  for(i=0;i!=NUM;++i)
+    printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i],
+           (unsigned long)A[P[i]][0],
+           A[P[i]][0]==A[Q[i]][0],
+           Av[i]==A[P[i]][0]);
+
+  printf("\nsmall integers:\n");
+  printf("\n");
+
+  printf("heap sort:\n");
+  for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i;
+  sortv(Q,  Q,SMALL,sizeof(uint), &buf); /* out==A with contiguous keys: the in-place case */
+  for(i=0;i!=SMALL;++i) printf("\t%u\n",(unsigned)Q[i]);
+
+  printf("merge sort:\n");
+  for(i=0;i!=SMALL;++i) Q[i]=SMALL-1-i;
+  sortv(Bv,  &B[0][0],SMALL,sizeof(uint[SI]), &buf);
+  sortp(&buf,0, &B[0][0],SMALL,sizeof(uint[SI]));
+    memcpy(P,buf.ptr,SMALL*sizeof(uint));
+  memcpy(buf.ptr,Q,SMALL*sizeof(uint));
+  sortp(&buf,1, &B[0][0],SMALL,sizeof(uint[SI]));
+    memcpy(Q,buf.ptr,SMALL*sizeof(uint));
+  for(i=0;i!=SMALL;++i)
+    printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i],
+           (unsigned long)B[P[i]][0],
+           B[P[i]][0]==B[Q[i]][0],
+           B[P[i]][0]==Bv[i]);
+  printf("\n");
+  printf("radix sort:\n");
+  for(i=0;i!=NUM;++i) Q[i]=NUM-1-i;
+  sortv(Bv,  &B[0][0],NUM,sizeof(uint[SI]), &buf);
+  sortp(&buf,0, &B[0][0],NUM,sizeof(uint[SI]));
+    memcpy(P,buf.ptr,NUM*sizeof(uint));
+  memcpy(buf.ptr,Q,NUM*sizeof(uint));
+  sortp(&buf,1, &B[0][0],NUM,sizeof(uint[SI]));
+    memcpy(Q,buf.ptr,NUM*sizeof(uint));
+  for(i=0;i!=NUM;++i)
+    printf("%u\t%u\t%016lx\t%d\t%d\n",(unsigned)P[i],(unsigned)Q[i],
+           (unsigned long)B[P[i]][0],
+           B[P[i]][0]==B[Q[i]][0],
+           B[P[i]][0]==Bv[i]);
+  buffer_free(&buf);
+  return 0;
+}
+
diff --git a/src/jl/sort_test2.c b/src/jl/sort_test2.c
new file mode 100644
index 0000000..4481a16
--- /dev/null
+++ b/src/jl/sort_test2.c
@@ -0,0 +1,74 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "sort.h"
+#include "rdtsc.h"
+
+#if 1
+
+DEFINE_HW_COUNTER()
+
+#define N (1<<20)
+
+ulong A[N], out[N];
+uint P[N];
+
+int main() /* benchmark: reports ticks per item for sortv_long and sortp_long at sizes N, N/2, ..., 1 */
+{
+  buffer buf = null_buffer;
+  uint i;
+  unsigned long long tic, toc;
+  unsigned r;
+  #define TIME(t, repeat, what) do { /* run `what` repeat times untimed, then time another repeat runs into t */ \
+    for(r=repeat;r;--r) { what; } \
+    tic = getticks(); \
+    for(r=repeat;r;--r) { what; } \
+    toc = getticks(); \
+    t = toc-tic; \
+  } while(0)
+
+  for(i=0;i!=N;++i) { /* build each key from three rand() results combined by shift and xor */
+    A[i]=rand();
+    A[i]<<=CHAR_BIT*sizeof(int)-1;
+    A[i]^=rand();
+    A[i]<<=CHAR_BIT*sizeof(int)-1;
+    A[i]^=rand();
+    if(0) A[i]&=0x000ff00;
+  }
+
+  for(i=N;i;i>>=1) { /* halve the problem size each round; N/i repeats keep the total item count at N */
+    unsigned long long t;
+    TIME(t, (N/i), 
+      sortv_long(out, A,i,sizeof(ulong), &buf));
+    printf("sortv %d : %g cycles per item\n",
+      (int)i, t/(double)(N/i)/(double)i);
+  }
+
+  for(i=N;i;i>>=1) { /* same sweep for the permutation sort, starting from the identity */
+    unsigned long long t;
+    TIME(t, (N/i), 
+      sortp_long(&buf,0, A,i,sizeof(ulong)));
+    printf("sortp %d : %g cycles per item\n",
+      (int)i, t/(double)(N/i)/(double)i);
+  }
+
+  buffer_free(&buf);
+  return 0;
+}
+
+#else
+
+int main() /* empty program built instead of the benchmark when the #if above is set to 0 */
+{
+  return 0;
+}
+
+#endif
+
diff --git a/src/jl/spchol_test.c b/src/jl/spchol_test.c
new file mode 100644
index 0000000..5fc6596
--- /dev/null
+++ b/src/jl/spchol_test.c
@@ -0,0 +1,54 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "c99.h"
+#include "name.h"
+#include "fail.h"
+#include "types.h"
+#include "mem.h"
+#include "sparse_cholesky.h"
+
+int main() /* factors a fixed 7x7 sparse matrix and prints the solutions for 7 unit right-hand sides */
+{
+#define x -1 /* shorthand for the -1 entries in A[] below; undefined again before the array x is declared */
+
+  uint i,n=7;
+  uint Aj [] = {0,2, 1,2,6, 0,1,2, 3,5,6, 4,5, 3,4,5, 1,3,6};
+  double A[] = {2,x, 2,x,x, x,x,2, 2,x,x, 2,x, x,x,2, x,x,2};
+#undef x
+  uint Arp[] = {0,2,5,8,11,13,16,19}; /* n+1 offsets into Aj/A; presumably compressed-row pointers (see sparse_cholesky.h) */
+  double x[7], b[7] = {0,0,0,0, 0,0,0};
+  uint o[7] = {0,2,1,6,3,5,4}; /* index order used both for the right-hand sides and for printing */
+/*
+  uint i,n=10;
+  uint Aj [] = {0,2,7, 1,4,9, 0,2,6, 3,8,9, 1,4,8,9, 5,6,7, 2,5,6, 0,5,7,8,9, 3,4,7,8, 1,3,4,7,9};
+  real A  [] = {3,x,x, 2,x,x, x,2,x, 2,x,x, x,3,x,x, 2,x,x, x,x,2, x,x,4,x,x, x,x,x,3, x,x,x,x,4};
+#undef x
+  uint Arp[] = {0,     3,     6,     9,     12,      16,    19,    22,        27,      31,    36};
+  real b[] = {1,2,3,4,5, 6,7,8,9,10};
+*/
+  struct sparse_cholesky data;
+  buffer buf;
+  buffer_init(&buf,4);
+  sparse_cholesky_factor(n,Arp,Aj,A,&data,&buf);
+  
+  for(i=0;i<n;++i) { uint j; /* one solve per unit vector: set b[o[i]]=1, solve, print, reset */
+    b[o[i]]=1;
+    sparse_cholesky_solve(x,&data,b);
+    for(j=0;j<n;++j) printf("\t%g",(double)x[o[j]]);
+    printf("\n");
+    b[o[i]]=0;
+  }
+  sparse_cholesky_free(&data);
+  buffer_free(&buf);
+  /*
+  sparse_cholesky_solve(b,&data,b);
+  sparse_cholesky_free(&data);
+  for(i=0;i<n;++i) printf("%g\n", b[i]);
+  */
+
+  return 0;
+}
+
+
diff --git a/src/jl/tensor.c b/src/jl/tensor.c
new file mode 100644
index 0000000..a724714
--- /dev/null
+++ b/src/jl/tensor.c
@@ -0,0 +1,82 @@
+#include "c99.h"
+#include "name.h"
+#include "types.h"
+
+#if !defined(USE_CBLAS)
+
+#define tensor_dot  PREFIXED_NAME(tensor_dot )
+#define tensor_mtxm PREFIXED_NAME(tensor_mtxm)
+
+/* Matrices are always column-major (FORTRAN style) */
+
+double tensor_dot(const double *a, const double *b, uint n)
+{ /* inner product of a[0..n-1] and b[0..n-1], summed in index order; 0 when n is 0 */
+  double acc = 0; uint k;
+  for(k=0;k<n;++k) acc += a[k] * b[k];
+  return acc;
+}
+
+#  if defined(USE_NAIVE_BLAS)
+#    define tensor_mxv  PREFIXED_NAME(tensor_mxv )
+#    define tensor_mtxv PREFIXED_NAME(tensor_mtxv)
+#    define tensor_mxm  PREFIXED_NAME(tensor_mxm )
+
+/* y = A x */
+void tensor_mxv(
+  double *restrict y, uint ny,
+  const double *restrict A, const double *restrict x, uint nx)
+{ /* y (ny) = A (ny x nx, column-major) * x (nx), accumulated one column of A at a time */
+  uint row, col;
+  for(row=0;row<ny;++row) y[row]=0;
+  for(col=0;col<nx;++col,A+=ny) { /* A now points at column col */
+    const double xc = x[col];
+    for(row=0;row<ny;++row) y[row] += A[row]*xc;
+  }
+}
+
+/* y = A^T x */
+void tensor_mtxv(
+  double *restrict y, uint ny,
+  const double *restrict A, const double *restrict x, uint nx)
+{ /* y (ny) = A^T x with A nx x ny column-major: y[j] is the dot product of column j of A with x */
+  for(;ny;--ny) {
+    const double *restrict xp = x;
+    uint n = nx;
+    double sum = 0; /* value of an empty dot product (nx==0) */
+    if(n) for(sum = *A++ * *xp++, --n;n;--n) sum += *A++ * *xp++; /* first product seeds sum; skipped when nx==0 so n never wraps */
+    *y++ = sum;
+  }
+}
+
+/* C = A * B */
+void tensor_mxm(
+  double *restrict C, uint nc,
+  const double *restrict A, uint na, const double *restrict B, uint nb)
+{ /* C (nc x nb) = A (nc x na) * B (na x nb), all column-major */
+  uint row,col,mid;
+  for(row=0;row<nc*nb;++row) C[row]=0; /* clear all of C before accumulating */
+  for(col=0;col<nb;++col,C+=nc,B+=na) { /* C and B now point at column col */
+    const double *restrict Acol = A;
+    for(mid=0;mid<na;++mid,Acol+=nc) {
+      const double bk = B[mid];
+      for(row=0;row<nc;++row) C[row] += Acol[row] * bk;
+    }
+  }
+}
+
+#  endif
+
+/* C = A^T * B */
+void tensor_mtxm(
+  double *restrict C, uint nc,
+  const double *restrict A, uint na, const double *restrict B, uint nb)
+{ /* every entry of C is the dot product of one length-na column of A with one length-na column of B */
+  uint i,j;
+  for(j=0;j<nb;++j,B+=na) { /* B advances one column per outer step */
+    const double *restrict A_ = A; /* restart at the first column of A */
+    for(i=0;i<nc;++i,A_+=na) *C++ = tensor_dot(A_,B,na); /* C is written sequentially, column by column */
+  }
+}
+
+#endif
+
diff --git a/src/jl/tensor.h b/src/jl/tensor.h
new file mode 100644
index 0000000..bbff4ca
--- /dev/null
+++ b/src/jl/tensor.h
@@ -0,0 +1,199 @@
+#ifndef TENSOR_H
+#define TENSOR_H
+
+#if !defined(TYPES_H) || !defined(NAME_H)
+#warning "tensor.h" requires "types.h" and "name.h"
+#endif
+
+#if defined(USE_CBLAS)
+#  include <cblas.h>
+#  define tensor_dot(a,b,n) cblas_ddot((int)(n),a,1,b,1)
+#  define tensor_mxv(y,ny,A,x,nx) \
+     cblas_dgemv(CblasColMajor,CblasNoTrans,(int)ny,(int)nx, \
+                 1.0,A,(int)ny,x,1,0.0,y,1)
+#  define tensor_mtxv(y,ny,A,x,nx) \
+     cblas_dgemv(CblasColMajor,CblasTrans,(int)nx,(int)ny, \
+                 1.0,A,(int)nx,x,1,0.0,y,1)
+#  define tensor_mxm(C,nc,A,na,B,nb) \
+     cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans, \
+                 (int)nc,(int)nb,(int)na,1.0, \
+                 A,(int)nc,B,(int)na,0.0,C,(int)nc)
+#  define tensor_mtxm(C,nc,A,na,B,nb) \
+     cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans, \
+                 (int)nc,(int)nb,(int)na,1.0, \
+                 A,(int)na,B,(int)na,0.0,C,(int)nc)
+#else
+#  define tensor_dot  PREFIXED_NAME(tensor_dot )
+#  define tensor_mtxm PREFIXED_NAME(tensor_mtxm)
+double tensor_dot(const double *a, const double *b, uint n);
+
+/* C (nc x nb) = [A (na x nc)]^T * B (na x nb); all column-major */
+void tensor_mtxm(double *C, uint nc,
+                 const double *A, uint na, const double *B, uint nb);
+#  if defined(USE_NAIVE_BLAS)
+#    define tensor_mxv  PREFIXED_NAME(tensor_mxv )
+#    define tensor_mtxv PREFIXED_NAME(tensor_mtxv)
+#    define tensor_mxm  PREFIXED_NAME(tensor_mxm )
+/* y = A x */
+void tensor_mxv(double *y, uint ny, const double *A, const double *x, uint nx);
+
+/* y = A^T x */
+void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx);
+
+/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */
+void tensor_mxm(double *C, uint nc,
+                const double *A, uint na, const double *B, uint nb);
+#  else
+#    define nek_mxm FORTRAN_UNPREFIXED(mxm,MXM)
+/* C (na x nc) = A (na x nb) * B (nb x nc); all column-major */
+void nek_mxm(const double *A, const uint *na,
+             const double *B, const uint *nb,
+             double *C, const uint *nc);
+/* C (nc x nb) = A (nc x na) * B (na x nb); all column-major */
+static void tensor_mxm(double *C, uint nc,
+                       const double *A, uint na, const double *B, uint nb)
+{ nek_mxm(A,&nc,B,&na,C,&nb); } /* nek_mxm takes its sizes by pointer: rows of A, shared dimension, columns of B */
+
+/* y = A x */
+static void tensor_mxv(double *y, uint ny,
+                       const double *A, const double *x, uint nx)
+{ uint one=1; nek_mxm(A,&ny,x,&nx,y,&one); } /* x is passed as an nx x 1 matrix */
+
+/* y = A^T x */
+static void tensor_mtxv(double *y, uint ny,
+                        const double *A, const double *x, uint nx)
+{ uint one=1; nek_mxm(x,&one,A,&nx,y,&ny); } /* computes the 1 x ny product x^T A, whose entries are those of A^T x */
+
+#  endif
+#endif
+
+/*--------------------------------------------------------------------------
+   1-,2-,3-d Tensor Application of Row Vectors (for Interpolation)
+   
+   the 3d case:
+   v = tensor_i3(Jr,nr, Js,ns, Jt,nt, u, work)
+     gives v = [ Jr (x) Js (x) Jt ] u
+     where Jr, Js, Jt are row vectors (interpolation weights)
+     u is nr x ns x nt in column-major format (inner index is r)
+     v is a scalar
+  --------------------------------------------------------------------------*/
+
+static double tensor_i1(const double *Jr, uint nr, const double *u)
+{ /* 1-d case: dot product of the nr weights Jr with the nr values u */
+  return tensor_dot(Jr,u,nr);
+}
+
+/* work holds ns doubles */
+static double tensor_i2(const double *Jr, uint nr,
+                        const double *Js, uint ns,
+                        const double *u, double *work)
+{ /* 2-d case: u is nr x ns column-major */
+  tensor_mtxv(work,ns, u, Jr,nr); /* work[j] = Jr . (column j of u): contracts the r index */
+  return tensor_dot(Js,work,ns); /* then contract the s index */
+}
+
+/* work holds ns*nt + nt doubles */
+static double tensor_i3(const double *Jr, uint nr,
+                        const double *Js, uint ns,
+                        const double *Jt, uint nt,
+                        const double *u, double *work)
+{ /* contracts r, then s, then t */
+  double *work2 = work+nt; /* work: first nt doubles; work2: the following ns*nt doubles */
+  tensor_mtxv(work2,ns*nt,   u,     Jr,nr); /* u viewed as nr x (ns*nt): work2 = result of applying Jr */
+  tensor_mtxv(work ,nt   ,   work2, Js,ns); /* work2 viewed as ns x nt: work = result of applying Js */
+  return tensor_dot(Jt,work,nt);
+}
+
+/*--------------------------------------------------------------------------
+   1-,2-,3-d Tensor Application of Row Vectors
+             for simultaneous Interpolation and Gradient computation
+   
+   the 3d case:
+   v = tensor_ig3(g, wtr,nr, wts,ns, wtt,nt, u, work)
+     gives v   = [ Jr (x) Js (x) Jt ] u
+           g_0 = [ Dr (x) Js (x) Jt ] u
+           g_1 = [ Jr (x) Ds (x) Jt ] u
+           g_2 = [ Jr (x) Js (x) Dt ] u
+     where Jr,Dr,Js,Ds,Jt,Dt are row vectors,
+       Jr=wtr, Dr=wtr+nr, etc.
+       (interpolation & derivative weights)
+     u is nr x ns x nt in column-major format (inner index is r)
+     v is a scalar, g is an array of 3 doubles
+  --------------------------------------------------------------------------*/
+
+static double tensor_ig1(double g[1],
+                         const double *wtr, uint nr,
+                         const double *u)
+{ /* wtr holds nr interpolation weights followed by nr derivative weights */
+  g[0] = tensor_dot(wtr+nr,u,nr); /* derivative */
+  return tensor_dot(wtr   ,u,nr); /* interpolated value */
+}
+
+/* work holds 2*nr doubles */
+static double tensor_ig2(double g[2],
+                         const double *wtr, uint nr,
+                         const double *wts, uint ns,
+                         const double *u, double *work)
+{
+  tensor_mxm(work,nr, u,ns, wts,2); /* work = u * [Js Ds]: column 0 uses wts[0..ns-1], column 1 uses wts[ns..2ns-1] */
+  g[0] = tensor_dot(wtr+nr,work   ,nr); /* Dr applied to the Js column */
+  g[1] = tensor_dot(wtr   ,work+nr,nr); /* Jr applied to the Ds column */
+  return tensor_dot(wtr   ,work   ,nr); /* Jr applied to the Js column */
+}
+
+/* work holds 2*nr*ns + 3*nr doubles */
+static double tensor_ig3(double g[3],
+                         const double *wtr, uint nr,
+                         const double *wts, uint ns,
+                         const double *wtt, uint nt,
+                         const double *u, double *work)
+{
+  const uint nrs = nr*ns;
+  double *a = work, *b = work+2*nrs, *c=b+2*nr; /* a: 2*nrs doubles, b: 2*nr doubles, c: nr doubles */
+  tensor_mxm(a,nrs, u,nt, wtt,2); /* a = u (nrs x nt) * [Jt Dt] */
+  tensor_mxm(b,nr,  a,ns, wts,2); /* b = (first column of a, as nr x ns) * [Js Ds] */
+  tensor_mxv(c,nr, a+nrs, wts,ns); /* c = (second column of a, as nr x ns) * Js */
+  g[0] = tensor_dot(b   , wtr+nr, nr);
+  g[1] = tensor_dot(b+nr, wtr   , nr);
+  g[2] = tensor_dot(c   , wtr   , nr);
+  return tensor_dot(b   , wtr   , nr);
+}
+
+/*
+  out - nr x ns
+  u   - mr x ms
+  Jrt - mr x nr, Jst - ms x ns
+  work - nr x ms
+*/
+static void tensor_2t(double *out,
+                      const double *Jrt, uint nr, uint mr,
+                      const double *Jst, uint ns, uint ms,
+                      const double *u, double *work)
+{
+  tensor_mtxm(work,nr, Jrt,mr, u,ms); /* work (nr x ms) = Jrt^T * u */
+  tensor_mxm(out,nr, work,ms, Jst,ns); /* out (nr x ns) = work * Jst */
+}
+
+/*
+  out - nr x ns x nt
+  u   - mr x ms x mt
+  Jrt - mr x nr, Jst - ms x ns, Jtt - mt x nt
+  work - nr*ms*mt + nr*ns*mt = nr*(ms+ns)*mt
+*/
+static void tensor_3t(double *out,
+                      const double *Jrt, uint nr, uint mr,
+                      const double *Jst, uint ns, uint ms,
+                      const double *Jtt, uint nt, uint mt,
+                      const double *u, double *work)
+{
+  const uint nrs=nr*ns, mst=ms*mt, nrms=nr*ms;
+  uint k;
+  double *work2 = work+nr*mst; /* second scratch region, after the nr*ms*mt doubles of the first */
+  double *p; const double *q;
+  tensor_mtxm(work,nr, Jrt,mr, u,mst); /* work (nr x ms*mt) = Jrt^T * u, with u viewed as mr x (ms*mt) */
+  for(k=0,p=work2,q=work;k<mt;++k,p+=nrs,q+=nrms) /* one nr x ms slab per k */
+    tensor_mxm(p,nr, q,ms, Jst,ns); /* slab of work2 (nr x ns) = slab of work * Jst */
+  tensor_mxm(out,nrs, work2,mt, Jtt,nt); /* out (nr*ns x nt) = work2 (nr*ns x mt) * Jtt */
+}
+
+#endif
diff --git a/src/jl/types.h b/src/jl/types.h
new file mode 100644
index 0000000..21977fa
--- /dev/null
+++ b/src/jl/types.h
@@ -0,0 +1,79 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+/* 
+  Define the integer types used throughout the code,
+  controlled by preprocessor macros.
+  
+  The integer type sint/uint (signed/unsigned) is used
+  most frequently, e.g., for indexing into local arrays,
+  and for processor ids. It can be one of
+  
+    macro             sint/uint type
+    
+    (default)         int
+    USE_LONG          long
+    USE_LONG_LONG     long long
+    
+  The slong/ulong type is used in relatively few places
+  for global identifiers and indices. It can be one of
+
+    macro             slong/ulong type
+    
+    (default)         int
+    GLOBAL_LONG       long
+    GLOBAL_LONG_LONG  long long
+
+  Since the long long type is not ISO C90, it is never
+  used unless explicitly asked for.
+*/
+
+#if defined(USE_LONG_LONG) || defined(GLOBAL_LONG_LONG) /* long long support, enabled only on request */
+typedef long long long_long;
+#  define WHEN_LONG_LONG(x) x
+#  if !defined(LLONG_MAX) /* fall back when the limits header in use does not provide the C99 names */
+#    if defined(LONG_LONG_MAX)
+#      define LLONG_MAX LONG_LONG_MAX
+#    else
+#      define LLONG_MAX 9223372036854775807LL
+#    endif
+#  endif
+#  if !defined(LLONG_MIN)
+#    if defined(LONG_LONG_MIN)
+#      define LLONG_MIN LONG_LONG_MIN
+#    else
+#      define LLONG_MIN (-9223372036854775807LL) /* parenthesized so that x-LLONG_MIN does not form "--" */
+#    endif
+#  endif
+#else
+#  define WHEN_LONG_LONG(x)
+#endif
+
+#if !defined(USE_LONG) && !defined(USE_LONG_LONG) /* TYPE_LOCAL(i,l,ll) selects one of its three arguments */
+#  define TYPE_LOCAL(i,l,ll) i
+#elif defined(USE_LONG) /* tested first, so USE_LONG wins if both macros are defined */
+#  define TYPE_LOCAL(i,l,ll) l
+#elif defined(USE_LONG_LONG)
+#  define TYPE_LOCAL(i,l,ll) ll
+#endif
+
+#if !defined(GLOBAL_LONG) && !defined(GLOBAL_LONG_LONG) /* TYPE_GLOBAL(i,l,ll) selects one of its three arguments */
+#  define TYPE_GLOBAL(i,l,ll) i
+#elif defined(GLOBAL_LONG) /* tested first, so GLOBAL_LONG wins if both macros are defined */
+#  define TYPE_GLOBAL(i,l,ll) l
+#else
+#  define TYPE_GLOBAL(i,l,ll) ll
+#endif
+
+/* local integer type: for quantities O(N/P) */
+#define sint   signed TYPE_LOCAL(int,long,long long)
+#define uint unsigned TYPE_LOCAL(int,long,long long)
+#define iabs TYPE_LOCAL(abs,labs,llabs) /* absolute-value function matching sint */
+
+/* global integer type: for quantities O(N) */
+#define slong   signed TYPE_GLOBAL(int,long,long long)
+#define ulong unsigned TYPE_GLOBAL(int,long,long long)
+#define iabsl TYPE_GLOBAL(abs,labs,llabs) /* absolute-value function matching slong */
+
+#endif
+
diff --git a/src/k10_mxm.c b/src/k10_mxm.c
new file mode 100644
index 0000000..a8ba95d
--- /dev/null
+++ b/src/k10_mxm.c
@@ -0,0 +1,56 @@
+#ifndef FNAME_H /* FORTRAN_NAME(low,up): picks the external symbol spelling for a Fortran-callable routine */
+#define FNAME_H
+
+#ifdef UPCASE /* upper-case name, no suffix */
+#  define FORTRAN_NAME(low,up) up
+#else
+#ifdef UNDERSCORE /* lower-case name with a trailing underscore */
+#  define FORTRAN_NAME(low,up) low##_
+#else
+#  define FORTRAN_NAME(low,up) low
+#endif
+#endif
+
+#endif
+
+#define k10_mxm FORTRAN_NAME(k10_mxm, K10_MXM)
+
+void tune_mxm888    (double*, double*, double*);
+void tune_mxm8864   (double*, double*, double*);
+void tune_mxm6488   (double*, double*, double*);
+void tune_mxm101010 (double*, double*, double*);
+void tune_mxm1010100(double*, double*, double*);
+void tune_mxm1001010(double*, double*, double*);
+
+
+int k10_mxm(double* a, int* sz1, double* b, int* sz2, double* c, int* sz3)
+{ /* runs a size-specific tuned kernel when one matches; returns 0 after running one, 1 when none was run */
+    int m, k, n;
+
+    m = *sz1;
+    k = *sz2;
+    n = *sz3;
+
+    if ((unsigned long)a%16 != 0 || (unsigned long)b%16 != 0 || (unsigned long)c%16 != 0){ /* kernels are only used on 16-byte aligned operands; unsigned long avoids narrowing the pointer on LP64 */
+       return 1;}
+
+    // 8,8,8  8,8,64  64,8,8
+    if (k == 8){
+       if (m == 8){
+          if     (n ==  8){tune_mxm888 (a,b,c); return 0;}
+          else if(n == 64){tune_mxm8864(a,b,c); return 0;}
+       }
+       else if (m == 64 && n == 8){tune_mxm6488(a,b,c); return 0;}
+    }
+
+    // 10,10,10  10,10,100  100,10,10
+    if (k == 10){
+       if (m == 10){
+          if     (n ==  10){tune_mxm101010 (a,b,c); return 0;}
+          else if(n == 100){tune_mxm1010100(a,b,c); return 0;}
+       }
+       else if (m == 100 && n == 10){tune_mxm1001010(a,b,c); return 0;}
+    }
+
+    return 1; /* no tuned kernel for this (m,k,n) */
+}
diff --git a/src/makenek.inc b/src/makenek.inc
new file mode 100644
index 0000000..6d97ace
--- /dev/null
+++ b/src/makenek.inc
@@ -0,0 +1,324 @@
+# This include file is used by the makenek script
+# to automatically create a makefile for Nek5000
+# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC
+# ------------------------------------------------
+
+echo "makenek - automatic build tool for Nek5000"
+
+if [ "$PPLIST" == "?" ]; then
+  echo "available pre-processor symbols:" 
+  echo "  BGP       enable Blue Gene/P optimizations "
+  echo "  BGQ       compile for Blue Gene/Q "
+  echo "  K10_MXM   use optimized MxM kernel for AMD Family 10h processors" 
+  echo "  TIMERS    turns on detailed routine timers"
+  echo "  MPITIMER  use MPI_Wtime for timing"
+  echo "  BGQTIMER  use BG/Q cycle counter for timing"
+  echo "  CGTTIMER  use clock_gettime for timing"
+  echo "  NITER=x   number of cg iterations, default 100"
+  echo "  LOG       output progress"
+  exit 1
+fi
+
+if [ "$1" == "clean" ]; then
+  make clean
+  exit 0
+fi
+
+NOCOMPILE=0
+if [ "$1" == "-nocompile" ]; then
+  NOCOMPILE=1
+fi
+
+# Resolve absolute paths.  SOURCE_ROOT is quoted so that a directory
+# name with spaces works and an empty value cannot send cd to $HOME.
+CASENAME=$1
+CASEDIR=$(pwd)
+APATH_SRC=$(cd "$SOURCE_ROOT"; pwd)
+SOURCE_ROOT=$APATH_SRC
+
+# do some basic checks
+if [ "$CASEDIR" == "$SOURCE_ROOT" ]; then
+   echo "FATAL ERROR: Working directory has to be different from the source!"
+   exit 1
+fi
+
+if [ ! -f SIZE ]; then
+   echo "FATAL ERROR: Cannot find SIZE!"
+   exit 1
+fi
+
+if [ ! -f ./makefile.template ]; then
+  echo "FATAL ERROR: Cannot find ./makefile.template!"
+  exit 1
+fi
+
+# Fortran compiler: the first word of $F77 must be found by which.
+# Any further words of $F77 are not looked up.
+if ! which $(echo $F77 | awk '{print $1}') 1>/dev/null; then
+  echo "FATAL ERROR: Cannot find $F77!"
+  exit 1
+fi
+\rm test_f77.o 2>/dev/null
+
+# Smoke test: an empty subroutine has to compile to test_f77.o.
+cat > test_f77.f << _ACEOF
+      subroutine test
+      end
+_ACEOF
+$F77 -c test_f77.f >/dev/null
+[ -f test_f77.o ] || {
+  echo "FATAL ERROR: Basic compiler test for $F77 failed!"
+  exit 1
+}
+\rm test_f77.* 2>/dev/null
+
+# C compiler: the same lookup for the first word of $CC, followed by
+# the same kind of smoke test with an empty C function.
+if ! which $(echo $CC | awk '{print $1}') 1>/dev/null; then
+  echo "FATAL ERROR: Cannot find $CC!"
+  exit 1
+fi
+\rm test_cc.o 2>/dev/null
+
+cat > test_cc.c << _ACEOF
+      void function(){}
+_ACEOF
+$CC -c test_cc.c 1>/dev/null
+[ -f test_cc.o ] || {
+  echo "FATAL ERROR: Basic compiler test for $CC failed!"
+  exit 1
+}
+\rm test_cc.* 2>/dev/null
+
+# Remove a nekbone file left in the working directory, if any.
+rm -f nekbone 2>/dev/null
+
+# Check if the compiler adds an underscore to external functions:
+# compile a call to byte_write and look for byte_write_ in the symbols.
+UNDERSCORE=false
+cat > test_underscore.f << _ACEOF
+      subroutine underscore_test
+        call byte_write
+      end
+_ACEOF
+# stdout is redirected first and stderr then follows it: both discarded
+$F77 -c test_underscore.f >/dev/null 2>&1
+if nm test_underscore.o 2>/dev/null | grep byte_write_ 1>/dev/null; then
+  UNDERSCORE=true
+fi
+\rm test_underscore.* 2>/dev/null
+
+# trying to figure which compiler the wrapper is using
+#
+# $F77 may be a wrapper script.  Three flags are tried in turn
+# (presumably the Open MPI, MPICH and Cray conventions for printing
+# the underlying command line -- confirm against the wrappers in use).
+# For each flag the first word of the output is reduced to its
+# basename and accepted as soon as which resolves it to an existing
+# file.
+F77ok=0
+
+for wrapper_flag in -showme -show -craype-verbose; do
+  # stdout goes to .tmp, stderr is dropped, a failing wrapper is ignored
+  F77comp_=$($F77 $wrapper_flag 2>/dev/null 1>.tmp || true)
+  F77comp=$(cat .tmp | awk '{print $1}' | awk -F/ '{print $NF}' || true)
+  if [ -f "$(which $F77comp 2>/dev/null)" ]; then
+    F77ok=1
+    break
+  fi
+done
+
+# No flag produced a usable name: fall back to the first word of $F77.
+if [ $F77ok -eq 0 ]; then
+  F77comp=$(echo $F77 | awk '{print $1}')
+  if [ -f "$(which $F77comp 2>/dev/null)" ]; then
+    F77ok=1
+  fi
+fi
+
+# Remove the scratch file.  If nothing resolved, F77comp becomes the
+# literal string "unknown".
+\rm -f .tmp
+
+if [ $F77ok -eq 0 ]; then
+  F77comp="unknown"
+fi
+
+
+# Per-compiler flags (P), selected by glob match on the detected name.
+CFE_FLAG=""   # handed to $CC by the size probes; set for ifort only
+case $F77comp in
+  *pgf*)
+    P="-r8 -Mpreprocess" ;;
+  *gfortran*)
+    P="-fcray-pointer -fdefault-real-8 -x f77-cpp-input" ;;
+  *ifort*)
+    P="-r8 -fpconstant -fpp"
+    CFE_FLAG="-xHost" ;;
+  *pathf*)
+    P="-r8 -cpp -fno-second-underscore" ;;
+  *xlf*)
+    P="-qrealsize=8 -qdpc=e -qsuffix=cpp=f"
+    PPPO="-WF,"
+    F77="${F77} -qsuppress=cmpmsg" ;;
+  *ftn*)
+    P="-r8 -Mpreprocess" ;;
+  *sunf*)
+    P="-r8const -xtypemap=real:64 -fpp" ;;
+  *open*)
+    P="-r8 -cpp -fno-second-underscore" ;;
+  *) echo "ERROR: Unable to detect compiler!"
+    echo "        - don't know how to promote datatype REAL to 8 bytes"
+    echo "        - don't know how to invoke the C pre-processor (CPP) before compilation"
+    echo "       Please edit the makefile and specify the requested compiler flags using the P variable."
+    echo ""
+    P="<specify your compiler flags here>"
+    NOCOMPILE=1
+    read ;;   # waits for one line on stdin before going on
+esac
+export PPPO
+
+# Check ptr size: build and run a probe that prints sizeof(int *).
+cat > tmp.c << _ACEOF
+#include <stdlib.h>
+#include <stdio.h>
+int main()
+{
+  int *p;printf("%li\n",(long)sizeof(p));return 0;
+}
+_ACEOF
+$CC $CFE_FLAG tmp.c >/dev/null 2>&1
+ptrSize=$(./a.out 2>/dev/null) ||
+  echo "WARNING: pointer size probe failed, PTRSIZE8 not set"
+rm -f tmp.c a.out
+if [ "$ptrSize" == "8" ]; then
+  PPLIST="${PPLIST} PTRSIZE8"
+fi
+
+# Normalize IFMPI: "false" or "no" turn MPI off, anything else (also
+# an unset value) turns it on and adds the MPI preprocessor symbol.
+case "$IFMPI" in
+  false|no)
+    IFMPI=false ;;
+  *)
+    IFMPI=true; PPLIST="${PPLIST} MPI" ;;
+esac
+export IFMPI
+
+# Check size of long int: build and run a probe printing sizeof(long int).
+cat > tmp.c << _ACEOF
+#include <stdlib.h>
+#include <stdio.h>
+int main()
+{
+  int i;
+  i=sizeof(long int);
+  printf("%i\n",i); return 0;
+}
+_ACEOF
+$CC $CFE_FLAG tmp.c >/dev/null 2>&1
+longIntTest=$(./a.out 2>/dev/null) ||
+  echo "WARNING: long int size probe failed, LONGINT8 not set"
+rm -f tmp.c a.out
+if [ "$longIntTest" == "8" ]; then
+  PPLIST="${PPLIST} LONGINT8"
+fi
+
+# UNDERSCORE only when the variable is "true"; GLOBAL_LONG_LONG always.
+case "$UNDERSCORE" in
+  true) PPLIST="${PPLIST} UNDERSCORE" ;;
+esac
+PPLIST="${PPLIST} GLOBAL_LONG_LONG"
+
+# Object files substituted into the MXM line of the makefile template.
+MXM_USER="mxm_std.o blas.o"
+if echo $PPLIST | grep 'BGP' >/dev/null; then
+  # BGP: add the bg_* objects and set both optimization flag variables
+  MXM_USER="mxm_std.o bg_aligned3.o bg_mxm44.o bg_mxm44_uneven.o bg_mxm3.o blas.o"
+  OPT_FLAGS_STD="-qarch=450 -qtune=450"
+  OPT_FLAGS_MAG="-O5 -qarch=450d -qtune=450"
+fi
+if echo $PPLIST | grep 'BLAS_MXM' >/dev/null; then
+  MXM_USER="mxm_std.o"   # tested last, so it overrides the BGP list
+fi
+
+# Default optimization levels, kept as literal make-variable references.
+L0='$(G) -O0'
+L2='$(G) -O2'
+L3='$(G) -O3'
+L4='$(L3)'
+
+# OPT_FLAGS_STD replaces -O2/-O3 when it matches the pattern \-O. ,
+# otherwise it is appended after them.
+if [ -n "$OPT_FLAGS_STD" ]; then
+  if echo $OPT_FLAGS_STD | grep "\-O." 1>/dev/null; then
+    L2="\$(G) $OPT_FLAGS_STD"
+    L3="\$(G) $OPT_FLAGS_STD"
+  else
+    L2="\$(G) -O2 $OPT_FLAGS_STD"
+    L3="\$(G) -O3 $OPT_FLAGS_STD"
+  fi
+fi
+
+# A non-empty OPT_FLAGS_MAG replaces L4 entirely.
+if [ -n "$OPT_FLAGS_MAG" ]; then L4="\$(G) $OPT_FLAGS_MAG"; fi
+
+# A non-empty USR_LIB is appended to USR_LFLAGS.
+if [ -n "$USR_LIB" ]; then
+    USR_LFLAGS="${USR_LFLAGS} ${USR_LIB}"
+fi
+
+# tweak makefile template
+echo "generating makefile ..."
+rm -rf makefile 2>/dev/null
+# Stage 1: fill the template variables into the staging file .makefile.
+sed -e "s:^F77[ ]*=.*:F77\:=$F77:" \
+-e "s:^CC[ ]*=.*:CC\:=$CC:" \
+-e "s:^G[ ]*=.*:G\:=$G:" \
+-e "s:^OPT_FLAGS[ ]*=.*:OPT_FLAGS\:=$OPT_FLAGS:" \
+-e "s/^P[ ]*=.*/P:=$P/" \
+-e "s/^L0[ ]*=.*/L0=$L0/" \
+-e "s/^L2[ ]*=.*/L2=$L2/" \
+-e "s/^L3[ ]*=.*/L3=$L3/" \
+-e "s/^L4[ ]*=.*/L4=$L4/" \
+-e "s/^PPPO[ ]*=.*/PPPO=$PPPO/" \
+-e "s/^PPS[ ]*=.*/PPS=$PPLIST/" \
+-e "s:^MXM[ ]*=.*:MXM=$MXM_USER:" \
+-e "s/^IFMPI[ ]*=.*/IFMPI:=$IFMPI/" \
+-e "s:^USR[ ]*=.*:USR\:=$USR:" \
+-e "s:^USR_LFLAGS[ ]*=.*:USR_LFLAGS\:=$USR_LFLAGS:" \
+-e "s:^S[ ]*=.*:S\:=${SOURCE_ROOT}:" ./makefile.template >.makefile
+# A -g in $G rewrites every -O1..-O4 in the staging file to -O0.
+echo $G | grep '\-g' 1>/dev/null
+if [ $? -eq 0 ]; then
+  sed 's/-O[1-4]/-O0/g' .makefile > .makefile.tmp
+  mv .makefile.tmp .makefile
+  echo "Activate DEBUG mode"
+fi
+# Both lines belong in the staging file; makefile itself is rewritten below.
+if [ "$USR" != "" ]; then
+  echo "###########################################################" >> .makefile
+  echo "include makefile_usr.inc" >> .makefile
+fi
+# Stage 2: prepend the banner, fill in the case variables, write makefile.
+if [ -f .makefile ]; then
+  sed -e "1i\\
+### makefile automatically created by makenek `date +"%m/%d/%Y %T"` ###" \
+-e "s:^CASEDIR[ ]*=.*:CASEDIR\:=${CASEDIR}:" \
+-e "s:^CASENAME[ ]*=.*:CASENAME\:=${CASENAME}:" .makefile > makefile 
+else
+  echo "ERROR: Nek Makefile could not be created!"
+  exit 1 
+fi
+\rm .makefile 2>/dev/null
+
+# SIZE must exist.  The case-insensitive search for lxo only produces
+# an exit status, and nothing in this file reads it.
+if [ ! -f "./SIZE" ]; then
+  echo "FATAL ERROR: Cannot find SIZE"
+  exit 1
+fi
+grep -i 'lxo' SIZE >/dev/null
+
+if [ $NOCOMPILE -eq 1 ]; then
+  exit 0
+fi
diff --git a/src/math.f b/src/math.f
new file mode 100644
index 0000000..48a24c3
--- /dev/null
+++ b/src/math.f
@@ -0,0 +1,1402 @@
+c-----------------------------------------------------------------------
+      subroutine blank(a,n)
+c     Overwrite the first n characters of a with spaces.
+      character*1 a(1)
+      character*1 space
+      save        space
+      data        space /' '/
+      do k=1,n
+         a(k)=space
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine vsq (a,n)
+c     Square each of a(1..n) in place.
+      dimension  a(1)
+      do k = 1, n
+         a(k) = a(k)**2
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine vsqrt(a,n)
+c     Replace each of a(1..n) by its square root, in place.
+      dimension  a(1)
+      do k = 1, n
+         a(k) = sqrt(a(k))
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine invers2(a,b,n)
+c     a(i) = 1/b(i) for i=1..n; entries of b are not checked for zero.
+      real a(1),b(1)
+      do k=1,n
+         a(k)=1./b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine invcol1(a,n)
+c     Replace each of a(1..n) by its reciprocal, in place.
+      real a(1)
+      do k=1,n
+         a(k)=1./a(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine invcol2(a,b,n)
+c     In-place pointwise division: a(i) = a(i)/b(i), i=1..n.
+c     Entries of b are not checked for zero.
+      real a(1),b(1)
+      do k=1,n
+         a(k)=a(k)/b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine invcol3(a,b,c,n)
+c     Pointwise division into a third array: a(i) = b(i)/c(i),
+c     i=1..n.  Entries of c are not checked for zero.
+      real a(1),b(1),c(1)
+      do k=1,n
+         a(k)=b(k)/c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine col4(a,b,c,d,n)
+c     Pointwise triple product: a(i) = b(i)*c(i)*d(i), i=1..n.
+      real a(1),b(1),c(1),d(1)
+      do k=1,n
+         a(k)=b(k)*c(k)*d(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine Xaddcol3(a,b,c,n)
+c     Accumulate a pointwise product: a(i) = a(i) + b(i)*c(i), i=1..n.
+      real a(1),b(1),c(1)
+      do k=1,n
+         a(k)=a(k)+b(k)*c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine addcol4(a,b,c,d,n)
+c     Accumulate a triple product: a(i) = a(i) + b(i)*c(i)*d(i).
+      real a(1),b(1),c(1),d(1)
+      do k=1,n
+         a(k)=a(k)+b(k)*c(k)*d(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine ascol5 (a,b,c,d,e,n)
+c     Difference of two products: a(i) = b(i)*c(i) - d(i)*e(i).
+      real a(1),b(1),c(1),d(1),e(1)
+      do k=1,n
+         a(k) = b(k)*c(k)-d(k)*e(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine sub2(a,b,n)
+c     In-place subtraction: a(i) = a(i) - b(i), i=1..n.
+      real a(1),b(1)
+      do k=1,n
+         a(k)=a(k)-b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine sub3(a,b,c,n)
+c     Pointwise difference: a(i) = b(i) - c(i), i=1..n.
+      real a(1),b(1),c(1)
+      do k=1,n
+         a(k)=b(k)-c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine subcol3(a,b,c,n)
+c     Subtract a pointwise product in place:
+c        a(i) = a(i) - b(i)*c(i),  i=1..n
+      real a(1),b(1),c(1)
+      do k=1,n
+         a(k)=a(k)-b(k)*c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine subcol4(a,b,c,d,n)
+c     Subtract a pointwise triple product in place:
+c        a(i) = a(i) - b(i)*c(i)*d(i),  i=1..n
+      real a(1),b(1),c(1),d(1)
+      do k=1,n
+         a(k)=a(k)-b(k)*c(k)*d(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine rzero(a,n)
+c     Set a(1..n) to 0.0.
+      dimension  a(1)
+      do k = 1, n
+         a(k) = 0.0
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine izero(a,n)
+c     Set the integer entries a(1..n) to 0.
+      integer a(1)
+      do k = 1, n
+         a(k) = 0
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine ione(a,n)          ! set integer a(1..n) to 1
+      integer   a(1)
+      do k = 1, n
+         a(k) = 1
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine rone(a,n)          ! set a(1..n) to 1.0
+      dimension  a(1)
+      do k = 1, n
+         a(k) = 1.0
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine cfill(a,b,n)
+c     Fill a(1..n) with the scalar b.
+      dimension  a(1)
+      do k = 1, n
+         a(k) = b
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine ifill(ia,ib,n)
+c     Fill the integer entries ia(1..n) with the scalar ib.
+      dimension ia(1)
+      do k = 1, n
+         ia(k) = ib
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine copy(a,b,n)
+c     Copy b(1..n) into a(1..n), one element at a time in ascending
+c     index order.
+      real a(1),b(1)
+      do k=1,n
+         a(k)=b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine chcopy(a,b,n)
+c     Copy the single characters b(1..n) into a(1..n).
+      character*1 a(1), b(1)
+      do k = 1, n
+         a(k) = b(k)
+      enddo
+      end
+ 
+      subroutine icopy(a,b,n)
+c     Copy the integers b(1..n) into a(1..n).
+      integer a(1), b(1)
+      do k = 1, n
+         a(k) = b(k)
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine i8copy(a,b,n)
+c     Copy the integer*8 entries b(1..n) into a(1..n).
+      integer*8 a(1), b(1)
+      do k = 1, n
+         a(k) = b(k)
+      enddo
+      end
+c-----------------------------------------------------------------------
+      subroutine chsign(a,n)
+c     Negate each of a(1..n) in place.
+      real a(1)
+      do k=1,n
+         a(k) = -a(k)
+      enddo
+      return
+      end
+ 
+c-----------------------------------------------------------------------
+      subroutine cmult(a,const,n)
+c     Scale a(1..n) in place by the scalar const.
+      real a(1)
+      do k=1,n
+         a(k)=a(k)*const
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine cadd(a,const,n)
+c     Add the scalar const to each of a(1..n) in place.
+      real a(1)
+      do k=1,n
+         a(k)=a(k)+const
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine iadd(i1,iscal,n)
+c     Add the integer iscal to each of i1(1..n) in place.
+      dimension i1(1)
+      do k=1,n
+         i1(k)=i1(k)+iscal
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine cadd2(a,b,const,n)
+c     Shifted copy: a(i) = b(i) + const, i=1..n.
+      real a(1),b(1)
+      do k=1,n
+         a(k)=b(k)+const
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlmin(vec,n)
+c     Minimum of vec(1..n).  The running value starts at 99.0e20, so
+c     that value is returned when n < 1 or every entry exceeds it.
+      real vec(1)
+      vlmin = 99.0e20
+      do k=1,n
+         vlmin = min(vlmin,vec(k))
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function ivlmin(vec,n)
+c     Minimum of vec(1..n), or 0 when n < 1.  The scan is seeded with
+c     vec(1) rather than a fixed sentinel, so arrays whose entries all
+c     exceed 8888888 give the correct result.
+      integer vec(1)
+      ivlmin=0
+      if (n.le.0) return
+      ivlmin = vec(1)
+      do i=2,n
+         ivlmin = min(ivlmin,vec(i))
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function ivlmax(vec,n)
+c     Maximum of vec(1..n), or 0 when n < 1.  The scan is seeded with
+c     vec(1) rather than a fixed sentinel, so arrays whose entries are
+c     all below -8888888 give the correct result.
+      integer vec(1)
+      ivlmax=0
+      if (n.le.0) return
+      ivlmax = vec(1)
+      do i=2,n
+         ivlmax = max(ivlmax,vec(i))
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlmax(vec,n)
+c     Maximum of vec(1..n); the running value starts at -99.0e20.
+      real vec(1)
+      vlmax = -99.0e20
+      do k=1,n
+         vlmax = max(vlmax,vec(k))
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlamax(vec,n)
+c     Largest absolute value among vec(1..n); 0.0 when the loop does
+c     not run (n < 1).
+      real vec(1)
+      vlamax = 0.0
+      do k=1,n
+         vlamax = max(vlamax,abs(vec(k)))
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlsum(vec,n)
+c     Sum of vec(1..n), accumulated in ascending index order;
+c     0. when n < 1.
+      real vec(1)
+      total = 0.
+      do k=1,n
+         total=total+vec(k)
+      enddo
+      vlsum = total
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine vcross (u1,u2,u3,v1,v2,v3,w1,w2,w3,n)
+c
+c     Pointwise Cartesian cross product u = v x w for i=1..n, with
+c     each vector stored as three component arrays.  The components
+c     of u are written one after another inside the loop.
+c
+      dimension u1(1),u2(1),u3(1)
+      dimension v1(1),v2(1),v3(1)
+      dimension w1(1),w2(1),w3(1)
+      do k=1,n
+         u1(k) = v2(k)*w3(k) - v3(k)*w2(k)
+         u2(k) = v3(k)*w1(k) - v1(k)*w3(k)
+         u3(k) = v1(k)*w2(k) - v2(k)*w1(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine vdot2 (dot,u1,u2,v1,v2,n)
+c
+c     Pointwise dot product of two-component vectors:
+c        dot(i) = u1(i)*v1(i) + u2(i)*v2(i),  i=1..n
+c     The result is an array, one value per point.
+c
+      dimension dot(1)
+      dimension u1(1),u2(1)
+      dimension v1(1),v2(1)
+      do k=1,n
+         dot(k) = u1(k)*v1(k) + u2(k)*v2(k) 
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine vdot3 (dot,u1,u2,u3,v1,v2,v3,n)
+c
+c     Pointwise dot product of three-component vectors:
+c        dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i),  i=1..n
+c     The result is an array, one value per point.
+c
+      dimension dot(1)
+      dimension u1(1),u2(1),u3(1)
+      dimension v1(1),v2(1),v3(1)
+      do k=1,n
+         dot(k) = u1(k)*v1(k) + u2(k)*v2(k) + u3(k)*v3(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine addtnsr(s,h1,h2,h3,nx,ny,nz)
+c
+c     Add the tensor product of h1, h2 and h3 to s (single element):
+c        s(ix,iy,iz) = s(ix,iy,iz) + (h2(iy)*h3(iz))*h1(ix)
+c
+      dimension h1(1),h2(1),h3(1)
+      dimension s(nx,ny,nz)
+      do kz=1,nz
+      do ky=1,ny
+         h23 = h2(ky)*h3(kz)
+         do kx=1,nx
+            s(kx,ky,kz)=s(kx,ky,kz)+h23*h1(kx)
+         enddo
+      enddo
+      enddo
+      return
+      end
+      function ltrunc(string,l)
+c     Index of the last non-blank character in string(1..l), or 0
+c     when every character is a blank or l < 1.
+      character*1 string(l)
+      character*1   space
+      data space/' '/
+      ltrunc=0
+      do k=l,1,-1
+         if (string(k).ne.space) then
+            ltrunc=k
+            return
+         endif
+      enddo
+      end
+c-----------------------------------------------------------------------
+      function mod1(i,n)
+c
+c     One-based modulus, mod(i+n-1,n)+1: for positive i and n this is
+c     mod(i,n), except that a multiple of n gives n instead of 0.
+c     Returns 0 for i = 0, and 0 after a warning for n = 0.
+c
+      mod1=0
+      if (i.eq.0) return
+      if (n.eq.0) then
+         write(6,*) 
+     $  'WARNING:  Attempt to take MOD(I,0) in function mod1.'
+         return
+      endif
+      ishift = i+n-1
+      mod1 = mod(ishift,n)+1
+      return
+      end
+c-----------------------------------------------------------------------
+      integer function log2(k)
+c     Base-2 logarithm of k rounded via int(log10(k)/log10(2.)+0.5).
+c     k is converted to real first; k < 1 is not guarded against.
+      rk=(k)
+      ratio=log10(rk)/log10(2.0)
+      log2=int(ratio+0.5)
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine iflip(i1,n)
+c     Reverse the order of i1(1..n) in place by swapping entry k with
+c     entry n+1-k for k=1..n/2.
+      dimension i1(1)
+      do k=1,n/2
+         mirror=n+1-k
+         iheld=i1(k)
+         i1(k)=i1(mirror)
+         i1(mirror)=iheld
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine iswap(b,ind,n,temp)
+      integer b(1),ind(1),temp(1)
+c
+c     Permute b(1..n) in place so that new b(i) = old b(ind(i)).
+c     temp(1..n) is scratch and holds the permuted copy on return.
+c
+      do k=1,n
+         temp(k)=b(ind(k))
+      enddo
+      do k=1,n
+         b(k)=temp(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine col2(a,b,n)
+c     In-place pointwise product:
+c        a(i) = a(i)*b(i),  i=1..n
+      real a(1),b(1)
+!xbm* unroll (10)
+      do k=1,n
+         a(k)=a(k)*b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine col2c(a,b,c,n)
+c     In-place pointwise product scaled by the scalar c:
+c        a(i) = a(i)*b(i)*c,  i=1..n
+      real a(1),b(1),c
+      do k=1,n
+         a(k)=a(k)*b(k)*c
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine col3(a,b,c,n)
+c     Pointwise product into a third array: a(i) = b(i)*c(i).
+      real a(1),b(1),c(1)
+!xbm* unroll (10)
+      do k=1,n
+         a(k)=b(k)*c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add2(a,b,n)
+c     In-place addition: a(i) = a(i) + b(i), i=1..n.
+      real a(1),b(1)
+!xbm* unroll (10)
+      do k=1,n
+         a(k)=a(k)+b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add3(a,b,c,n)
+c     Pointwise sum into a third array: a(i) = b(i) + c(i).
+      real a(1),b(1),c(1)
+!xbm* unroll (10)
+      do k=1,n
+         a(k)=b(k)+c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine addcol3(a,b,c,n)
+c     Accumulate a pointwise product: a(i) = a(i) + b(i)*c(i).
+      real a(1),b(1),c(1)
+!xbm* unroll (10)
+      do k=1,n
+         a(k)=a(k)+b(k)*c(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add2s1(a,b,c1,n)
+c     Scale a, then add b: a(i) = c1*a(i) + b(i), i=1..n.
+      real a(1),b(1)
+      do k=1,n
+        a(k)=c1*a(k)+b(k)
+      enddo
+      return
+      end
+ 
+c-----------------------------------------------------------------------
+      subroutine add2s2(a,b,c1,n)
+c     Add a scaled vector in place: a(i) = a(i) + c1*b(i), i=1..n.
+      real a(1),b(1)
+      do k=1,n
+        a(k)=a(k)+c1*b(k)
+      enddo
+      return
+      end
+ 
+c-----------------------------------------------------------------------
+      subroutine add3s2(a,b,c,c1,c2,n)
+c     Linear combination: a(i) = c1*b(i) + c2*c(i), i=1..n.
+      real a(1),b(1),c(1)
+      do k=1,n
+        a(k)=c1*b(k)+c2*c(k)
+      enddo
+      return
+      end
+ 
+c-----------------------------------------------------------------------
+      subroutine add4(a,b,c,d,n)
+c     Three-term pointwise sum: a(i) = b(i) + c(i) + d(i), i=1..n.
+      real a(1),b(1),c(1),d(1)
+      do k=1,n
+         a(k)=b(k)+c(k)+d(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlsc2(x,y,n)
+c     Local inner product: sum of x(i)*y(i) over i=1..n.
+      real x(1),y(1)
+      acc = 0.
+      do k=1,n
+         acc = acc + x(k)*y(k)
+      enddo
+      vlsc2=acc
+      return
+      end
+c-----------------------------------------------------------------------
+      real function vlsc21(x,y,n)
+c     Local weighted sum: sum of x(i)*x(i)*y(i) over i=1..n.
+      real x(1),y(1)
+      acc = 0.
+      do k=1,n
+         acc = acc + x(k)*x(k)*y(k)
+      enddo
+      vlsc21=acc
+      return
+      end
+
+
+C----------------------------------------------------------------------------
+C
+C     Vector reduction routines which require communication 
+C     on a parallel machine. These routines must be substituted with
+C     appropriate routines which take into account the specific architecture.
+C
+C----------------------------------------------------------------------------
+      function glsc3(a,b,mult,n)
+c
+c     Weighted inner product: the local sum of a(i)*b(i)*mult(i) is
+c     passed through gop with the + operation (presumably a sum over
+c     all ranks).  Accumulates in default REAL, not explicit double.
+      real a(1),b(1),mult(1)
+      real part,work(1)
+      part = 0.0
+      do k=1,n
+         part = part + a(k)*b(k)*mult(k)
+      enddo
+      call gop(part,work,'+  ',1)
+      glsc3 = part
+      return
+      end
+c-----------------------------------------------------------------------
+      function glsc2(x,y,n)
+c
+c     Inner product: the local sum of x(i)*y(i) is passed through gop
+c     with the + operation (presumably a sum over all ranks).
+c     Accumulates in default REAL, not explicit double precision.
+      real x(1), y(1)
+      real part,work(1)
+      part=0.0
+      do k=1,n
+         part = part+ x(k)*y(k)
+      enddo
+      call gop(part,work,'+  ',1)
+      glsc2 = part
+      return
+      end
+c-----------------------------------------------------------------------
+      function glsc23(x,y,z,n)
+c
+c     Inner product of x*x*y*z: the local sum of
+c     x(i)*x(i)*y(i)*z(i), i=1..n, is passed through gop with the +
+c     operation (presumably a sum over all ranks) and returned.
+c
+      real x(1), y(1),z(1)
+      real part,work(1)
+      part = 0.0
+      do k=1,n
+         part=part+x(k)*x(k)*y(k)*z(k)
+      enddo
+      call gop(part,work,'+  ',1)
+      glsc23 = part
+      return
+      end
+c-----------------------------------------------------------------------
+c     real function gl2norm(a,n)
+
+c     include 'SIZE'
+c     include 'MASS'
+
+c     real a(1)
+
+c     common /scrsf/ w1 (lx1,ly1,lz1,lelt)
+
+c     call col3 (w1,a,a,n)
+c     call col2 (w1,bm1,n)
+c     gl2norm = sqrt(glsum (w1,n)/volvm1)
+
+c     return
+c     end
+c-----------------------------------------------------------------------
+      function glsum (x,n)
+c     Local sum of x(1..n), passed through gop with the + operation.
+      dimension x(1)
+      dimension part(1),work(1)
+      part(1) = 0.
+      do k=1,n
+         part(1) = part(1)+x(k)
+      enddo
+      call gop(part,work,'+  ',1)
+      glsum = part(1)
+      return
+      end
+c-----------------------------------------------------------------------
+      real function glamax(a,n)
+c     Local max of abs(a(1..n)), passed through gop (M operation).
+      real a(1)
+      dimension part(1),work(1)
+      part(1) = 0.0
+      do k=1,n
+         part(1) = max(part(1),abs(a(k)))
+      enddo
+      call gop(part,work,'M  ',1)
+      glamax=abs(part(1))
+      return
+      end
+c-----------------------------------------------------------------------
+      real function glamin(a,n)
+c     Local min of abs(a(1..n)) from 9.e28, through gop (m operation).
+      real a(1)
+      dimension part(1),work(1)
+      part(1) = 9.e28
+      do k=1,n
+         part(1) = min(part(1),abs(a(k)))
+      enddo
+      call gop(part,work,'m  ',1)
+      glamin=abs(part(1))
+      return
+      end
+c-----------------------------------------------------------------------
+      function iglmin(a,n)
+c     Local min of a(1..n) from 999999999, through igop (m operation).
+      integer a(1)
+      integer part(1),work(1)
+      part(1)=  999999999
+      do k=1,n
+         part(1)=min(part(1),a(k))
+      enddo
+      call igop(part,work,'m  ',1)
+      iglmin=part(1)
+      return
+      end
+c-----------------------------------------------------------------------
+      function iglmax(a,n)
+c     Local max of a(1..n) from -999999999, through igop (M operation).
+      integer a(1)
+      integer part(1),work(1)
+      part(1)= -999999999
+      do k=1,n
+         part(1)=max(part(1),a(k))
+      enddo
+      call igop(part,work,'M  ',1)
+      iglmax=part(1)
+      return
+      end
+c-----------------------------------------------------------------------
+      function iglsum(a,n)
+c     Local integer sum of a(1..n), passed through igop (+ operation).
+      integer a(1)
+      integer part(1),work(1)
+      part(1)= 0
+      do k=1,n
+         part(1)=part(1)+a(k)
+      enddo
+      call igop(part,work,'+  ',1)
+      iglsum=part(1)
+      return
+      end
+C-----------------------------------------------------------------------
+      integer*8 function i8glsum(a,n)
+c     Local integer*8 sum of a(1..n), through i8gop (+ operation).
+      integer*8 a(1)
+      integer*8 part(1),work(1)
+      part(1)= 0
+      do k=1,n
+         part(1)=part(1)+a(k)
+      enddo
+      call i8gop(part,work,'+  ',1)
+      i8glsum=part(1)
+      return
+      end
+C-----------------------------------------------------------------------
+      function glmax(a,n)
+c     Local max of a(1..n) from -99.0e20, through gop (M operation).
+      real a(1)
+      dimension part(1),work(1)
+      part(1)=-99.0e20
+      do k=1,n
+         part(1)=max(part(1),a(k))
+      enddo
+      call gop(part,work,'M  ',1)
+      glmax=part(1)
+      return
+      end
+c-----------------------------------------------------------------------
+      function glmin(a,n)
+c     Local min of a(1..n) from 99.0e20, through gop (m operation).
+      real a(1)
+      dimension part(1),work(1)
+      part(1)=99.0e20
+      do k=1,n
+         part(1)=min(part(1),a(k))
+      enddo
+      call gop(part,work,'m  ',1)
+      glmin = part(1)
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine gllog(la,lb)
+c
+c     If ANY LA=LB, then ALL LA=LB.
+c
+c     Each caller contributes 0.0 when its la already has the same
+c     truth value as lb and 1.0 otherwise.  The contributions are
+c     combined by gop with the * operation (presumably a product over
+c     all ranks), and la is set to lb when the combined value is zero.
+c
+      logical la,lb
+      dimension flag(1),work(1)
+      flag(1)=1.0
+      if (la.eqv.lb) flag(1)=0.0
+      call gop(flag,work,'*  ',1)
+      if (flag(1).eq.0.0) la=lb
+      return
+      end
+c-----------------------------------------------------------------------
+      function fmdian(a,n,ifok)
+C     Find the median of the (global) set A by iterating on a guess.
+      include 'SIZE'
+      DIMENSION A(1)
+      DIMENSION WORK1(5),WORK2(5)
+      DIMENSION GUES(100)
+      LOGICAL IFOK
+C     IFOK=.FALSE. turns on debug prints.  Aborts via exitt if stuck.
+      AMP  =1.5
+      AFAC =1.5
+      GMIN =GLMIN(A,N)
+      GMAX =GLMAX(A,N)
+      GMIN0=GMIN
+      GMAX0=GMAX
+      GUESS=(GMAX+GMIN)/2.0
+      EPS  =(GMAX-GMIN)
+      IF (EPS.EQ.0.0) THEN
+         FMDIAN=GMAX
+         return
+      ENDIF
+      WORK1(1)=N
+      CALL GOP(WORK1,WORK2,'+  ',1)
+      NTOT=WORK1(1)
+      N2 = (NTOT+1)/2
+      IF (.NOT.IFOK) THEN
+        WRITE(6,8) NID,N,(A(I),I=1,N)
+        WRITE(6,9) NID,NTOT,N2,N,GMIN,GMAX
+    8   FORMAT(I5,'N,A:',I5,10(6F10.5,/)) 
+    9   FORMAT(I5,'mnx:',3I6,2F10.5)
+      ENDIF
+C
+C     This is the trial loop
+C
+      ITRY=-1
+   10 CONTINUE
+      ITRY=ITRY+1
+      II=ITRY+1
+      IF (II.LE.100) GUES(II)=GUESS
+C     error check for infinite loop
+      IF (ITRY.GT.2*NTOT) GOTO 9000
+      CALL RZERO(WORK1,5)
+      NLT=0
+      NGT=0
+      CLT=GMIN0
+      CGT=GMAX0
+      DO 100 I=1,N
+         AA=A(I)
+         IF (AA.NE.GUESS) THEN
+            IF (AA.LT.GUESS) THEN
+               NLT=NLT+1
+C              CLT - closest value to GUESS Less Than GUESS
+               IF (AA.GT.CLT) CLT=AA
+            ENDIF
+            IF (AA.GT.GUESS) THEN
+               NGT=NGT+1
+C              CGT - closest value to GUESS Greater Than GUESS
+               IF (AA.LT.CGT) CGT=AA
+            ENDIF
+            DUM=1./(EPS+ABS(AA-GUESS))
+            WORK1(1)=WORK1(1)+DUM
+            WORK1(2)=WORK1(2)+DUM*AA
+         ELSE
+C           detected values equaling the guess.
+            WORK1(5)=WORK1(5)+1.0
+         ENDIF
+  100 CONTINUE
+C     Invoke vector reduction across processors:
+      WORK2(1)=CLT
+      CLT=GLMAX(WORK2,1)
+      WORK2(1)=CGT
+      CGT=GLMIN(WORK2,1)
+      WORK1(3)=NLT
+      WORK1(4)=NGT
+      CALL GOP(WORK1,WORK2,'+  ',5)
+      NLT=WORK1(3)
+      NGT=WORK1(4)
+      IF (.NOT.IFOK) THEN
+         WRITE(6,101) NID,GUESS,CLT,CGT
+         WRITE(6,102) NID,(WORK1(I),I=1,5)
+  101    FORMAT(I5,'Glg:',3F12.5)
+  102    FORMAT(I5,'WORK1:',5F12.5)
+      ENDIF
+C
+C     Done?
+C
+      IF (NLT.GT.N2.OR.NGT.GT.N2) THEN
+C        we're not done.....
+         IF (NGT.GT.NLT) THEN
+C           guess is too low
+            GMIN=CGT
+            G2=CGT+MAX(0.,WORK1(2)/WORK1(1)-GUESS)*AMP
+            IF (G2.GT.GMAX) G2=0.5*(GUESS+GMAX)
+            EPS=AFAC*ABS(G2-GUESS)
+C           see that we move at least as far as the next closest value.
+            GUESS=MAX(G2,CGT)
+            GOTO 10
+         ELSE IF (NLT.GT.NGT) THEN
+C           guess is too high
+            GMAX=CLT
+            G2=CLT+MIN(0.,WORK1(2)/WORK1(1)-GUESS)*AMP
+            IF (G2.LT.GMIN) G2=0.5*(GUESS+GMIN)
+            EPS=AFAC*ABS(G2-GUESS)
+C           see that we move at least as far as the next closest value.
+            GUESS=MIN(G2,CLT)
+            GOTO 10
+         ENDIF
+      ELSE
+C
+C        we're done....
+         IF (WORK1(5).NE.0) THEN
+C           the median is (usually) one of the values 
+            FMDIAN=GUESS
+            IF (WORK1(5).EQ.1.0) THEN
+               IF (MOD(NTOT,2).EQ.0) THEN
+                  IF (NGT.GT.NLT) THEN
+                     FMDIAN=0.5*(GUESS+CGT)
+                  ELSE
+                     FMDIAN=0.5*(GUESS+CLT)
+                  ENDIF
+               ELSE
+                  IF (NGT.EQ.NLT) THEN
+                     FMDIAN=GUESS
+                  ELSE IF(NGT.GT.NLT) THEN
+                     FMDIAN=CGT
+                  ELSE
+                     FMDIAN=CLT
+                  ENDIF
+               ENDIF
+            ENDIF
+         ELSE
+            IF (MOD(NTOT,2).EQ.0) THEN
+               IF (NGT.EQ.NLT) THEN
+                  FMDIAN=0.5*(CLT+CGT)
+               ELSE IF(NGT.GT.NLT) THEN
+                  FMDIAN=0.5*(GUESS+CGT)
+               ELSE
+                  FMDIAN=0.5*(GUESS+CLT)
+               ENDIF
+            ELSE
+               IF (NGT.EQ.NLT) THEN
+                  FMDIAN=GUESS
+               ELSE IF(NGT.GT.NLT) THEN
+                  FMDIAN=CGT
+               ELSE
+                  FMDIAN=CLT
+               ENDIF
+           ENDIF
+         ENDIF
+ 
+      ENDIF
+       IF (.NOT.IFOK) WRITE(6,*) NID,'FMDIAN2',FMDIAN,(A(I),I=1,N)
+      return
+C
+C     Error handling: dump A and the recorded guesses, 5 per record
+C
+ 9000 CONTINUE
+      WRITE(6,11) NTOT,GMIN0,GMAX0,GUESS
+   11 FORMAT('ABORTING IN FMDIAN: N,AMIN,AMAX:',I6,3G14.6)
+      DO 13 I1=1,N,5
+        IN=I1+4
+        IN=MIN(IN,N)
+        WRITE(6,12) NID,(A(I),I=I1,IN)
+   12   FORMAT(I4,' FMA:',5G14.6)
+   13 CONTINUE
+      DO 15 I1=1,MIN(ITRY,100),5
+        IN=I1+4
+        IN=MIN(IN,ITRY,100)
+        WRITE(6,14) NID,(GUES(I),I=I1,IN)
+   14   FORMAT(I4,' FMG:',5G14.6)
+   15 CONTINUE
+      call exitt
+      END
+
+C========================================================================
+C     Double precision matrix and vector routines
+C========================================================================
+
+c-----------------------------------------------------------------------
+      subroutine dcadd(a,const,n)
+c     Add the double precision scalar const to a(1..n), in place.
+      real*8 a(1),const
+      do k=1,n
+         a(k)=a(k)+const
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine dsub2(a,b,n)
+c     In-place double precision subtraction: a(k) = a(k) - b(k).
+      real*8 a(1),b(1)
+      do k=1,n
+         a(k)=a(k)-b(k)
+      enddo
+      return
+      end
+ 
+c-----------------------------------------------------------------------
+      subroutine dadd2(a,b,n)
+c     In-place double precision addition: a(k) = a(k) + b(k).
+      real*8 a(1),b(1)
+      do k=1,n
+         a(k)=a(k)+b(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine chswapr(b,L,ind,n,temp)
+c     Permute the character*6 array b so that, on return, b(k) holds
+c     the former b(ind(k)) for k=1..n.  temp is scratch of length n.
+c     The argument L is not referenced.
+      character*6 b(1),temp(1)
+      integer ind(1)
+c     gather into scratch first so no entry is overwritten early
+      do k=1,n
+         temp(k)=b(ind(k))
+      enddo
+      do k=1,n
+         b(k)=temp(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine drcopy(r,d,N)
+c     Copy d(1..n) (real*8) into r(1..n) (default real).
+      real*8 d(1)
+      real   r(1)
+      do 20 k=1,n
+   20 r(k)=d(k)
+      return
+      end
+      subroutine sorts(xout,xin,work,n)  ! sort a copy of xin into xout
+      real xout(1),xin(1),work(1)
+      call copy(xout,xin,n)    ! presumably xout(1:n)=xin(1:n) - confirm
+      call sort(xout,work,n)   ! work is passed as sort's index array
+      return
+      end
+C
+c-----------------------------------------------------------------------
+      function ivlsum(a,n)
+c     Sum of the integer entries a(1..n); no communication is done.
+c     n = 0 yields 0; otherwise the sum is seeded with a(1).
+      integer a(1)
+      integer isum
+      ivlsum = 0
+      if (n.eq.0) return
+      isum = a(1)
+      do k=2,n
+         isum = isum + a(k)
+      enddo
+      ivlsum = isum
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine icadd(a,c,n)
+      integer a(1),c     ! a(1..n) <- a(1..n) + c, in place
+      do 10 k=1,n
+   10 a(k) = c + a(k)
+      return
+      end
+      subroutine isort(a,ind,n)
+C     Heap sort (p 231 Num. Rec., 1st Ed.) of the integer array a(1:n)
+C     into ascending order, in place.  ind is set to 1..n and permuted
+C     alongside a, so on return a(k) is the original a(ind(k)).
+      integer a(1),ind(1)
+      integer aa
+C     start from the identity permutation
+      dO 10 j=1,n
+         ind(j)=j
+   10 continue
+C     zero or one entry: nothing to sort
+      if (n.le.1) return
+      L=n/2+1
+      ir=n
+  100 continue
+         if (l.gt.1) then          ! still building the heap
+            l=l-1
+            aa  = a  (l)
+            ii  = ind(l)
+         else                      ! heap built: retire current maximum
+                 aa =   a(ir)
+                 ii = ind(ir)
+              a(ir) =   a( 1)
+            ind(ir) = ind( 1)
+            ir=ir-1
+            if (ir.eq.1) then      ! last entry placed: a is sorted
+                 a(1) = aa
+               ind(1) = ii
+               return
+            endif
+         endif
+         i=l                       ! sift (aa,ii) down starting at l
+         j=l+l
+  200    continue
+         if (j.le.ir) then
+            if (j.lt.ir) then
+               if ( a(j).lt.a(j+1) ) j=j+1   ! use the larger child
+            endif
+            if (aa.lt.a(j)) then
+                 a(i) = a(j)
+               ind(i) = ind(j)
+               i=j
+               j=j+j
+            else
+               j=ir+1              ! forces exit from the sift loop
+            endif
+         GOTO 200
+         endif
+           a(i) = aa
+         ind(i) = ii
+      GOTO 100
+      end
+      subroutine sort(a,ind,n)
+C     Heap sort (p 231 Num. Rec., 1st Ed.) of the real array a(1:n)
+C     into ascending order, in place.  ind is set to 1..n and permuted
+C     alongside a, so on return a(k) is the original a(ind(k)).
+      real a(1),aa
+      integer ind(1)
+C     start from the identity permutation
+      dO 10 j=1,n
+         ind(j)=j
+   10 continue
+C     zero or one entry: nothing to sort
+      if (n.le.1) return
+      L=n/2+1
+      ir=n
+  100 continue
+         if (l.gt.1) then          ! still building the heap
+            l=l-1
+            aa  = a  (l)
+            ii  = ind(l)
+         else                      ! heap built: retire current maximum
+                 aa =   a(ir)
+                 ii = ind(ir)
+              a(ir) =   a( 1)
+            ind(ir) = ind( 1)
+            ir=ir-1
+            if (ir.eq.1) then      ! last entry placed: a is sorted
+                 a(1) = aa
+               ind(1) = ii
+               return
+            endif
+         endif
+         i=l                       ! sift (aa,ii) down starting at l
+         j=l+l
+  200    continue
+         if (j.le.ir) then
+            if (j.lt.ir) then
+               if ( a(j).lt.a(j+1) ) j=j+1   ! use the larger child
+            endif
+            if (aa.lt.a(j)) then
+                 a(i) = a(j)
+               ind(i) = ind(j)
+               i=j
+               j=j+j
+            else
+               j=ir+1              ! forces exit from the sift loop
+            endif
+         GOTO 200
+         endif
+           a(i) = aa
+         ind(i) = ii
+      GOTO 100
+      end
+c-----------------------------------------------------------------------
+      subroutine iswap_ip(x,p,n)
+      integer x(1),xstart
+      integer p(1)
+c     In-place permutation: x' = x(p), i.e. x(k) <- old x(p(k)).
+c     Each cycle of p is followed once; visited entries of p are
+c     negated as "done" marks and flipped back before returning.
+      do k=1,n
+         if (p(k).gt.0) then   ! not swapped
+            xstart     = x(k)      ! saved: slot k is overwritten first
+            loop_start = k
+            last       = k
+            do j=k,n
+               next    = p(last)
+               if (next.lt.0) then        ! ran into a finished entry
+                  write(6,*) 'Hey! iswap_ip problem.',j,k,n,next
+                  call exitt
+               elseif (next.eq.loop_start) then   ! cycle closes here
+                  x(last) = xstart
+                  p(last) = -p(last)
+                  goto 10
+               else
+                  x(last) = x(next)
+                  p(last) = -p(last)
+                  last    = next
+               endif
+            enddo
+   10       continue
+         endif
+      enddo
+c     undo the "done" marks so that p is positive again
+      do k=1,n
+         p(k) = -p(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine iswapt_ip(x,p,n)
+      integer x(1),t1,t2
+      integer p(1)
+c     In-place permutation: x'(p) = x, i.e. x(p(k)) <- old x(k).
+c     Each cycle of p is followed once, carrying the displaced value
+c     forward in t1.  Visited entries of p are negated as "done"
+c     marks and flipped back before returning.
+      do k=1,n
+         if (p(k).gt.0) then   ! not swapped
+            loop_start = k
+            next       = p(loop_start)
+            t1         = x(loop_start)   ! value waiting to be stored
+            do j=1,n
+               if (next.lt.0) then        ! ran into a finished entry
+                  write(6,*) 'Hey! iswapt_ip problem.',j,k,n,next
+                  call exitt
+               elseif (next.eq.loop_start) then   ! cycle closes here
+                  x(next) = t1
+                  p(next) = -p(next)
+                  goto 10
+               else
+                  t2      =  x(next)     ! keep the displaced value
+                  x(next) =  t1
+                  t1      =  t2
+                  nextp   =  p(next)
+                  p(next) = -p(next)
+                  next    =  nextp
+               endif
+            enddo
+   10       continue
+         endif
+      enddo
+c     undo the "done" marks so that p is positive again
+      do k=1,n
+         p(k) = -p(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine swap_ip(x,p,n)
+      real    x(1),xstart
+      integer p(1)
+c     In-place permutation: x' = x(p), i.e. x(k) <- old x(p(k)).
+c     Each cycle of p is followed once; visited entries of p are
+c     negated as "done" marks and flipped back before returning.
+      do k=1,n
+         if (p(k).gt.0) then   ! not swapped
+            xstart     = x(k)      ! saved: slot k is overwritten first
+            loop_start = k
+            last       = k
+            do j=k,n
+               next    = p(last)
+               if (next.lt.0) then        ! ran into a finished entry
+                  write(6,*) 'Hey! swap_ip problem.',j,k,n,next
+                  call exitt
+               elseif (next.eq.loop_start) then   ! cycle closes here
+                  x(last) = xstart
+                  p(last) = -p(last)
+                  goto 10
+               else
+                  x(last) = x(next)
+                  p(last) = -p(last)
+                  last    = next
+               endif
+            enddo
+   10       continue
+         endif
+      enddo
+c     undo the "done" marks so that p is positive again
+      do k=1,n
+         p(k) = -p(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine swapt_ip(x,p,n)
+      real    x(1),t1,t2
+      integer p(1)
+c     In-place permutation: x'(p) = x, i.e. x(p(k)) <- old x(k).
+c     Each cycle of p is followed once, carrying the displaced value
+c     forward in t1.  Visited entries of p are negated as "done"
+c     marks and flipped back before returning.
+      do k=1,n
+         if (p(k).gt.0) then   ! not swapped
+            loop_start = k
+            next       = p(loop_start)
+            t1         = x(loop_start)   ! value waiting to be stored
+            do j=1,n
+               if (next.lt.0) then        ! ran into a finished entry
+                  write(6,*) 'Hey! swapt_ip problem.',j,k,n,next
+                  call exitt
+               elseif (next.eq.loop_start) then   ! cycle closes here
+                  x(next) = t1
+                  p(next) = -p(next)
+                  goto 10
+               else
+                  t2      =  x(next)     ! keep the displaced value
+                  x(next) =  t1
+                  t1      =  t2
+                  nextp   =  p(next)
+                  p(next) = -p(next)
+                  next    =  nextp
+               endif
+            enddo
+   10       continue
+         endif
+      enddo
+c     undo the "done" marks so that p is positive again
+      do k=1,n
+         p(k) = -p(k)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine glvadd(x,w,n)   ! '+' reduction of x(1:n) through gop
+      real x(1),w(1)             ! w is handed to gop as its 2nd arg
+      call gop(x,w,'+  ',n)      ! count is n so every entry is reduced
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add3s12(x,y,z,c1,c2,n)
+c     Linear combination x(k) = c1*y(k) + c2*z(k) for k=1..n.
+      real x(1),y(1),z(1),c1,c2
+      do 10 k=1,n
+   10 x(k) = c1*y(k)+c2*z(k)
+      return
+      end
+c-----------------------------------------------------------------------
+      integer*8 function i8glmax(a,n)
+c     Largest of a(1..n), then passed through i8gop with op 'M  '
+c     (presumably a max over processes).  The floor -999999 maps to 0.
+      integer*8 a(1),tmp(1),work(1)
+      tmp(1)= -999999
+      do k=1,n
+         if (a(k).gt.tmp(1)) tmp(1)=a(k)
+      enddo
+      call i8gop(tmp,work,'M  ',1)
+      i8glmax=tmp(1)
+      if (i8glmax .eq. -999999) i8glmax=0
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine admcol3(a,b,c,d,n)
+c     Accumulate a(k) = a(k) + b(k)*c(k)*d for k=1..n (d is a scalar).
+      real a(1),b(1),c(1),d
+      do k=1,n
+         a(k)=a(k)+b(k)*c(k)*d
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add2col2(a,b,c,n)
+c     Accumulate the pointwise product: a(k) = a(k) + b(k)*c(k).
+      real a(1),b(1),c(1)
+      do 10 k=1,n
+         a(k) = a(k) + b(k)*c(k)
+   10 continue
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine add2sxy(x,a,y,b,n)
+c     Scaled in-place sum: x(k) = a*x(k) + b*y(k) for k=1..n.
+c     The scalars a and b are implicitly typed (default real).
+      real x(1),y(1)
+      do 10 k=1,n
+         x(k) = a*x(k) + b*y(k)
+   10 continue
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine col2s2(x,y,s,n)
+c     Scaled pointwise product in place: x(k) = s*x(k)*y(k).
+c     The scalar s is implicitly typed (default real).
+      real x(n),y(n)
+      do 10 k=1,n
+         x(k)=s*x(k)*y(k)
+   10 continue
+      return
+      end
+c-----------------------------------------------------------------------
+      INTEGER FUNCTION INDX1(S1,S2,L2)
+c
+c     Position of the first occurrence of s2(1:l2) within the
+c     132-character string s1, or 0 when there is no match.
+c
+      character*132 s1,s2
+      indx1=0
+c     last admissible start so that the window stays inside s1
+      last=133-l2
+      if (last.lt.1) return
+      do 10 k=1,last
+         if (s1(k:k+l2-1).ne.s2(1:l2)) goto 10
+         indx1=k
+         return
+   10 continue
+      return
+      end
+c-----------------------------------------------------------------------
+
diff --git a/src/mpi_dummy.f b/src/mpi_dummy.f
new file mode 100644
index 0000000..f6257df
--- /dev/null
+++ b/src/mpi_dummy.f
@@ -0,0 +1,1053 @@
+c*********************************************************************72
+      subroutine mpi_scan(data1, data2, n, datatype,
+     &  operation, comm, ierror )
+c     Serial stub: with one process the scan of data1 is data1 itself.
+      integer data1,data2  ! currently hardwired only for integer
+      integer ierror
+      data2 = data1        ! scalar copy only; n is not consulted
+      ierror = 0           ! report success (MPI_SUCCESS is 0 here)
+      return
+      end
+
+c*********************************************************************72
+      subroutine mpi_abort ( comm, errorcode, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_ABORT (serial stub) reports the error code and stops the run.
+c
+c  comm      - communicator; not referenced
+c  errorcode - value echoed in the shutdown message
+c  ierror    - set to MPI_SUCCESS before the program stops
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer comm, errorcode, ierror
+c
+      ierror = MPI_SUCCESS
+c
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_ABORT:'
+      write ( *, '(a,i12)' )
+     &  '  Shut down with error code = ', errorcode
+c
+      stop
+      end
+      subroutine mpi_allgather ( data1, nsend, sendtype, data2,
+     &  nrecv, recvtype, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_ALLGATHER (serial stub): the gathered result is the local data.
+c
+c  Copies data1(1:nsend) to data2(1:nsend) with the copy routine that
+c  matches sendtype (double precision, integer or real).  Any other
+c  sendtype returns ierror = MPI_FAILURE without copying.
+c  nrecv, recvtype and comm are not referenced.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer nsend, nrecv
+      integer sendtype, recvtype
+      integer comm, ierror
+      integer data1(nsend), data2(nsend)
+c
+      ierror = MPI_SUCCESS
+c
+      if ( sendtype .eq. mpi_double_precision ) then
+        call mpi_copy_double_precision ( data1, data2, nsend, ierror )
+      else if ( sendtype .eq. mpi_integer ) then
+        call mpi_copy_integer ( data1, data2, nsend, ierror )
+      else if ( sendtype .eq. mpi_real ) then
+        call mpi_copy_real ( data1, data2, nsend, ierror )
+      else
+        ierror = MPI_FAILURE
+      end if
+c
+      return
+      end
+      subroutine mpi_allgatherv ( data1, nsend, sendtype,
+     &  data2, nrecv, ndispls, recvtype, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_ALLGATHERV (serial stub): the gathered result is the local data.
+c
+c  Copies data1(1:nsend) to data2(1:nsend) with the copy routine that
+c  matches sendtype (double precision, integer or real).  Any other
+c  sendtype returns ierror = MPI_FAILURE without copying.
+c  nrecv, ndispls, recvtype and comm are not referenced, so no
+c  displacement is applied to data2.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer nsend, nrecv, ndispls
+      integer sendtype, recvtype
+      integer comm, ierror
+      integer data1(nsend), data2(nsend)
+c
+      ierror = MPI_SUCCESS
+c
+      if ( sendtype .eq. mpi_double_precision ) then
+        call mpi_copy_double_precision ( data1, data2, nsend, ierror )
+      else if ( sendtype .eq. mpi_integer ) then
+        call mpi_copy_integer ( data1, data2, nsend, ierror )
+      else if ( sendtype .eq. mpi_real ) then
+        call mpi_copy_real ( data1, data2, nsend, ierror )
+      else
+        ierror = MPI_FAILURE
+      end if
+c
+      return
+      end
+      subroutine mpi_allreduce ( data1, data2, n, datatype,
+     &  operation, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_ALLREDUCE (serial stub) dispatches on datatype to a typed reduce.
+c
+c  data1, data2, n and operation are handed to the helper matching
+c  datatype, and ierror is whatever that helper returns.  A datatype
+c  outside the four handled below returns ierror = MPI_FAILURE.
+c  comm is not referenced.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer n
+      integer comm, datatype, operation, ierror
+      integer data1(n), data2(n)
+c
+      ierror = MPI_SUCCESS
+c
+      if ( datatype .eq. mpi_double_precision ) then
+c       double precision buffers
+        call mpi_reduce_double_precision (
+     &    data1, data2, n, operation, ierror )
+c
+      else if ( datatype .eq. mpi_integer ) then
+c       default integer buffers
+        call mpi_reduce_integer (
+     &    data1, data2, n, operation, ierror )
+c
+      else if ( datatype .eq. mpi_integer8 ) then
+c       integer*8 buffers
+        call mpi_reduce_integer8(
+     &    data1, data2, n, operation, ierror )
+c
+      else if ( datatype .eq. mpi_real ) then
+c       default real buffers
+        call mpi_reduce_real (
+     &    data1, data2, n, operation, ierror )
+c
+      else
+c       datatype not supported by this stub
+        ierror = MPI_FAILURE
+c
+      end if
+c
+      return
+      end
+
+      subroutine mpi_barrier ( comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_BARRIER (serial stub): nothing to wait for with one process.
+c
+c  comm   - communicator; not referenced
+c  ierror - MPI_SUCCESS: a one-process barrier completes at once,
+c           in line with the other no-op stubs such as MPI_BCAST
+      implicit none
+      integer comm
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+      integer MPI_SUCCESS
+      parameter ( MPI_SUCCESS = 0 )
+      ierror = MPI_SUCCESS
+      return
+      end
+      subroutine mpi_bcast ( data, n, datatype, node, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_BCAST (serial stub): the only process already holds the data.
+c
+c  data, datatype, node and comm are neither read nor changed.
+c  ierror is returned as MPI_SUCCESS.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n
+      integer data(n)
+      integer comm, datatype, node, ierror
+c
+c     nothing to transmit: just report success
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_bsend ( data, n, datatype, iproc, itag,
+     &  comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_BSEND (serial stub): a buffered send has no peer to go to.
+c
+c  Prints an error message and returns ierror = MPI_FAILURE.
+c  The message arguments (data, datatype, iproc, itag, comm) are
+c  neither read nor changed.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n
+      integer data(n)
+      integer comm, datatype, iproc, itag, ierror
+c
+c     report the misuse, then flag the failure
+c
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_BSEND - Error!'
+      write ( *, '(a)' ) '  Should not send message to self.'
+c
+      ierror = MPI_FAILURE
+c
+      return
+      end
+      subroutine mpi_cart_create ( comm, ndims, dims, periods,
+     &  reorder, comm_cart, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_CART_CREATE (serial stub) "creates" a Cartesian communicator.
+c
+c  There is no topology to build with one process: comm is handed
+c  back as comm_cart (cf. MPI_COMM_DUP) rather than left undefined.
+c  ndims, dims, periods and reorder are not referenced.
+      implicit none
+      integer ndims
+      integer comm
+      integer comm_cart
+      integer dims(*)
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+      integer MPI_SUCCESS
+      parameter ( MPI_SUCCESS = 0 )
+      logical periods(*)
+      logical reorder
+      ierror = MPI_SUCCESS
+      comm_cart = comm
+      return
+      end
+      subroutine mpi_cart_get ( comm, ndims, dims, periods,
+     &  coords, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_CART_GET (serial stub): the lone process sits at the origin.
+c
+c  Sets coords(1:ndims) to 0 and ierror to MPI_SUCCESS.
+c  comm is not referenced.
+c  dims and periods are left unchanged.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer ndims
+      integer comm, ierror, k
+      integer coords(*), dims(*)
+      logical periods(*)
+c
+c     every Cartesian coordinate of the single process is zero
+      do 10 k = 1, ndims
+        coords(k) = 0
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_cart_shift ( comm, idir, idisp, isource,
+     &  idest, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_CART_SHIFT (serial stub): every shift starts and ends at rank 0.
+c
+c  isource and idest are both returned as 0.
+c  ierror is returned as MPI_SUCCESS.
+c  comm, idir and idisp are not referenced.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer comm, idir, idisp
+      integer isource, idest, ierror
+c
+c     the only possible neighbour is the process itself
+      idest = 0
+      isource = 0
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_comm_dup ( comm, comm_out, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COMM_DUP (serial stub): the duplicate is the same handle.
+c
+c  comm     - communicator to duplicate
+c  comm_out - receives the value of comm
+c  ierror   - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer comm, comm_out, ierror
+c
+      comm_out = comm
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_comm_free ( comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COMM_FREE (serial stub): there is nothing to release.
+c
+c  comm   - not referenced and not changed
+c  ierror - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer comm, ierror
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_comm_rank ( comm, me, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COMM_RANK (serial stub): the only process is rank 0.
+c
+c  comm   - not referenced
+c  me     - returned as 0
+c  ierror - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer comm, me, ierror
+c
+      me = 0
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_comm_size ( comm, nprocs, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COMM_SIZE (serial stub): the communicator holds one process.
+c
+c  comm   - not referenced
+c  nprocs - returned as 1
+c  ierror - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer comm, nprocs, ierror
+c
+      nprocs = 1
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_comm_split ( comm, icolor, ikey, comm_new,
+     &  ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COMM_SPLIT (serial stub): the single process keeps its handle.
+c
+c  comm_new is set to comm (as MPI_COMM_DUP does) so the output handle
+c  is always defined; icolor and ikey are not referenced.
+      implicit none
+      integer comm
+      integer comm_new
+      integer icolor
+      integer ierror
+      integer ikey
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+      integer MPI_SUCCESS
+      parameter ( MPI_SUCCESS = 0 )
+      ierror = MPI_SUCCESS
+      comm_new = comm
+      return
+      end
+      subroutine mpi_copy_double_precision ( data1, data2, n, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COPY_DOUBLE_PRECISION copies a double precision vector.
+c
+c  data1  - source vector
+c  data2  - destination; receives data1(1:n)
+c  n      - number of entries to copy
+c  ierror - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n, k, ierror
+      double precision data1(n), data2(n)
+c
+      do 10 k = 1, n
+        data2(k) = data1(k)
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_copy_integer ( data1, data2, n, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COPY_INTEGER copies an integer vector.
+c
+c  data1  - source vector
+c  data2  - destination; receives data1(1:n)
+c  n      - number of entries to copy
+c  ierror - returned as MPI_SUCCESS
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n, k, ierror
+      integer data1(n), data2(n)
+c
+      do 10 k = 1, n
+        data2(k) = data1(k)
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_copy_real ( data1, data2, n, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_COPY_REAL copies a real vector.
+c
+c  data2(1:n) receives data1(1:n).
+c  ierror is returned as MPI_SUCCESS.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n, k, ierror
+      real data1(n), data2(n)
+c
+      do 10 k = 1, n
+        data2(k) = data1(k)
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_finalize ( ierror )
+c
+c*********************************************************************72
+c
+cc MPI_FINALIZE (serial stub): there is no MPI library to shut down.
+c
+c  ierror - returned as MPI_SUCCESS; nothing else happens
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer ierror
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_get_count ( istatus, datatype, icount, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_GET_COUNT (serial stub): there is no received message to query.
+c
+c  Prints an error message and returns ierror = MPI_FAILURE.
+c  icount is not set; istatus and datatype are not referenced.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer istatus, datatype, icount, ierror
+c
+c     report the misuse, then flag the failure
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_GET_COUNT - Error!'
+      write ( *, '(a)' ) '  Should not query message from self.'
+c
+      ierror = MPI_FAILURE
+c
+      return
+      end
+      subroutine mpi_init ( ierror )
+c
+c*********************************************************************72
+c
+cc MPI_INIT (serial stub): there is no MPI library to start up.
+c
+c  ierror - returned as MPI_SUCCESS; nothing else happens
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+      integer ierror
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_irecv ( data, n, datatype, iproc, itag,
+     &  comm, irequest, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_IRECV (serial stub): there is no other process to receive from.
+c
+c  Prints an error message and returns ierror = MPI_FAILURE.
+c  irequest is set to 0, as MPI_ISEND does for its request handle,
+c  so the caller never sees an undefined handle.
+c  data, datatype, iproc, itag and comm are neither read nor changed.
+      implicit none
+      integer n
+      integer comm
+      integer data(n)
+      integer datatype
+      integer ierror
+      integer iproc
+      integer irequest
+      integer itag
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+      integer MPI_SUCCESS
+      parameter ( MPI_SUCCESS = 0 )
+      irequest = 0
+      ierror = MPI_FAILURE
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_IRECV - Error!'
+      write ( *, '(a)' ) '  Should not recv message from self.'
+      return
+      end
+      subroutine mpi_isend ( data, n, datatype, iproc, itag,
+     &  comm, request, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_ISEND (serial stub): there is no other process to send to.
+c
+c  Prints an error message, sets request = 0 and returns
+c  ierror = MPI_FAILURE.  The message arguments (data, datatype,
+c  iproc, itag, comm) are neither read nor changed.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n
+      integer data(n)
+      integer comm, datatype, iproc, itag
+      integer request, ierror
+c
+c     report the misuse, then flag the failure
+c
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_ISEND - Error!'
+      write ( *, '(a)' ) '  Should not send message to self.'
+c
+      request = 0
+      ierror = MPI_FAILURE
+c
+      return
+      end
+      subroutine mpi_recv ( data, n, datatype, iproc, itag,
+     &  comm, istatus, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_RECV (serial stub): there is no other process to receive from.
+c
+c  Prints an error message and returns ierror = MPI_FAILURE.
+c  data, datatype, iproc, itag, comm and istatus are neither read
+c  nor changed.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n
+      integer data(n)
+      integer comm, datatype, iproc, itag
+      integer istatus, ierror
+c
+c     report the misuse, then flag the failure
+c
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_RECV - Error!'
+      write ( *, '(a)' ) '  Should not recv message from self.'
+c
+      ierror = MPI_FAILURE
+c
+      return
+      end
+      subroutine mpi_reduce ( data1, data2, n, datatype, operation,
+     &  receiver, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE (serial stub) dispatches on datatype to a typed reduce.
+c
+c  data1, data2, n and operation are handed to the helper matching
+c  datatype, and ierror is whatever that helper returns.
+c  MPI_INTEGER8 is accepted here just as it is in MPI_ALLREDUCE.
+c  Any other datatype returns ierror = MPI_FAILURE.
+c  receiver and comm are not referenced.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer n
+      integer comm
+      integer data1(n)
+      integer data2(n)
+      integer datatype
+      integer ierror
+      integer operation
+      integer receiver
+c
+      ierror = MPI_SUCCESS
+c
+      if ( datatype .eq. mpi_double_precision ) then
+        call mpi_reduce_double_precision (
+     &    data1, data2, n, operation, ierror )
+      else if ( datatype .eq. mpi_integer ) then
+        call mpi_reduce_integer (
+     &    data1, data2, n, operation, ierror )
+      else if ( datatype .eq. mpi_integer8 ) then
+        call mpi_reduce_integer8(
+     &    data1, data2, n, operation, ierror )
+      else if ( datatype .eq. mpi_real ) then
+        call mpi_reduce_real (
+     &    data1, data2, n, operation, ierror )
+      else
+        ierror = MPI_FAILURE
+      end if
+c
+      return
+      end
+      subroutine mpi_reduce_double_precision (
+     &  data1, data2, n, operation, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE_DOUBLE_PRECISION: serial "reduction" of double precision data.
+c
+c  data1(1:n) is copied to data2(1:n); operation is not consulted.
+c  With a single contributor the reduced value is the input itself.
+c  ierror is returned as MPI_SUCCESS.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer n, i
+      integer operation, ierror
+      double precision data1(n), data2(n)
+c
+c     the "reduction" is a plain copy
+      do 10 i = 1, n
+        data2(i) = data1(i)
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+
+      subroutine mpi_reduce_integer8 (
+     &  data1, data2, n, operation, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE_INTEGER8: serial "reduction" of integer*8 data.
+c
+c  data1(1:n) is copied to data2(1:n); operation is not consulted.
+c  ierror is MPI_SUCCESS, like the real and double precision helpers:
+c  the copy cannot fail, so no failure status is reported.
+c
+      implicit none
+      include "mpi_dummy.h"
+      integer n
+      integer*8 data1(n)
+      integer*8 data2(n)
+      integer i
+      integer ierror
+      integer operation
+c
+      do i = 1, n
+         data2(i) = data1(i)
+      end do
+c
+      ierror = MPI_SUCCESS
+      return
+      end
+ 
+      subroutine mpi_reduce_integer (
+     &  data1, data2, n, operation, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE_INTEGER: serial "reduction" of default integer data.
+c
+c  data1(1:n) is copied to data2(1:n); operation is not consulted.
+c  ierror is MPI_SUCCESS, like the real and double precision helpers:
+c  the copy cannot fail, so no failure status is reported.
+c
+      implicit none
+      include "mpi_dummy.h"
+      integer n
+      integer data1(n)
+      integer data2(n)
+      integer i
+      integer ierror
+      integer operation
+c
+      do i = 1, n
+         data2(i) = data1(i)
+      end do
+c
+      ierror = MPI_SUCCESS
+      return
+      end
+
+      subroutine mpi_reduce_real (
+     &  data1, data2, n, operation, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE_REAL: serial "reduction" of default real data.
+c
+c  Discussion:
+c    data1(1:n) is copied to data2(1:n); operation is not consulted.
+c    With a single contributor the reduced value is the input itself.
+c    ierror is returned as MPI_SUCCESS.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer n, i
+      integer operation, ierror
+      real data1(n), data2(n)
+c
+c     the "reduction" is a plain copy
+      do 10 i = 1, n
+        data2(i) = data1(i)
+   10 continue
+c
+      ierror = MPI_SUCCESS
+c
+      return
+      end
+      subroutine mpi_reduce_scatter ( data1, data2, n, datatype,
+     &  operation, comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_REDUCE_SCATTER (serial stub): the result is the local data.
+c
+c  Copies data1(1:n) to data2(1:n) with the copy routine that matches
+c  datatype (double precision, integer or real).  Any other datatype
+c  returns ierror = MPI_FAILURE without copying.
+c  operation and comm are not referenced.
+c
+      implicit none
+c
+      include "mpi_dummy.h"
+c
+      integer n
+      integer comm, datatype, operation, ierror
+      integer data1(n), data2(n)
+c
+      ierror = MPI_SUCCESS
+c
+      if ( datatype .eq. mpi_double_precision ) then
+        call mpi_copy_double_precision ( data1, data2, n, ierror )
+      else if ( datatype .eq. mpi_integer ) then
+        call mpi_copy_integer ( data1, data2, n, ierror )
+      else if ( datatype .eq. mpi_real ) then
+        call mpi_copy_real ( data1, data2, n, ierror )
+      else
+        ierror = MPI_FAILURE
+      end if
+c
+      return
+      end
+      subroutine mpi_rsend ( data, n, datatype, iproc, itag,
+     &  comm, ierror )
+c
+c*********************************************************************72
+c
+cc MPI_RSEND (serial stub): a "ready send" has no peer to go to.
+c
+c  Prints an error message and returns ierror = MPI_FAILURE.
+c  The message arguments (data, datatype, iproc, itag, comm) are
+c  neither read nor changed.
+c
+      implicit none
+c
+      integer MPI_SUCCESS, MPI_FAILURE
+      parameter ( MPI_SUCCESS = 0, MPI_FAILURE = 1 )
+c
+      integer n
+      integer data(n)
+      integer comm, datatype, iproc, itag, ierror
+c
+c     report the misuse, then flag the failure
+c
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_RSEND - Error!'
+      write ( *, '(a)' ) '  Should not send message to self.'
+c
+      ierror = MPI_FAILURE
+c
+      return
+      end
+      subroutine mpi_send ( data, n, datatype, iproc, itag,
+     &  comm, ierror )
+
+c*********************************************************************72
+c
+cc MPI_SEND sends data from one process to another.
+c
+c  Stub version: no data is moved.  Every call prints an error
+c  message to standard output and returns IERROR = MPI_FAILURE;
+c  DATA, N, DATATYPE, IPROC, ITAG and COMM are not examined.
+c
+      implicit none
+
+      integer n
+      integer data(n)
+      integer datatype
+      integer iproc
+      integer itag
+      integer comm
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_SEND - Error!'
+      write ( *, '(a)' ) '  Should not send message to self.'
+
+      ierror = MPI_FAILURE
+      return
+      end
+      subroutine mpi_wait ( irequest, istatus, ierror )
+
+c*********************************************************************72
+c
+cc MPI_WAIT waits for an I/O request to complete.
+c
+c  Stub version: nothing is waited on.  Every call prints an error
+c  message to standard output and returns IERROR = MPI_FAILURE;
+c  IREQUEST and ISTATUS are neither read nor written.
+      implicit none
+
+      integer irequest
+      integer istatus
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_WAIT - Error!'
+      write ( *, '(a)' ) '  Should not wait on message from self.'
+
+      ierror = MPI_FAILURE
+      return
+      end
+      subroutine mpi_waitall ( icount, irequest, istatus, ierror )
+
+c*********************************************************************72
+c
+cc MPI_WAITALL waits until all I/O requests have completed.
+c
+c  Stub version: nothing is waited on.  Every call prints an error
+c  message to standard output and returns IERROR = MPI_FAILURE;
+c  ICOUNT, IREQUEST and ISTATUS are neither read nor written.
+      implicit none
+
+      integer icount
+      integer irequest
+      integer istatus
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_WAITALL - Error!'
+      write ( *, '(a)' ) '  Should not wait on message from self.'
+
+      ierror = MPI_FAILURE
+      return
+      end
+      subroutine mpi_waitany ( icount, array_of_requests, index,
+     &  istatus, ierror )
+
+c*********************************************************************72
+c
+cc MPI_WAITANY waits until one of the I/O requests has completed.
+c
+c  Stub version: nothing is waited on.  Every call prints an error
+c  message to standard output and returns IERROR = MPI_FAILURE;
+c  the request array, INDEX and ISTATUS are neither read nor written.
+      implicit none
+
+      integer icount
+      integer array_of_requests(*)
+      integer index
+      integer istatus
+      integer ierror
+      integer MPI_FAILURE
+      parameter ( MPI_FAILURE = 1 )
+
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'MPI_WAITANY - Error!'
+      write ( *, '(a)' ) '  Should not wait on message from self.'
+
+      ierror = MPI_FAILURE
+      return
+      end
+      function mpi_wtick ( )
+c*********************************************************************72
+cc MPI_WTICK returns the seconds per tick of the MPI_WTIME clock.
+c  MPI_WTIME reads SYSTEM_CLOCK, so the tick is 1/count_rate; a fixed
+c  1 second is returned only when no clock rate is reported (rate <= 0).
+      implicit none
+      double precision mpi_wtick
+      integer*8 countval, countrate, countmax
+
+      call system_clock(countval, countrate, countmax)
+      mpi_wtick = 1.0D+00
+      if ( countrate .gt. 0 ) mpi_wtick = 1.0D+00 / countrate
+      return
+      end
+      function mpi_wtime ( )
+
+c*********************************************************************72
+c
+cc MPI_WTIME returns the elapsed wall clock time.
+c
+c  The value is the SYSTEM_CLOCK count divided by its count rate,
+c  with the count converted to real*8 before the division.
+      implicit none
+      real*8 mpi_wtime
+      real*8 ticks_r8
+      integer*8 ticks, rate, limit
+
+      call system_clock ( ticks, rate, limit )
+      ticks_r8 = ticks
+      mpi_wtime = ticks_r8 / rate
+      return
+      end
+
+      subroutine mpi_initialized(mpi_is_initialized, ierr)
+c     Stub: sets the flag to 0 and the error code to 0 on every call.
+c     Both arguments are integers under the default implicit typing.
+      ierr = 0
+      mpi_is_initialized = 0
+      return
+      end
+
+      subroutine mpi_comm_create(icomm,igroup,icommd,ierr)
+c     Stub: ignores icomm/igroup, returns handle icommd=1 and ierr=0.
+      icommd = 1
+      ierr = 0             ! always report success to the caller
+      return
+      end
+
+      subroutine mpi_comm_group(icomm,igroup,ierr)
+c     Stub: icomm is not examined; every call hands back group
+c     handle 1 together with error code 0.
+      ierr = 0
+      igroup = 1
+      return
+      end
+
+      subroutine mpi_group_free
+c     No-op stub; it declares no arguments, so nothing is set or returned.
+      return
+      end
+
+      subroutine mpi_attr_get(icomm,ikey,ival,iflag,ierr)
+c     Stub: every key yields the same large dummy value, flagged found.
+      logical iflag
+      ival =  999 999 999  ! dummy
+      iflag = .true.       ! a value is returned, so report it as found
+      ierr = 0             ! always succeeds
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/mpi_dummy.h b/src/mpi_dummy.h
new file mode 100644
index 0000000..0a92b81
--- /dev/null
+++ b/src/mpi_dummy.h
@@ -0,0 +1,61 @@
+c
+c  Dummy parameters for MPI F77 stubs (pulled in with include).
+c  mpi_comm_world is the communicator handle, fixed at 0.
+      integer mpi_comm_world
+      parameter ( mpi_comm_world = 0 )
+c
+c  Return values stored in a routine's ierror argument.
+c
+      integer mpi_failure
+      parameter ( mpi_failure = 1 )
+      integer mpi_success
+      parameter ( mpi_success = 0 )
+c
+c  recv message status: presumably the length of a status array and
+c  the positions of its source, tag and count entries (confirm).
+      integer mpi_status_size
+      parameter ( mpi_status_size = 3 )
+      integer mpi_source
+      parameter ( mpi_source = 1 )
+      integer mpi_tag
+      parameter ( mpi_tag = 2 )
+      integer mpi_count
+      parameter ( mpi_count = 3 )
+c
+c  recv flags: "any source" and "any tag" are both encoded as -1
+c
+      integer mpi_any_source
+      parameter ( mpi_any_source = -1 )
+      integer mpi_any_tag
+      parameter ( mpi_any_tag = -1 )
+c
+c  data type codes, compared against a routine's datatype argument;
+c  note that mpi_integer8 (6) is listed out of numeric order.
+      integer mpi_integer
+      parameter ( mpi_integer = 1 )
+      integer mpi_integer8
+      parameter ( mpi_integer8 = 6 )
+      integer mpi_real
+      parameter ( mpi_real = 2 )
+      integer mpi_double_precision
+      parameter ( mpi_double_precision = 3 )
+      integer mpi_logical
+      parameter ( mpi_logical = 4 )
+      integer mpi_character
+      parameter ( mpi_character = 5 )
+c
+c  allreduce operation codes (sum, max, min, product)
+c
+      integer mpi_sum
+      parameter ( mpi_sum = 1 )
+      integer mpi_max
+      parameter ( mpi_max = 2 )
+      integer mpi_min
+      parameter ( mpi_min = 3 )
+      integer mpi_product
+      parameter ( mpi_product = 4 )
+c
+c  timer: mpi_wtime is an external function returning real*8
+c
+      external mpi_wtime
+      real*8 mpi_wtime
diff --git a/src/mxm_std.f b/src/mxm_std.f
new file mode 100644
index 0000000..5e21cb3
--- /dev/null
+++ b/src/mxm_std.f
@@ -0,0 +1,4123 @@
+      subroutine mxmf2(A,N1,B,N2,C,N3)
+c
+c     unrolled loop version
+c
+c     Picks the kernel for c = a*b from the inner dimension n2:
+c       n2 = 1..24  -> mxfN, whose n2-term sum is written out in full
+c       n2 > 24     -> mxm44_0, the 4x4 pencil routine
+c     With the tests in the order below, n2 < 1 ends up in mxf8.
+c     n2 itself is forwarded unchanged to whichever kernel is chosen.
+c
+      real a(n1,n2),b(n2,n3),c(n1,n3)
+
+      if (n2.eq.1) then
+         call mxf1(a,n1,b,n2,c,n3)
+      elseif (n2.eq.2) then
+         call mxf2(a,n1,b,n2,c,n3)
+      elseif (n2.eq.3) then
+         call mxf3(a,n1,b,n2,c,n3)
+      elseif (n2.eq.4) then
+         call mxf4(a,n1,b,n2,c,n3)
+      elseif (n2.eq.5) then
+         call mxf5(a,n1,b,n2,c,n3)
+      elseif (n2.eq.6) then
+         call mxf6(a,n1,b,n2,c,n3)
+      elseif (n2.eq.7) then
+         call mxf7(a,n1,b,n2,c,n3)
+      elseif (n2.le.8) then
+         call mxf8(a,n1,b,n2,c,n3)
+      elseif (n2.eq.9) then
+         call mxf9(a,n1,b,n2,c,n3)
+      elseif (n2.eq.10) then
+         call mxf10(a,n1,b,n2,c,n3)
+      elseif (n2.eq.11) then
+         call mxf11(a,n1,b,n2,c,n3)
+      elseif (n2.eq.12) then
+         call mxf12(a,n1,b,n2,c,n3)
+      elseif (n2.eq.13) then
+         call mxf13(a,n1,b,n2,c,n3)
+      elseif (n2.eq.14) then
+         call mxf14(a,n1,b,n2,c,n3)
+      elseif (n2.eq.15) then
+         call mxf15(a,n1,b,n2,c,n3)
+      elseif (n2.le.16) then
+         call mxf16(a,n1,b,n2,c,n3)
+      elseif (n2.eq.17) then
+         call mxf17(a,n1,b,n2,c,n3)
+      elseif (n2.eq.18) then
+         call mxf18(a,n1,b,n2,c,n3)
+      elseif (n2.eq.19) then
+         call mxf19(a,n1,b,n2,c,n3)
+      elseif (n2.eq.20) then
+         call mxf20(a,n1,b,n2,c,n3)
+      elseif (n2.eq.21) then
+         call mxf21(a,n1,b,n2,c,n3)
+      elseif (n2.eq.22) then
+         call mxf22(a,n1,b,n2,c,n3)
+      elseif (n2.eq.23) then
+         call mxf23(a,n1,b,n2,c,n3)
+      elseif (n2.le.24) then
+         call mxf24(a,n1,b,n2,c,n3)
+      else
+         call mxm44_0(a,n1,b,n2,c,n3)
+      endif
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf1(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,1)*b(1,n3): one product per entry, no sum needed
+      real a(n1,1),b(1,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf2(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,2)*b(2,n3) with the 2-term inner sum written out
+      real a(n1,2),b(2,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf3(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,3)*b(3,n3) with the 3-term inner sum written out
+      real a(n1,3),b(3,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf4(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,4)*b(4,n3) with the 4-term inner sum written out
+      real a(n1,4),b(4,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf5(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,5)*b(5,n3) with the 5-term inner sum written out
+      real a(n1,5),b(5,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf6(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,6)*b(6,n3) with the 6-term inner sum written out
+      real a(n1,6),b(6,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf7(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,7)*b(7,n3) with the 7-term inner sum written out
+      real a(n1,7),b(7,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf8(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,8)*b(8,n3) with the 8-term inner sum written out
+      real a(n1,8),b(8,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf9(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,9)*b(9,n3) with the 9-term inner sum written out
+      real a(n1,9),b(9,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf10(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,10)*b(10,n3) with the 10-term inner sum written out
+      real a(n1,10),b(10,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf11(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,11)*b(11,n3) with the 11-term inner sum written out
+      real a(n1,11),b(11,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf12(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,12)*b(12,n3) with the 12-term inner sum written out
+      real a(n1,12),b(12,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf13(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,13)*b(13,n3) with the 13-term inner sum written out
+      real a(n1,13),b(13,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf14(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,14)*b(14,n3) with the 14-term inner sum written out
+      real a(n1,14),b(14,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf15(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,15)*b(15,n3) with the 15-term inner sum written out
+      real a(n1,15),b(15,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf16(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,16)*b(16,n3) with the 16-term inner sum written out
+      real a(n1,16),b(16,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf17(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,17)*b(17,n3) with the 17-term inner sum written out
+      real a(n1,17),b(17,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf18(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,18)*b(18,n3) with the 18-term inner sum written out
+      real a(n1,18),b(18,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf19(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,19)*b(19,n3) with the 19-term inner sum written out
+      real a(n1,19),b(19,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf20(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,20)*b(20,n3) with the 20-term inner sum written out
+      real a(n1,20),b(20,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+     &               + a(ir,20)*b(20,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf21(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,21)*b(21,n3) with the 21-term inner sum written out
+      real a(n1,21),b(21,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+     &               + a(ir,20)*b(20,jc)
+     &               + a(ir,21)*b(21,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf22(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,22)*b(22,n3) with the 22-term inner sum written out
+      real a(n1,22),b(22,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+     &               + a(ir,20)*b(20,jc)
+     &               + a(ir,21)*b(21,jc)
+     &               + a(ir,22)*b(22,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf23(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,23)*b(23,n3) with the 23-term inner sum written out
+      real a(n1,23),b(23,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+     &               + a(ir,20)*b(20,jc)
+     &               + a(ir,21)*b(21,jc)
+     &               + a(ir,22)*b(22,jc)
+     &               + a(ir,23)*b(23,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxf24(a,n1,b,n2,c,n3)
+c     c(n1,n3) = a(n1,24)*b(24,n3) with the 24-term inner sum written out
+      real a(n1,24),b(24,n3),c(n1,n3)
+c     n2 is accepted to match the caller's argument list; it is unused
+      do jc = 1, n3
+         do ir = 1, n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+     &               + a(ir,17)*b(17,jc)
+     &               + a(ir,18)*b(18,jc)
+     &               + a(ir,19)*b(19,jc)
+     &               + a(ir,20)*b(20,jc)
+     &               + a(ir,21)*b(21,jc)
+     &               + a(ir,22)*b(22,jc)
+     &               + a(ir,23)*b(23,jc)
+     &               + a(ir,24)*b(24,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm44_0(a, m, b, k, c, n)
+c
+c matrix multiply with a 4x4 pencil 
+c
+      real a(m,k), b(k,n), c(m,n)
+      real s11, s12, s13, s14, s21, s22, s23, s24
+      real s31, s32, s33, s34, s41, s42, s43, s44
+
+      mresid = iand(m,3) 
+      nresid = iand(n,3) 
+      m1 = m - mresid + 1
+      n1 = n - nresid + 1
+
+      do i=1,m-mresid,4
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s41 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          s42 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+          s43 = 0.0d0
+          s14 = 0.0d0
+          s24 = 0.0d0
+          s34 = 0.0d0
+          s44 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(i,l)*b(l,j)
+            s12 = s12 + a(i,l)*b(l,j+1)
+            s13 = s13 + a(i,l)*b(l,j+2)
+            s14 = s14 + a(i,l)*b(l,j+3)
+
+            s21 = s21 + a(i+1,l)*b(l,j)
+            s22 = s22 + a(i+1,l)*b(l,j+1)
+            s23 = s23 + a(i+1,l)*b(l,j+2)
+            s24 = s24 + a(i+1,l)*b(l,j+3)
+
+            s31 = s31 + a(i+2,l)*b(l,j)
+            s32 = s32 + a(i+2,l)*b(l,j+1)
+            s33 = s33 + a(i+2,l)*b(l,j+2)
+            s34 = s34 + a(i+2,l)*b(l,j+3)
+
+            s41 = s41 + a(i+3,l)*b(l,j)
+            s42 = s42 + a(i+3,l)*b(l,j+1)
+            s43 = s43 + a(i+3,l)*b(l,j+2)
+            s44 = s44 + a(i+3,l)*b(l,j+3)
+          enddo
+          c(i,j)     = s11 
+          c(i,j+1)   = s12 
+          c(i,j+2)   = s13
+          c(i,j+3)   = s14
+
+          c(i+1,j)   = s21 
+          c(i+2,j)   = s31 
+          c(i+3,j)   = s41 
+
+          c(i+1,j+1) = s22
+          c(i+2,j+1) = s32
+          c(i+3,j+1) = s42
+
+          c(i+1,j+2) = s23
+          c(i+2,j+2) = s33
+          c(i+3,j+2) = s43
+
+          c(i+1,j+3) = s24
+          c(i+2,j+3) = s34
+          c(i+3,j+3) = s44
+        enddo
+* Residual when n is not multiple of 4
+        if (nresid .ne. 0) then
+          if (nresid .eq. 1) then
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,n)
+              s21 = s21 + a(i+1,l)*b(l,n)
+              s31 = s31 + a(i+2,l)*b(l,n)
+              s41 = s41 + a(i+3,l)*b(l,n)
+            enddo
+            c(i,n)     = s11 
+            c(i+1,n)   = s21 
+            c(i+2,n)   = s31 
+            c(i+3,n)   = s41 
+          elseif (nresid .eq. 2) then
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            s12 = 0.0d0
+            s22 = 0.0d0
+            s32 = 0.0d0
+            s42 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,j)
+              s12 = s12 + a(i,l)*b(l,j+1)
+
+              s21 = s21 + a(i+1,l)*b(l,j)
+              s22 = s22 + a(i+1,l)*b(l,j+1)
+
+              s31 = s31 + a(i+2,l)*b(l,j)
+              s32 = s32 + a(i+2,l)*b(l,j+1)
+
+              s41 = s41 + a(i+3,l)*b(l,j)
+              s42 = s42 + a(i+3,l)*b(l,j+1)
+            enddo
+            c(i,j)     = s11 
+            c(i,j+1)   = s12
+
+            c(i+1,j)   = s21 
+            c(i+2,j)   = s31 
+            c(i+3,j)   = s41 
+
+            c(i+1,j+1) = s22
+            c(i+2,j+1) = s32
+            c(i+3,j+1) = s42
+          else
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            s12 = 0.0d0
+            s22 = 0.0d0
+            s32 = 0.0d0
+            s42 = 0.0d0
+            s13 = 0.0d0
+            s23 = 0.0d0
+            s33 = 0.0d0
+            s43 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,j)
+              s12 = s12 + a(i,l)*b(l,j+1)
+              s13 = s13 + a(i,l)*b(l,j+2)
+
+              s21 = s21 + a(i+1,l)*b(l,j)
+              s22 = s22 + a(i+1,l)*b(l,j+1)
+              s23 = s23 + a(i+1,l)*b(l,j+2)
+
+              s31 = s31 + a(i+2,l)*b(l,j)
+              s32 = s32 + a(i+2,l)*b(l,j+1)
+              s33 = s33 + a(i+2,l)*b(l,j+2)
+
+              s41 = s41 + a(i+3,l)*b(l,j)
+              s42 = s42 + a(i+3,l)*b(l,j+1)
+              s43 = s43 + a(i+3,l)*b(l,j+2)
+            enddo
+            c(i,j)     = s11 
+            c(i+1,j)   = s21 
+            c(i+2,j)   = s31 
+            c(i+3,j)   = s41 
+            c(i,j+1)   = s12 
+            c(i+1,j+1) = s22
+            c(i+2,j+1) = s32
+            c(i+3,j+1) = s42
+            c(i,j+2)   = s13
+            c(i+1,j+2) = s23
+            c(i+2,j+2) = s33
+            c(i+3,j+2) = s43
+          endif
+        endif
+      enddo
+
+* Residual when m is not multiple of 4
+      if (mresid .eq. 0) then
+        return
+      elseif (mresid .eq. 1) then
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          s14 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,j)
+            s12 = s12 + a(m,l)*b(l,j+1)
+            s13 = s13 + a(m,l)*b(l,j+2)
+            s14 = s14 + a(m,l)*b(l,j+3)
+          enddo
+          c(m,j)     = s11 
+          c(m,j+1)   = s12 
+          c(m,j+2)   = s13
+          c(m,j+3)   = s14
+        enddo
+* mresid is 1, check nresid
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n)
+          enddo
+          c(m,n) = s11
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s12 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n-1)
+            s12 = s12 + a(m,l)*b(l,n)
+          enddo
+          c(m,n-1) = s11
+          c(m,n) = s12
+          return
+        else
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n-2)
+            s12 = s12 + a(m,l)*b(l,n-1)
+            s13 = s13 + a(m,l)*b(l,n)
+          enddo
+          c(m,n-2) = s11
+          c(m,n-1) = s12
+          c(m,n) = s13
+          return
+        endif          
+      elseif (mresid .eq. 2) then
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          s14 = 0.0d0
+          s21 = 0.0d0
+          s22 = 0.0d0
+          s23 = 0.0d0
+          s24 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,j)
+            s12 = s12 + a(m-1,l)*b(l,j+1)
+            s13 = s13 + a(m-1,l)*b(l,j+2)
+            s14 = s14 + a(m-1,l)*b(l,j+3)
+
+            s21 = s21 + a(m,l)*b(l,j)
+            s22 = s22 + a(m,l)*b(l,j+1)
+            s23 = s23 + a(m,l)*b(l,j+2)
+            s24 = s24 + a(m,l)*b(l,j+3)
+          enddo
+          c(m-1,j)   = s11 
+          c(m-1,j+1) = s12 
+          c(m-1,j+2) = s13
+          c(m-1,j+3) = s14
+          c(m,j)     = s21
+          c(m,j+1)   = s22 
+          c(m,j+2)   = s23
+          c(m,j+3)   = s24
+        enddo
+* mresid is 2, check nresid
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n) = s11
+          c(m,n) = s21
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n-1)
+            s12 = s12 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n-1)
+            s22 = s22 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n-1) = s11
+          c(m-1,n)   = s12
+          c(m,n-1)   = s21
+          c(m,n)     = s22
+          return
+        else
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n-2)
+            s12 = s12 + a(m-1,l)*b(l,n-1)
+            s13 = s13 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n-2)
+            s22 = s22 + a(m,l)*b(l,n-1)
+            s23 = s23 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n-2) = s11
+          c(m-1,n-1) = s12
+          c(m-1,n)   = s13
+          c(m,n-2)   = s21
+          c(m,n-1)   = s22
+          c(m,n)     = s23
+          return
+        endif
+      else
+* mresid is 3
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+
+          s14 = 0.0d0
+          s24 = 0.0d0
+          s34 = 0.0d0
+
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,j)
+            s12 = s12 + a(m-2,l)*b(l,j+1)
+            s13 = s13 + a(m-2,l)*b(l,j+2)
+            s14 = s14 + a(m-2,l)*b(l,j+3)
+
+            s21 = s21 + a(m-1,l)*b(l,j)
+            s22 = s22 + a(m-1,l)*b(l,j+1)
+            s23 = s23 + a(m-1,l)*b(l,j+2)
+            s24 = s24 + a(m-1,l)*b(l,j+3)
+
+            s31 = s31 + a(m,l)*b(l,j)
+            s32 = s32 + a(m,l)*b(l,j+1)
+            s33 = s33 + a(m,l)*b(l,j+2)
+            s34 = s34 + a(m,l)*b(l,j+3)
+          enddo
+          c(m-2,j)   = s11 
+          c(m-2,j+1) = s12 
+          c(m-2,j+2) = s13
+          c(m-2,j+3) = s14
+
+          c(m-1,j)   = s21 
+          c(m-1,j+1) = s22
+          c(m-1,j+2) = s23
+          c(m-1,j+3) = s24
+
+          c(m,j)     = s31 
+          c(m,j+1)   = s32
+          c(m,j+2)   = s33
+          c(m,j+3)   = s34
+        enddo
+* mresid is 3, check nresid
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n) = s11
+          c(m-1,n) = s21
+          c(m,n) = s31
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n-1)
+            s12 = s12 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n-1)
+            s22 = s22 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n-1)
+            s32 = s32 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n-1) = s11
+          c(m-2,n)   = s12
+          c(m-1,n-1) = s21
+          c(m-1,n)   = s22
+          c(m,n-1)   = s31
+          c(m,n)     = s32
+          return
+        else
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n-2)
+            s12 = s12 + a(m-2,l)*b(l,n-1)
+            s13 = s13 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n-2)
+            s22 = s22 + a(m-1,l)*b(l,n-1)
+            s23 = s23 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n-2)
+            s32 = s32 + a(m,l)*b(l,n-1)
+            s33 = s33 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n-2) = s11
+          c(m-2,n-1) = s12
+          c(m-2,n)   = s13
+          c(m-1,n-2) = s21
+          c(m-1,n-1) = s22
+          c(m-1,n)   = s23
+          c(m,n-2)   = s31
+          c(m,n-1)   = s32
+          c(m,n)     = s33
+          return
+        endif
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm44_2(a, m, b, k, c, n)
+c
+c     C(m,n) = A(m,2) * B(2,n): matrix product for an inner dimension
+c     of exactly two.  Columns of C are produced four at a time; the
+c     iand(n,3) columns left over are finished by a trailing loop.
+c     The argument k is not referenced.
+c
+      real a(m,2), b(2,n), c(m,n)
+
+      nresid = iand(n,3)
+      n1 = n - nresid + 1
+
+c     Complete groups of four columns
+      do jc=1,n-nresid,4
+         do ir=1,m
+            c(ir,jc  ) = a(ir,1)*b(1,jc  ) + a(ir,2)*b(2,jc  )
+            c(ir,jc+1) = a(ir,1)*b(1,jc+1) + a(ir,2)*b(2,jc+1)
+            c(ir,jc+2) = a(ir,1)*b(1,jc+2) + a(ir,2)*b(2,jc+2)
+            c(ir,jc+3) = a(ir,1)*b(1,jc+3) + a(ir,2)*b(2,jc+3)
+         end do
+      end do
+
+c     Leftover columns n1..n (an empty range when nresid is zero)
+      do jc=n1,n
+         do ir=1,m
+            c(ir,jc) = a(ir,1)*b(1,jc) + a(ir,2)*b(2,jc)
+         end do
+      end do
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm_test_all(nid,ivb)
+c
+c     Collect matrix-matrix product statistics: call mxmtest once for
+c     every (problem shape, kernel) pair, then let the caller with
+c     nid = 0 hand the collected arrays to mxm_analyze.
+c
+c     nid - only nid = 0 calls mxm_analyze
+c     ivb - passed through unchanged to mxmtest and mxm_analyze
+c
+      external mxms,mxmur2,mxmur3,mxmd,mxmfb,mxmf3,mxmu4,mxmn2
+      external mxmk2,mxmtr,mxmrg,madd,mxm,mxm44
+c
+      parameter (nn=24, nt=10)
+      character*5 c(3,nt)
+      real        s(nn,2,nt,3)
+      real        a(nn,2,nt,3)
+
+      call nekgsync
+
+c     kt = 1,2,3 selects the test shape:  N^2 x N, NxN, NxN^2
+      do kt=1,3
+         call mxmtest(s(1,1, 1,kt),nn,c(kt, 1),mxm44 ,'mxm44',kt,ivb)
+         call mxmtest(s(1,1, 2,kt),nn,c(kt, 2),mxms  ,' std ',kt,ivb)
+         call mxmtest(s(1,1, 3,kt),nn,c(kt, 3),mxmur2,'mxmu2',kt,ivb)
+         call mxmtest(s(1,1, 4,kt),nn,c(kt, 4),mxmur3,'mxmu3',kt,ivb)
+         call mxmtest(s(1,1, 5,kt),nn,c(kt, 5),mxmd  ,'mxmd ',kt,ivb)
+         call mxmtest(s(1,1, 6,kt),nn,c(kt, 6),mxmfb ,'mxmfb',kt,ivb)
+         call mxmtest(s(1,1, 7,kt),nn,c(kt, 7),mxmu4 ,'mxmu4',kt,ivb)
+         call mxmtest(s(1,1, 8,kt),nn,c(kt, 8),mxmf3 ,'mxmf3',kt,ivb)
+c        Add works only for the NxN case
+         if (kt.eq.2) then
+            call mxmtest(s(1,1,9,kt),nn,c(kt,9),madd,'madd ',kt,ivb)
+         endif
+         call mxmtest(s(1,1,10,kt),nn,c(kt,10),mxm   ,'mxm  ',kt,ivb)
+      end do
+
+      call nekgsync
+      if (nid.eq.0) call mxm_analyze(s,a,nn,c,nt,ivb)
+      call nekgsync
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine initab(a,b,n)
+c
+c     Update the first n entries of a and b in place.  Each entry
+c     becomes -1/4 of the sum of itself and its right-hand neighbour
+c     (which still holds its value on entry) plus a rational function
+c     of the index.  Both arrays are read before they are written, so
+c     they must hold defined values on entry.
+c
+c     The last entry has no right-hand neighbour and is paired with
+c     itself.  Its index-dependent term uses the coefficients of index
+c     max(n-1,1): for n >= 2 those are exactly the values the loop
+c     leaves behind, so results are unchanged, and for n = 1 they are
+c     now defined instead of being read uninitialised.  For n < 1 the
+c     routine does nothing rather than storing to a(n), b(n) outside
+c     the arrays.
+c
+      real a(*),b(*)
+c
+      if (n.lt.1) return
+c
+      do i=1,n-1
+         x = i
+         k = mod(i,19) + 2
+         l = mod(i,17) + 5
+         m = mod(i,31) + 3
+         a(i) = -.25*(a(i)+a(i+1)) + (x*x + k + l)/(x*x+m)
+         b(i) = -.25*(b(i)+b(i+1)) + (x*x + k + m)/(x*x+l)
+      enddo
+c
+c     Coefficients for the last entry
+      ilast = max(n-1,1)
+      x = ilast
+      k = mod(ilast,19) + 2
+      l = mod(ilast,17) + 5
+      m = mod(ilast,31) + 3
+      a(n) = -.25*(a(n)+a(n)) + (x*x + k + l)/(x*x+m)
+      b(n) = -.25*(b(n)+b(n)) + (x*x + k + m)/(x*x+l)
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxms(a,n1,b,n2,c,n3)
+C----------------------------------------------------------------------
+C
+C     Matrix-matrix product C = A*B with A(n1,n2), B(n2,n3), C(n1,n3),
+C     written as a plain triple loop.
+C     NOTE: Use assembly coded routine if available.
+C
+C     C is overwritten.  It is cleared through its declared (n1,n3)
+C     shape so that every subscript stays inside its own bound.
+C
+C---------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+C
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = 0.
+         enddo
+      enddo
+c     Column j of C accumulates b(k,j) times column k of A
+      do j=1,n3
+         do k=1,n2
+            bb = b(k,j)
+            do i=1,n1
+               c(i,j) = c(i,j) + a(i,k)*bb
+            enddo
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmu4(a,n1,b,n2,c,n3)
+C----------------------------------------------------------------------
+C
+C     Matrix-matrix product C = A*B with A(n1,n2), B(n2,n3), C(n1,n3).
+C     The loop over the rows of C is unrolled by four, with a scalar
+C     loop for the mod(n1,4) rows that remain.
+C     NOTE: Use assembly coded routine if available.
+C
+C     C is overwritten.  It is cleared through its declared (n1,n3)
+C     shape so that every subscript stays inside its own bound.
+C
+C---------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+C
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = 0.
+         enddo
+      enddo
+c     i1 is the first row not covered by a complete group of four
+      i1 = n1 - mod(n1,4) + 1
+      do j=1,n3
+         do k=1,n2
+            bb = b(k,j)
+            do i=1,n1-3,4
+               c(i  ,j) = c(i  ,j) + a(i  ,k)*bb
+               c(i+1,j) = c(i+1,j) + a(i+1,k)*bb
+               c(i+2,j) = c(i+2,j) + a(i+2,k)*bb
+               c(i+3,j) = c(i+3,j) + a(i+3,k)*bb
+            enddo
+            do i=i1,n1
+               c(i,j) = c(i,j) + a(i,k)*bb
+            enddo
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine madd (a,n1,b,n2,c,n3)
+c
+c     Element-wise sum c(i,j) = a(i,j) + b(i,j) for i = 1..n1 and
+c     j = 1..n3, using the same argument list as mxms.  a is declared
+c     (n1,n2) and b (n2,n3), yet both are indexed over (n1,n3), so
+c     the subscripts stay within the declared shapes only when
+c     n1 = n2 = n3.
+c
+      real a(n1,n2),b(n2,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,jc)+b(ir,jc)
+         end do
+      end do
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmUR2(a,n1,b,n2,c,n3)
+C----------------------------------------------------------------------
+C
+C     Matrix-matrix product C = A*B with A(n1,n2), B(n2,n3), C(n1,n3).
+C     NOTE: Use assembly coded routine if available.
+C
+C     For n2 = 1..16 the work is handed to mxmur2_<n2>, which writes
+C     out the n2-term inner product explicitly.  Every other n2 takes
+C     the generic triple loop.  That includes n2 < 1, for which C is
+C     simply set to zero; such values used to be sent to mxmur2_8,
+C     which reads eight columns of A and eight rows of B regardless
+C     of n2.
+C
+C---------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+C
+      if (n2.ge.1 .and. n2.le.8) then
+         if (n2.eq.1) then
+            call mxmur2_1(a,n1,b,n2,c,n3)
+         elseif (n2.eq.2) then
+            call mxmur2_2(a,n1,b,n2,c,n3)
+         elseif (n2.eq.3) then
+            call mxmur2_3(a,n1,b,n2,c,n3)
+         elseif (n2.eq.4) then
+            call mxmur2_4(a,n1,b,n2,c,n3)
+         elseif (n2.eq.5) then
+            call mxmur2_5(a,n1,b,n2,c,n3)
+         elseif (n2.eq.6) then
+            call mxmur2_6(a,n1,b,n2,c,n3)
+         elseif (n2.eq.7) then
+            call mxmur2_7(a,n1,b,n2,c,n3)
+         else
+            call mxmur2_8(a,n1,b,n2,c,n3)
+         endif
+      elseif (n2.ge.9 .and. n2.le.16) then
+         if (n2.eq.9) then
+            call mxmur2_9(a,n1,b,n2,c,n3)
+         elseif (n2.eq.10) then
+            call mxmur2_10(a,n1,b,n2,c,n3)
+         elseif (n2.eq.11) then
+            call mxmur2_11(a,n1,b,n2,c,n3)
+         elseif (n2.eq.12) then
+            call mxmur2_12(a,n1,b,n2,c,n3)
+         elseif (n2.eq.13) then
+            call mxmur2_13(a,n1,b,n2,c,n3)
+         elseif (n2.eq.14) then
+            call mxmur2_14(a,n1,b,n2,c,n3)
+         elseif (n2.eq.15) then
+            call mxmur2_15(a,n1,b,n2,c,n3)
+         else
+            call mxmur2_16(a,n1,b,n2,c,n3)
+         endif
+      else
+c        Generic path.  C is cleared through its declared (n1,n3)
+c        shape so that every subscript stays inside its own bound.
+         do j=1,n3
+            do i=1,n1
+               c(i,j) = 0.
+            enddo
+         enddo
+         do j=1,n3
+            do k=1,n2
+               bb = b(k,j)
+               do i=1,n1
+                  c(i,j) = c(i,j) + a(i,k)*bb
+               enddo
+            enddo
+         enddo
+      endif
+      return
+      end
+c
+      subroutine mxmur2_1(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,1) * B(1,n3): inner dimension fixed at 1, so
+c     each entry of C is a single product.  n2 is not referenced.
+c
+      real a(n1,1),b(1,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_2(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,2) * B(2,n3): inner dimension fixed at 2, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,2),b(2,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_3(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,3) * B(3,n3): inner dimension fixed at 3, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,3),b(3,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_4(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,4) * B(4,n3): inner dimension fixed at 4, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,4),b(4,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_5(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,5) * B(5,n3): inner dimension fixed at 5, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,5),b(5,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_6(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,6) * B(6,n3): inner dimension fixed at 6, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,6),b(6,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_7(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,7) * B(7,n3): inner dimension fixed at 7, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,7),b(7,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_8(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,8) * B(8,n3): inner dimension fixed at 8, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,8),b(8,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_9(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,9) * B(9,n3): inner dimension fixed at 9, the
+c     sum for each entry of C written out term by term.  n2 is not
+c     referenced.
+c
+      real a(n1,9),b(9,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_10(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,10) * B(10,n3): inner dimension fixed at 10,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,10),b(10,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_11(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,11) * B(11,n3): inner dimension fixed at 11,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,11),b(11,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_12(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,12) * B(12,n3): inner dimension fixed at 12,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,12),b(12,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_13(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,13) * B(13,n3): inner dimension fixed at 13,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,13),b(13,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_14(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,14) * B(14,n3): inner dimension fixed at 14,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,14),b(14,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_15(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,15) * B(15,n3): inner dimension fixed at 15,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,15),b(15,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+         end do
+      end do
+      return
+      end
+      subroutine mxmur2_16(a,n1,b,n2,c,n3)
+c
+c     C(n1,n3) = A(n1,16) * B(16,n3): inner dimension fixed at 16,
+c     the sum for each entry of C written out term by term.  n2 is
+c     not referenced.
+c
+      real a(n1,16),b(16,n3),c(n1,n3)
+c
+      do jc=1,n3
+         do ir=1,n1
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     &               + a(ir,2)*b(2,jc)
+     &               + a(ir,3)*b(3,jc)
+     &               + a(ir,4)*b(4,jc)
+     &               + a(ir,5)*b(5,jc)
+     &               + a(ir,6)*b(6,jc)
+     &               + a(ir,7)*b(7,jc)
+     &               + a(ir,8)*b(8,jc)
+     &               + a(ir,9)*b(9,jc)
+     &               + a(ir,10)*b(10,jc)
+     &               + a(ir,11)*b(11,jc)
+     &               + a(ir,12)*b(12,jc)
+     &               + a(ir,13)*b(13,jc)
+     &               + a(ir,14)*b(14,jc)
+     &               + a(ir,15)*b(15,jc)
+     &               + a(ir,16)*b(16,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmUR3(a,n1,b,n2,c,n3)
+C----------------------------------------------------------------------
+C
+C     Matrix-matrix product C = A*B with A(n1,n2), B(n2,n3), C(n1,n3).
+C     NOTE: Use assembly coded routine if available.
+C
+C     C is cleared here and then accumulated into.  For n3 = 1..16
+C     the accumulation is done by mxmur3_<n3>, which updates all n3
+C     columns of C in one pass over A; larger n3 uses the generic
+C     triple loop.  For n3 < 1 the routine returns at once: C has no
+C     columns, and such values used to be sent to mxmur3_8, which
+C     stores into eight columns of C regardless of n3.
+C
+C---------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+C
+      if (n3.lt.1) return
+C
+c     Clear C through its declared (n1,n3) shape so that every
+c     subscript stays inside its own bound
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = 0.
+         enddo
+      enddo
+      if (n3.le.8) then
+         if (n3.eq.1) then
+            call mxmur3_1(a,n1,b,n2,c,n3)
+         elseif (n3.eq.2) then
+            call mxmur3_2(a,n1,b,n2,c,n3)
+         elseif (n3.eq.3) then
+            call mxmur3_3(a,n1,b,n2,c,n3)
+         elseif (n3.eq.4) then
+            call mxmur3_4(a,n1,b,n2,c,n3)
+         elseif (n3.eq.5) then
+            call mxmur3_5(a,n1,b,n2,c,n3)
+         elseif (n3.eq.6) then
+            call mxmur3_6(a,n1,b,n2,c,n3)
+         elseif (n3.eq.7) then
+            call mxmur3_7(a,n1,b,n2,c,n3)
+         else
+            call mxmur3_8(a,n1,b,n2,c,n3)
+         endif
+      elseif (n3.le.16) then
+         if (n3.eq.9) then
+            call mxmur3_9(a,n1,b,n2,c,n3)
+         elseif (n3.eq.10) then
+            call mxmur3_10(a,n1,b,n2,c,n3)
+         elseif (n3.eq.11) then
+            call mxmur3_11(a,n1,b,n2,c,n3)
+         elseif (n3.eq.12) then
+            call mxmur3_12(a,n1,b,n2,c,n3)
+         elseif (n3.eq.13) then
+            call mxmur3_13(a,n1,b,n2,c,n3)
+         elseif (n3.eq.14) then
+            call mxmur3_14(a,n1,b,n2,c,n3)
+         elseif (n3.eq.15) then
+            call mxmur3_15(a,n1,b,n2,c,n3)
+         else
+            call mxmur3_16(a,n1,b,n2,c,n3)
+         endif
+      else
+         do j=1,n3
+            do k=1,n2
+               bb = b(k,j)
+               do i=1,n1
+                  c(i,j) = c(i,j) + a(i,k)*bb
+               enddo
+            enddo
+         enddo
+      endif
+      return
+      end
+c
+      subroutine mxmur3_16(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,16) = C + A(n1,n2) * B(n2,16).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,16),c(n1,16)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         bk12 = b(kk,12)
+         bk13 = b(kk,13)
+         bk14 = b(kk,14)
+         bk15 = b(kk,15)
+         bk16 = b(kk,16)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+            c(ir,12) = c(ir,12) + aik*bk12
+            c(ir,13) = c(ir,13) + aik*bk13
+            c(ir,14) = c(ir,14) + aik*bk14
+            c(ir,15) = c(ir,15) + aik*bk15
+            c(ir,16) = c(ir,16) + aik*bk16
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_15(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,15) = C + A(n1,n2) * B(n2,15).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,15),c(n1,15)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         bk12 = b(kk,12)
+         bk13 = b(kk,13)
+         bk14 = b(kk,14)
+         bk15 = b(kk,15)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+            c(ir,12) = c(ir,12) + aik*bk12
+            c(ir,13) = c(ir,13) + aik*bk13
+            c(ir,14) = c(ir,14) + aik*bk14
+            c(ir,15) = c(ir,15) + aik*bk15
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_14(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,14) = C + A(n1,n2) * B(n2,14).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,14),c(n1,14)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         bk12 = b(kk,12)
+         bk13 = b(kk,13)
+         bk14 = b(kk,14)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+            c(ir,12) = c(ir,12) + aik*bk12
+            c(ir,13) = c(ir,13) + aik*bk13
+            c(ir,14) = c(ir,14) + aik*bk14
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_13(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,13) = C + A(n1,n2) * B(n2,13).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,13),c(n1,13)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         bk12 = b(kk,12)
+         bk13 = b(kk,13)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+            c(ir,12) = c(ir,12) + aik*bk12
+            c(ir,13) = c(ir,13) + aik*bk13
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_12(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,12) = C + A(n1,n2) * B(n2,12).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,12),c(n1,12)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         bk12 = b(kk,12)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+            c(ir,12) = c(ir,12) + aik*bk12
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_11(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,11) = C + A(n1,n2) * B(n2,11).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,11),c(n1,11)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         bk11 = b(kk,11)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+            c(ir,11) = c(ir,11) + aik*bk11
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_10(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,10) = C + A(n1,n2) * B(n2,10).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,10),c(n1,10)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1  = b(kk, 1)
+         bk2  = b(kk, 2)
+         bk3  = b(kk, 3)
+         bk4  = b(kk, 4)
+         bk5  = b(kk, 5)
+         bk6  = b(kk, 6)
+         bk7  = b(kk, 7)
+         bk8  = b(kk, 8)
+         bk9  = b(kk, 9)
+         bk10 = b(kk,10)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir, 1) = c(ir, 1) + aik*bk1
+            c(ir, 2) = c(ir, 2) + aik*bk2
+            c(ir, 3) = c(ir, 3) + aik*bk3
+            c(ir, 4) = c(ir, 4) + aik*bk4
+            c(ir, 5) = c(ir, 5) + aik*bk5
+            c(ir, 6) = c(ir, 6) + aik*bk6
+            c(ir, 7) = c(ir, 7) + aik*bk7
+            c(ir, 8) = c(ir, 8) + aik*bk8
+            c(ir, 9) = c(ir, 9) + aik*bk9
+            c(ir,10) = c(ir,10) + aik*bk10
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_9(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,9) = C + A(n1,n2) * B(n2,9).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,9),c(n1,9)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1 = b(kk,1)
+         bk2 = b(kk,2)
+         bk3 = b(kk,3)
+         bk4 = b(kk,4)
+         bk5 = b(kk,5)
+         bk6 = b(kk,6)
+         bk7 = b(kk,7)
+         bk8 = b(kk,8)
+         bk9 = b(kk,9)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir,1) = c(ir,1) + aik*bk1
+            c(ir,2) = c(ir,2) + aik*bk2
+            c(ir,3) = c(ir,3) + aik*bk3
+            c(ir,4) = c(ir,4) + aik*bk4
+            c(ir,5) = c(ir,5) + aik*bk5
+            c(ir,6) = c(ir,6) + aik*bk6
+            c(ir,7) = c(ir,7) + aik*bk7
+            c(ir,8) = c(ir,8) + aik*bk8
+            c(ir,9) = c(ir,9) + aik*bk9
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_8(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,8) = C + A(n1,n2) * B(n2,8).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,8),c(n1,8)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1 = b(kk,1)
+         bk2 = b(kk,2)
+         bk3 = b(kk,3)
+         bk4 = b(kk,4)
+         bk5 = b(kk,5)
+         bk6 = b(kk,6)
+         bk7 = b(kk,7)
+         bk8 = b(kk,8)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir,1) = c(ir,1) + aik*bk1
+            c(ir,2) = c(ir,2) + aik*bk2
+            c(ir,3) = c(ir,3) + aik*bk3
+            c(ir,4) = c(ir,4) + aik*bk4
+            c(ir,5) = c(ir,5) + aik*bk5
+            c(ir,6) = c(ir,6) + aik*bk6
+            c(ir,7) = c(ir,7) + aik*bk7
+            c(ir,8) = c(ir,8) + aik*bk8
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_7(a,n1,b,n2,c,n3)
+c
+c     Accumulate C(n1,7) = C + A(n1,n2) * B(n2,7).  C is only added
+c     to, never cleared, so it must hold its starting values on
+c     entry.  n3 is not referenced.
+c
+      real a(n1,n2),b(n2,7),c(n1,7)
+c
+      do kk=1,n2
+c        Row kk of B, held in scalars while sweeping the rows of A
+         bk1 = b(kk,1)
+         bk2 = b(kk,2)
+         bk3 = b(kk,3)
+         bk4 = b(kk,4)
+         bk5 = b(kk,5)
+         bk6 = b(kk,6)
+         bk7 = b(kk,7)
+         do ir=1,n1
+            aik = a(ir,kk)
+            c(ir,1) = c(ir,1) + aik*bk1
+            c(ir,2) = c(ir,2) + aik*bk2
+            c(ir,3) = c(ir,3) + aik*bk3
+            c(ir,4) = c(ir,4) + aik*bk4
+            c(ir,5) = c(ir,5) + aik*bk5
+            c(ir,6) = c(ir,6) + aik*bk6
+            c(ir,7) = c(ir,7) + aik*bk7
+         end do
+      end do
+c
+      return
+      end
+      subroutine mxmur3_6(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,6),c(n1,6)
+c     Adds a*b into the 6 columns of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         tmp2  =  b(k, 2)
+         tmp3  =  b(k, 3)
+         tmp4  =  b(k, 4)
+         tmp5  =  b(k, 5)
+         tmp6  =  b(k, 6)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
+            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
+            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
+            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
+            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
+         enddo
+c     (tmp1..tmp6 held row k of b in scalars during the i loop)
+      enddo
+c
+      return
+      end
+      subroutine mxmur3_5(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,5),c(n1,5)
+c     Adds a*b into the 5 columns of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         tmp2  =  b(k, 2)
+         tmp3  =  b(k, 3)
+         tmp4  =  b(k, 4)
+         tmp5  =  b(k, 5)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
+            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
+            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
+            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
+         enddo
+c     (tmp1..tmp5 held row k of b in scalars during the i loop)
+      enddo
+c
+      return
+      end
+      subroutine mxmur3_4(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,4),c(n1,4)
+c     Adds a*b into the 4 columns of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         tmp2  =  b(k, 2)
+         tmp3  =  b(k, 3)
+         tmp4  =  b(k, 4)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
+            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
+            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
+         enddo
+c     (tmp1..tmp4 held row k of b in scalars during the i loop)
+      enddo
+c
+      return
+      end
+      subroutine mxmur3_3(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,3),c(n1,3)
+c     Adds a*b into the 3 columns of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         tmp2  =  b(k, 2)
+         tmp3  =  b(k, 3)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
+            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
+         enddo
+c     (tmp1..tmp3 held row k of b in scalars during the i loop)
+      enddo
+c
+      return
+      end
+      subroutine mxmur3_2(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,2),c(n1,2)
+c     Adds a*b into the 2 columns of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         tmp2  =  b(k, 2)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
+         enddo
+c     (tmp1,tmp2 held row k of b in scalars during the i loop)
+      enddo
+c
+      return
+      end
+      subroutine mxmur3_1(a,n1,b,n2,c,n3)
+      real a(n1,n2),b(n2,1),c(n1,1)
+c     Adds a*b into column 1 of c; c is not zeroed, n3 is unused.
+      do k=1,n2
+         tmp1  =  b(k, 1)
+         do i=1,n1
+            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
+         enddo
+      enddo
+c
+      return
+      end
+C----------------------------------------------------------------------
+      subroutine mxmd(a,n1,b,n2,c,n3)
+C
+C     Matrix-matrix product C = A*B computed by a single dgemm call.
+C     NOTE: Use assembly coded routine if available.
+C     A(n1,n2), B(n2,n3) are inputs; C(n1,n3) is overwritten (beta=0).
+C---------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+      REAL ONE,ZERO
+C
+C     dgemm is called unconditionally (no sgemm branch here), so this
+C     presumably requires REAL to be built as 8 bytes - TODO confirm.
+      one=1.0
+      zero=0.0
+      call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb(a,n1,b,n2,c,n3)
+C-----------------------------------------------------------------------
+C     Matrix-matrix product C = A*B for A(n1,n2), B(n2,n3), C(n1,n3);
+C     C is overwritten.  n2 = 1..24 uses the unrolled kernel mxmfb_<n2>,
+C     n2 > 24 uses sgemm/dgemm, n2 <= 0 (empty sum) sets C to zero.
+C     NOTE: Use assembly coded routine if available.
+C----------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+      integer wdsize
+      save    wdsize
+      data    wdsize/0/
+c     First call: determine word size for dgemm/sgemm discrimination, below.
+      if (wdsize.eq.0) then
+         one = 1.0
+         eps = 1.e-12
+         wdsize = 8
+         if (one+eps.eq.1.0) wdsize = 4
+      endif
+      if (n2.le.0) then
+         do j=1,n3
+            do i=1,n1
+               c(i,j) = 0.0
+            enddo
+         enddo
+      elseif (n2.le.8) then
+         if (n2.eq.1) then
+            call mxmfb_1(a,n1,b,n2,c,n3)
+         elseif (n2.eq.2) then
+            call mxmfb_2(a,n1,b,n2,c,n3)
+         elseif (n2.eq.3) then
+            call mxmfb_3(a,n1,b,n2,c,n3)
+         elseif (n2.eq.4) then
+            call mxmfb_4(a,n1,b,n2,c,n3)
+         elseif (n2.eq.5) then
+            call mxmfb_5(a,n1,b,n2,c,n3)
+         elseif (n2.eq.6) then
+            call mxmfb_6(a,n1,b,n2,c,n3)
+         elseif (n2.eq.7) then
+            call mxmfb_7(a,n1,b,n2,c,n3)
+         else
+            call mxmfb_8(a,n1,b,n2,c,n3)
+         endif
+      elseif (n2.le.16) then
+         if (n2.eq.9) then
+            call mxmfb_9(a,n1,b,n2,c,n3)
+         elseif (n2.eq.10) then
+            call mxmfb_10(a,n1,b,n2,c,n3)
+         elseif (n2.eq.11) then
+            call mxmfb_11(a,n1,b,n2,c,n3)
+         elseif (n2.eq.12) then
+            call mxmfb_12(a,n1,b,n2,c,n3)
+         elseif (n2.eq.13) then
+            call mxmfb_13(a,n1,b,n2,c,n3)
+         elseif (n2.eq.14) then
+            call mxmfb_14(a,n1,b,n2,c,n3)
+         elseif (n2.eq.15) then
+            call mxmfb_15(a,n1,b,n2,c,n3)
+         else
+            call mxmfb_16(a,n1,b,n2,c,n3)
+         endif
+      elseif (n2.le.24) then
+         if (n2.eq.17) then
+            call mxmfb_17(a,n1,b,n2,c,n3)
+         elseif (n2.eq.18) then
+            call mxmfb_18(a,n1,b,n2,c,n3)
+         elseif (n2.eq.19) then
+            call mxmfb_19(a,n1,b,n2,c,n3)
+         elseif (n2.eq.20) then
+            call mxmfb_20(a,n1,b,n2,c,n3)
+         elseif (n2.eq.21) then
+            call mxmfb_21(a,n1,b,n2,c,n3)
+         elseif (n2.eq.22) then
+            call mxmfb_22(a,n1,b,n2,c,n3)
+         elseif (n2.eq.23) then
+            call mxmfb_23(a,n1,b,n2,c,n3)
+         elseif (n2.eq.24) then
+            call mxmfb_24(a,n1,b,n2,c,n3)
+         endif
+      else
+         one=1.0
+         zero=0.0
+         if (wdsize.eq.4) then
+            call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
+         else
+            call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
+         endif
+      endif
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_1(a,n1,b,n2,c,n3)
+c     c = a*b for inner dimension 1 (single product); c is overwritten.
+      real a(n1,1),b(1,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_2(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 2; c is overwritten.
+      real a(n1,2),b(2,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_3(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 3; c is overwritten.
+      real a(n1,3),b(3,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_4(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 4; c is overwritten.
+      real a(n1,4),b(4,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_5(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 5; c is overwritten.
+      real a(n1,5),b(5,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_6(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 6; c is overwritten.
+      real a(n1,6),b(6,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_7(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 7; c is overwritten.
+      real a(n1,7),b(7,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_8(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 8; c is overwritten.
+      real a(n1,8),b(8,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_9(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 9; c is overwritten.
+      real a(n1,9),b(9,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_10(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 10; c is overwritten.
+      real a(n1,10),b(10,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_11(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 11; c is overwritten.
+      real a(n1,11),b(11,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_12(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 12; c is overwritten.
+      real a(n1,12),b(12,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_13(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 13; c is overwritten.
+      real a(n1,13),b(13,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_14(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 14; c is overwritten.
+      real a(n1,14),b(14,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_15(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 15; c is overwritten.
+      real a(n1,15),b(15,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_16(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 16; c is overwritten.
+      real a(n1,16),b(16,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_17(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 17; c is overwritten.
+      real a(n1,17),b(17,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_18(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 18; c is overwritten.
+      real a(n1,18),b(18,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_19(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 19; c is overwritten.
+      real a(n1,19),b(19,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_20(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 20; c is overwritten.
+      real a(n1,20),b(20,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+     $             + a(i,20)*b(20,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_21(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 21; c is overwritten.
+      real a(n1,21),b(21,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+     $             + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_22(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 22; c is overwritten.
+      real a(n1,22),b(22,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+     $             + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j)
+     $             + a(i,22)*b(22,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_23(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 23; c is overwritten.
+      real a(n1,23),b(23,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+     $             + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j)
+     $             + a(i,22)*b(22,j)
+     $             + a(i,23)*b(23,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmfb_24(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 24; c is overwritten.
+      real a(n1,24),b(24,n3),c(n1,n3)
+c     j is the outer loop, i the inner; n2 is not referenced.
+      do j=1,n3
+         do i=1,n1
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j)
+     $             + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j)
+     $             + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j)
+     $             + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j)
+     $             + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j)
+     $             + a(i,22)*b(22,j)
+     $             + a(i,23)*b(23,j)
+     $             + a(i,24)*b(24,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3(a,n1,b,n2,c,n3)
+C-----------------------------------------------------------------------
+C     Matrix-matrix product C = A*B for A(n1,n2), B(n2,n3), C(n1,n3);
+C     C is overwritten.  n2 = 1..24 uses the unrolled kernel mxmf3_<n2>,
+C     n2 > 24 uses sgemm/dgemm, n2 <= 0 (empty sum) sets C to zero.
+C     NOTE: Use assembly coded routine if available.
+C----------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+      integer wdsize
+      save    wdsize
+      data    wdsize/0/
+c     First call: determine word size for dgemm/sgemm discrimination, below.
+      if (wdsize.eq.0) then
+         one = 1.0
+         eps = 1.e-12
+         wdsize = 8
+         if (one+eps.eq.1.0) wdsize = 4
+      endif
+      if (n2.le.0) then
+         do j=1,n3
+            do i=1,n1
+               c(i,j) = 0.0
+            enddo
+         enddo
+      elseif (n2.le.8) then
+         if (n2.eq.1) then
+            call mxmf3_1(a,n1,b,n2,c,n3)
+         elseif (n2.eq.2) then
+            call mxmf3_2(a,n1,b,n2,c,n3)
+         elseif (n2.eq.3) then
+            call mxmf3_3(a,n1,b,n2,c,n3)
+         elseif (n2.eq.4) then
+            call mxmf3_4(a,n1,b,n2,c,n3)
+         elseif (n2.eq.5) then
+            call mxmf3_5(a,n1,b,n2,c,n3)
+         elseif (n2.eq.6) then
+            call mxmf3_6(a,n1,b,n2,c,n3)
+         elseif (n2.eq.7) then
+            call mxmf3_7(a,n1,b,n2,c,n3)
+         else
+            call mxmf3_8(a,n1,b,n2,c,n3)
+         endif
+      elseif (n2.le.16) then
+         if (n2.eq.9) then
+            call mxmf3_9(a,n1,b,n2,c,n3)
+         elseif (n2.eq.10) then
+            call mxmf3_10(a,n1,b,n2,c,n3)
+         elseif (n2.eq.11) then
+            call mxmf3_11(a,n1,b,n2,c,n3)
+         elseif (n2.eq.12) then
+            call mxmf3_12(a,n1,b,n2,c,n3)
+         elseif (n2.eq.13) then
+            call mxmf3_13(a,n1,b,n2,c,n3)
+         elseif (n2.eq.14) then
+            call mxmf3_14(a,n1,b,n2,c,n3)
+         elseif (n2.eq.15) then
+            call mxmf3_15(a,n1,b,n2,c,n3)
+         else
+            call mxmf3_16(a,n1,b,n2,c,n3)
+         endif
+      elseif (n2.le.24) then
+         if (n2.eq.17) then
+            call mxmf3_17(a,n1,b,n2,c,n3)
+         elseif (n2.eq.18) then
+            call mxmf3_18(a,n1,b,n2,c,n3)
+         elseif (n2.eq.19) then
+            call mxmf3_19(a,n1,b,n2,c,n3)
+         elseif (n2.eq.20) then
+            call mxmf3_20(a,n1,b,n2,c,n3)
+         elseif (n2.eq.21) then
+            call mxmf3_21(a,n1,b,n2,c,n3)
+         elseif (n2.eq.22) then
+            call mxmf3_22(a,n1,b,n2,c,n3)
+         elseif (n2.eq.23) then
+            call mxmf3_23(a,n1,b,n2,c,n3)
+         elseif (n2.eq.24) then
+            call mxmf3_24(a,n1,b,n2,c,n3)
+         endif
+      else
+         one=1.0
+         zero=0.0
+         if (wdsize.eq.4) then
+            call sgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
+         else
+            call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
+         endif
+c        Disabled plain-Fortran equivalent of the gemm call, for reference:
+c        N0=N1*N3
+c        DO 10 I=1,N0
+c           C(I,1)=0.
+c  10    CONTINUE
+c        DO 100 J=1,N3
+c        DO 100 K=1,N2
+c        BB=B(K,J)
+c        DO 100 I=1,N1
+c           C(I,J)=C(I,J)+A(I,K)*BB
+c 100    CONTINUE
+      endif
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_1(a,n1,b,n2,c,n3)
+c     c = a*b for inner dimension 1 (single product); c is overwritten.
+      real a(n1,1),b(1,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_2(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 2; c is overwritten.
+      real a(n1,2),b(2,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_3(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 3; c is overwritten.
+      real a(n1,3),b(3,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_4(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 4; c is overwritten.
+      real a(n1,4),b(4,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_5(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 5; c is overwritten.
+      real a(n1,5),b(5,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_6(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 6; c is overwritten.
+      real a(n1,6),b(6,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_7(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 7; c is overwritten.
+      real a(n1,7),b(7,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_8(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 8; c is overwritten.
+      real a(n1,8),b(8,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_9(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 9; c is overwritten.
+      real a(n1,9),b(9,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_10(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 10; c is overwritten.
+      real a(n1,10),b(10,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_11(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 11; c is overwritten.
+      real a(n1,11),b(11,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_12(a,n1,b,n2,c,n3)
+c     c = a*b, k sum unrolled for inner dimension 12; c is overwritten.
+      real a(n1,12),b(12,n3),c(n1,n3)
+c     i is the outer loop, j the inner; n2 is not referenced.
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j)
+     $             + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j)
+     $             + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j)
+     $             + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j)
+     $             + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j)
+     $             + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j)
+     $             + a(i,12)*b(12,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_13(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 13, sum written out in full.
+      real a(n1,13),b(13,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_14(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 14, sum written out in full.
+      real a(n1,14),b(14,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_15(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 15, sum written out in full.
+      real a(n1,15),b(15,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_16(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 16, sum written out in full.
+      real a(n1,16),b(16,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+     $               + a(ir,16)*b(16,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_17(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 17, sum written out in full.
+      real a(n1,17),b(17,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+     $               + a(ir,16)*b(16,jc)
+     $               + a(ir,17)*b(17,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_18(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 18, sum written out in full.
+      real a(n1,18),b(18,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+     $               + a(ir,16)*b(16,jc)
+     $               + a(ir,17)*b(17,jc)
+     $               + a(ir,18)*b(18,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_19(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 19, sum written out in full.
+      real a(n1,19),b(19,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+     $               + a(ir,16)*b(16,jc)
+     $               + a(ir,17)*b(17,jc)
+     $               + a(ir,18)*b(18,jc)
+     $               + a(ir,19)*b(19,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_20(a,n1,b,n2,c,n3)
+c     c = a*b, inner dimension fixed at 20, sum written out in full.
+      real a(n1,20),b(20,n3),c(n1,n3)
+c     n2 is not referenced.  ir = row of c, jc = column of c.
+      do ir=1,n1
+         do jc=1,n3
+            c(ir,jc) = a(ir,1)*b(1,jc)
+     $               + a(ir,2)*b(2,jc)
+     $               + a(ir,3)*b(3,jc)
+     $               + a(ir,4)*b(4,jc)
+     $               + a(ir,5)*b(5,jc)
+     $               + a(ir,6)*b(6,jc)
+     $               + a(ir,7)*b(7,jc)
+     $               + a(ir,8)*b(8,jc)
+     $               + a(ir,9)*b(9,jc)
+     $               + a(ir,10)*b(10,jc)
+     $               + a(ir,11)*b(11,jc)
+     $               + a(ir,12)*b(12,jc)
+     $               + a(ir,13)*b(13,jc)
+     $               + a(ir,14)*b(14,jc)
+     $               + a(ir,15)*b(15,jc)
+     $               + a(ir,16)*b(16,jc)
+     $               + a(ir,17)*b(17,jc)
+     $               + a(ir,18)*b(18,jc)
+     $               + a(ir,19)*b(19,jc)
+     $               + a(ir,20)*b(20,jc)
+         end do
+      end do
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_21(a,n1,b,n2,c,n3)
+c     Dense product c = a*b with the inner dimension fixed at 21.
+c     a  (in)  n1 x 21 matrix
+c     n1 (in)  number of rows of a and c
+c     b  (in)  21 x n3 matrix
+c     n2 (in)  not referenced (the inner dimension is hard-wired)
+c     c  (out) n1 x n3 product, every entry is overwritten
+c     n3 (in)  number of columns of b and c
+c
+c     The sum is written out in full, terms in order 1..21.  Two
+c     terms share each line so that the statement stays within the
+c     19 continuation lines that fixed-form Fortran 77/90/95
+c     guarantees; one term per line would need 20.
+      real a(n1,21),b(21,n3),c(n1,n3)
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j) + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j) + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j) + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j) + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j) + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j) + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j) + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j) + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j) + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j) + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_22(a,n1,b,n2,c,n3)
+c     Dense product c = a*b with the inner dimension fixed at 22.
+c
+c     a  (in)  n1 x 22 matrix
+c     n1 (in)  number of rows of a and c
+c     b  (in)  22 x n3 matrix
+c     n2 (in)  not referenced (the inner dimension is hard-wired)
+c     c  (out) n1 x n3 product, every entry is overwritten
+c     n3 (in)  number of columns of b and c
+c
+c     The sum is written out in full, terms in order 1..22.  Two
+c     terms share each line so that the statement stays within the
+c     19 continuation lines that fixed-form Fortran 77/90/95
+c     guarantees; one term per line would need 21.
+      real a(n1,22),b(22,n3),c(n1,n3)
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j) + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j) + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j) + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j) + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j) + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j) + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j) + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j) + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j) + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j) + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j) + a(i,22)*b(22,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_23(a,n1,b,n2,c,n3)
+c     Dense product c = a*b with the inner dimension fixed at 23.
+c
+c     a  (in)  n1 x 23 matrix
+c     n1 (in)  number of rows of a and c
+c     b  (in)  23 x n3 matrix
+c     n2 (in)  not referenced (the inner dimension is hard-wired)
+c     c  (out) n1 x n3 product, every entry is overwritten
+c     n3 (in)  number of columns of b and c
+c
+c     The sum is written out in full, terms in order 1..23.  Two
+c     terms share each line so that the statement stays within the
+c     19 continuation lines that fixed-form Fortran 77/90/95
+c     guarantees; one term per line would need 22.
+      real a(n1,23),b(23,n3),c(n1,n3)
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j) + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j) + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j) + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j) + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j) + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j) + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j) + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j) + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j) + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j) + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j) + a(i,22)*b(22,j)
+     $             + a(i,23)*b(23,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmf3_24(a,n1,b,n2,c,n3)
+c     Dense product c = a*b with the inner dimension fixed at 24.
+c
+c     a  (in)  n1 x 24 matrix
+c     n1 (in)  number of rows of a and c
+c     b  (in)  24 x n3 matrix
+c     n2 (in)  not referenced (the inner dimension is hard-wired)
+c     c  (out) n1 x n3 product, every entry is overwritten
+c     n3 (in)  number of columns of b and c
+c
+c     The sum is written out in full, terms in order 1..24.  Two
+c     terms share each line so that the statement stays within the
+c     19 continuation lines that fixed-form Fortran 77/90/95
+c     guarantees; one term per line would need 23.
+c
+      real a(n1,24),b(24,n3),c(n1,n3)
+      do i=1,n1
+         do j=1,n3
+            c(i,j) = a(i,1)*b(1,j) + a(i,2)*b(2,j)
+     $             + a(i,3)*b(3,j) + a(i,4)*b(4,j)
+     $             + a(i,5)*b(5,j) + a(i,6)*b(6,j)
+     $             + a(i,7)*b(7,j) + a(i,8)*b(8,j)
+     $             + a(i,9)*b(9,j) + a(i,10)*b(10,j)
+     $             + a(i,11)*b(11,j) + a(i,12)*b(12,j)
+     $             + a(i,13)*b(13,j) + a(i,14)*b(14,j)
+     $             + a(i,15)*b(15,j) + a(i,16)*b(16,j)
+     $             + a(i,17)*b(17,j) + a(i,18)*b(18,j)
+     $             + a(i,19)*b(19,j) + a(i,20)*b(20,j)
+     $             + a(i,21)*b(21,j) + a(i,22)*b(22,j)
+     $             + a(i,23)*b(23,j) + a(i,24)*b(24,j)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm44(a,n1,b,n2,c,n3)
+C-----------------------------------------------------------------------
+C
+C     NOTE -- this code has been set up with the "mxmf3" routine
+c             referenced in memtime.f.   On most machines, the f2
+c             and f3 versions give the same performance (f2 is the
+c             nekton standard).  On the t3e, f3 is noticeably faster.
+c             pff  10/5/98
+C
+C     Matrix-matrix product C = A*B, A is n1 x n2 and B is n2 x n3.
+C     n2 = 2 goes to the specialised kernel mxm44_2_t; every other
+C     n2 (n2 = 1 included) goes to the general kernel mxm44_0_t.
+C     NOTE: Use assembly coded routine if available.
+C
+C----------------------------------------------------------------------
+      REAL A(N1,N2),B(N2,N3),C(N1,N3)
+c     mxm44_2_t hard-wires an inner dimension of 2, so it must not be
+c     handed n2 = 1 operands: it would read a(i,2) and b(2,j), which
+c     lie outside A(N1,1) and B(1,N3).
+      if (n2.eq.2) then
+         call mxm44_2_t(a,n1,b,n2,c,n3)
+      else
+         call mxm44_0_t(a,n1,b,n2,c,n3)
+      endif
+      return
+      end
+c
+c-----------------------------------------------------------------------
+      subroutine mxm44_0_t(a, m, b, k, c, n)
+*      subroutine matmul44(m, n, k, a, lda, b, ldb, c, ldc)
+*      real*8 a(lda,k), b(ldb,n), c(ldc,n)
+      real a(m,k), b(k,n), c(m,n)
+      real s11, s12, s13, s14, s21, s22, s23, s24
+      real s31, s32, s33, s34, s41, s42, s43, s44
+c     c = a*b for a(m,k), b(k,n), c(m,n), formed in 4x4 tiles of c.
+c     sRC accumulates entry (row R, column C) of the current tile.
+c     mresid, nresid: low two bits of m, n, i.e. the rows/columns
+c     left over after the full 4x4 tiles.  m1, n1 are never used.
+      mresid = iand(m,3) 
+      nresid = iand(n,3) 
+      m1 = m - mresid + 1
+      n1 = n - nresid + 1
+c     bands of four full rows; inner loop over full 4-column tiles
+      do i=1,m-mresid,4
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s41 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          s42 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+          s43 = 0.0d0
+          s14 = 0.0d0
+          s24 = 0.0d0
+          s34 = 0.0d0
+          s44 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(i,l)*b(l,j)
+            s12 = s12 + a(i,l)*b(l,j+1)
+            s13 = s13 + a(i,l)*b(l,j+2)
+            s14 = s14 + a(i,l)*b(l,j+3)
+
+            s21 = s21 + a(i+1,l)*b(l,j)
+            s22 = s22 + a(i+1,l)*b(l,j+1)
+            s23 = s23 + a(i+1,l)*b(l,j+2)
+            s24 = s24 + a(i+1,l)*b(l,j+3)
+
+            s31 = s31 + a(i+2,l)*b(l,j)
+            s32 = s32 + a(i+2,l)*b(l,j+1)
+            s33 = s33 + a(i+2,l)*b(l,j+2)
+            s34 = s34 + a(i+2,l)*b(l,j+3)
+
+            s41 = s41 + a(i+3,l)*b(l,j)
+            s42 = s42 + a(i+3,l)*b(l,j+1)
+            s43 = s43 + a(i+3,l)*b(l,j+2)
+            s44 = s44 + a(i+3,l)*b(l,j+3)
+          enddo
+          c(i,j)     = s11 
+          c(i,j+1)   = s12 
+          c(i,j+2)   = s13
+          c(i,j+3)   = s14
+
+          c(i+1,j)   = s21 
+          c(i+2,j)   = s31 
+          c(i+3,j)   = s41 
+
+          c(i+1,j+1) = s22
+          c(i+2,j+1) = s32
+          c(i+3,j+1) = s42
+
+          c(i+1,j+2) = s23
+          c(i+2,j+2) = s33
+          c(i+3,j+2) = s43
+
+          c(i+1,j+3) = s24
+          c(i+2,j+3) = s34
+          c(i+3,j+3) = s44
+        enddo
+* Residual when n is not a multiple of 4; here j = n-nresid+1
+        if (nresid .ne. 0) then
+          if (nresid .eq. 1) then
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,n)
+              s21 = s21 + a(i+1,l)*b(l,n)
+              s31 = s31 + a(i+2,l)*b(l,n)
+              s41 = s41 + a(i+3,l)*b(l,n)
+            enddo
+            c(i,n)     = s11 
+            c(i+1,n)   = s21 
+            c(i+2,n)   = s31 
+            c(i+3,n)   = s41 
+          elseif (nresid .eq. 2) then
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            s12 = 0.0d0
+            s22 = 0.0d0
+            s32 = 0.0d0
+            s42 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,j)
+              s12 = s12 + a(i,l)*b(l,j+1)
+
+              s21 = s21 + a(i+1,l)*b(l,j)
+              s22 = s22 + a(i+1,l)*b(l,j+1)
+
+              s31 = s31 + a(i+2,l)*b(l,j)
+              s32 = s32 + a(i+2,l)*b(l,j+1)
+
+              s41 = s41 + a(i+3,l)*b(l,j)
+              s42 = s42 + a(i+3,l)*b(l,j+1)
+            enddo
+            c(i,j)     = s11 
+            c(i,j+1)   = s12
+
+            c(i+1,j)   = s21 
+            c(i+2,j)   = s31 
+            c(i+3,j)   = s41 
+
+            c(i+1,j+1) = s22
+            c(i+2,j+1) = s32
+            c(i+3,j+1) = s42
+          else
+            s11 = 0.0d0
+            s21 = 0.0d0
+            s31 = 0.0d0
+            s41 = 0.0d0
+            s12 = 0.0d0
+            s22 = 0.0d0
+            s32 = 0.0d0
+            s42 = 0.0d0
+            s13 = 0.0d0
+            s23 = 0.0d0
+            s33 = 0.0d0
+            s43 = 0.0d0
+            do l=1,k
+              s11 = s11 + a(i,l)*b(l,j)
+              s12 = s12 + a(i,l)*b(l,j+1)
+              s13 = s13 + a(i,l)*b(l,j+2)
+
+              s21 = s21 + a(i+1,l)*b(l,j)
+              s22 = s22 + a(i+1,l)*b(l,j+1)
+              s23 = s23 + a(i+1,l)*b(l,j+2)
+
+              s31 = s31 + a(i+2,l)*b(l,j)
+              s32 = s32 + a(i+2,l)*b(l,j+1)
+              s33 = s33 + a(i+2,l)*b(l,j+2)
+
+              s41 = s41 + a(i+3,l)*b(l,j)
+              s42 = s42 + a(i+3,l)*b(l,j+1)
+              s43 = s43 + a(i+3,l)*b(l,j+2)
+            enddo
+            c(i,j)     = s11 
+            c(i+1,j)   = s21 
+            c(i+2,j)   = s31 
+            c(i+3,j)   = s41 
+            c(i,j+1)   = s12 
+            c(i+1,j+1) = s22
+            c(i+2,j+1) = s32
+            c(i+3,j+1) = s42
+            c(i,j+2)   = s13
+            c(i+1,j+2) = s23
+            c(i+2,j+2) = s33
+            c(i+3,j+2) = s43
+          endif
+        endif
+      enddo
+
+* Residual when m is not a multiple of 4: the last mresid rows
+      if (mresid .eq. 0) then
+        return
+      elseif (mresid .eq. 1) then
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          s14 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,j)
+            s12 = s12 + a(m,l)*b(l,j+1)
+            s13 = s13 + a(m,l)*b(l,j+2)
+            s14 = s14 + a(m,l)*b(l,j+3)
+          enddo
+          c(m,j)     = s11 
+          c(m,j+1)   = s12 
+          c(m,j+2)   = s13
+          c(m,j+3)   = s14
+        enddo
+* mresid is 1, check nresid (corner: last row, last nresid columns)
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n)
+          enddo
+          c(m,n) = s11
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s12 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n-1)
+            s12 = s12 + a(m,l)*b(l,n)
+          enddo
+          c(m,n-1) = s11
+          c(m,n) = s12
+          return
+        else
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m,l)*b(l,n-2)
+            s12 = s12 + a(m,l)*b(l,n-1)
+            s13 = s13 + a(m,l)*b(l,n)
+          enddo
+          c(m,n-2) = s11
+          c(m,n-1) = s12
+          c(m,n) = s13
+          return
+        endif          
+      elseif (mresid .eq. 2) then
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s12 = 0.0d0
+          s13 = 0.0d0
+          s14 = 0.0d0
+          s21 = 0.0d0
+          s22 = 0.0d0
+          s23 = 0.0d0
+          s24 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,j)
+            s12 = s12 + a(m-1,l)*b(l,j+1)
+            s13 = s13 + a(m-1,l)*b(l,j+2)
+            s14 = s14 + a(m-1,l)*b(l,j+3)
+
+            s21 = s21 + a(m,l)*b(l,j)
+            s22 = s22 + a(m,l)*b(l,j+1)
+            s23 = s23 + a(m,l)*b(l,j+2)
+            s24 = s24 + a(m,l)*b(l,j+3)
+          enddo
+          c(m-1,j)   = s11 
+          c(m-1,j+1) = s12 
+          c(m-1,j+2) = s13
+          c(m-1,j+3) = s14
+          c(m,j)     = s21
+          c(m,j+1)   = s22 
+          c(m,j+2)   = s23
+          c(m,j+3)   = s24
+        enddo
+* mresid is 2, check nresid (corner: 2 rows by nresid columns)
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n) = s11
+          c(m,n) = s21
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n-1)
+            s12 = s12 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n-1)
+            s22 = s22 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n-1) = s11
+          c(m-1,n)   = s12
+          c(m,n-1)   = s21
+          c(m,n)     = s22
+          return
+        else
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-1,l)*b(l,n-2)
+            s12 = s12 + a(m-1,l)*b(l,n-1)
+            s13 = s13 + a(m-1,l)*b(l,n)
+            s21 = s21 + a(m,l)*b(l,n-2)
+            s22 = s22 + a(m,l)*b(l,n-1)
+            s23 = s23 + a(m,l)*b(l,n)
+          enddo
+          c(m-1,n-2) = s11
+          c(m-1,n-1) = s12
+          c(m-1,n)   = s13
+          c(m,n-2)   = s21
+          c(m,n-1)   = s22
+          c(m,n)     = s23
+          return
+        endif
+      else
+* mresid is 3
+        do j=1,n-nresid,4
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+
+          s14 = 0.0d0
+          s24 = 0.0d0
+          s34 = 0.0d0
+
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,j)
+            s12 = s12 + a(m-2,l)*b(l,j+1)
+            s13 = s13 + a(m-2,l)*b(l,j+2)
+            s14 = s14 + a(m-2,l)*b(l,j+3)
+
+            s21 = s21 + a(m-1,l)*b(l,j)
+            s22 = s22 + a(m-1,l)*b(l,j+1)
+            s23 = s23 + a(m-1,l)*b(l,j+2)
+            s24 = s24 + a(m-1,l)*b(l,j+3)
+
+            s31 = s31 + a(m,l)*b(l,j)
+            s32 = s32 + a(m,l)*b(l,j+1)
+            s33 = s33 + a(m,l)*b(l,j+2)
+            s34 = s34 + a(m,l)*b(l,j+3)
+          enddo
+          c(m-2,j)   = s11 
+          c(m-2,j+1) = s12 
+          c(m-2,j+2) = s13
+          c(m-2,j+3) = s14
+
+          c(m-1,j)   = s21 
+          c(m-1,j+1) = s22
+          c(m-1,j+2) = s23
+          c(m-1,j+3) = s24
+
+          c(m,j)     = s31 
+          c(m,j+1)   = s32
+          c(m,j+2)   = s33
+          c(m,j+3)   = s34
+        enddo
+* mresid is 3, check nresid (corner: 3 rows by nresid columns)
+        if (nresid .eq. 0) then
+          return
+        elseif (nresid .eq. 1) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n) = s11
+          c(m-1,n) = s21
+          c(m,n) = s31
+          return
+        elseif (nresid .eq. 2) then
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n-1)
+            s12 = s12 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n-1)
+            s22 = s22 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n-1)
+            s32 = s32 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n-1) = s11
+          c(m-2,n)   = s12
+          c(m-1,n-1) = s21
+          c(m-1,n)   = s22
+          c(m,n-1)   = s31
+          c(m,n)     = s32
+          return
+        else
+          s11 = 0.0d0
+          s21 = 0.0d0
+          s31 = 0.0d0
+          s12 = 0.0d0
+          s22 = 0.0d0
+          s32 = 0.0d0
+          s13 = 0.0d0
+          s23 = 0.0d0
+          s33 = 0.0d0
+          do l=1,k
+            s11 = s11 + a(m-2,l)*b(l,n-2)
+            s12 = s12 + a(m-2,l)*b(l,n-1)
+            s13 = s13 + a(m-2,l)*b(l,n)
+            s21 = s21 + a(m-1,l)*b(l,n-2)
+            s22 = s22 + a(m-1,l)*b(l,n-1)
+            s23 = s23 + a(m-1,l)*b(l,n)
+            s31 = s31 + a(m,l)*b(l,n-2)
+            s32 = s32 + a(m,l)*b(l,n-1)
+            s33 = s33 + a(m,l)*b(l,n)
+          enddo
+          c(m-2,n-2) = s11
+          c(m-2,n-1) = s12
+          c(m-2,n)   = s13
+          c(m-1,n-2) = s21
+          c(m-1,n-1) = s22
+          c(m-1,n)   = s23
+          c(m,n-2)   = s31
+          c(m,n-1)   = s32
+          c(m,n)     = s33
+          return
+        endif
+      endif
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm44_2_t(a, m, b, k, c, n)
+c
+c     c = a*b for an inner dimension of exactly 2:
+c        a is m x 2,  b is 2 x n,  c is m x n.
+c     k is accepted but not referenced.
+c     Columns of c are produced four at a time; the one to three
+c     columns left over when n is not a multiple of 4 are finished
+c     afterwards.
+c
+      real a(m,2), b(2,n), c(m,n)
+c
+c     nrem = low two bits of n; nfull = columns in complete groups
+      nrem  = iand(n,3)
+      nfull = n - nrem
+c
+c     complete groups of four columns
+      do jc=1,nfull,4
+         do ir=1,m
+            c(ir,jc)   = a(ir,1)*b(1,jc)   + a(ir,2)*b(2,jc)
+            c(ir,jc+1) = a(ir,1)*b(1,jc+1) + a(ir,2)*b(2,jc+1)
+            c(ir,jc+2) = a(ir,1)*b(1,jc+2) + a(ir,2)*b(2,jc+2)
+            c(ir,jc+3) = a(ir,1)*b(1,jc+3) + a(ir,2)*b(2,jc+3)
+         end do
+      end do
+c
+c     leftover columns: always the last nrem columns of c
+      if (nrem .eq. 3) then
+         do ir=1,m
+            c(ir,n-2) = a(ir,1)*b(1,n-2) + a(ir,2)*b(2,n-2)
+            c(ir,n-1) = a(ir,1)*b(1,n-1) + a(ir,2)*b(2,n-1)
+            c(ir,n)   = a(ir,1)*b(1,n)   + a(ir,2)*b(2,n)
+         end do
+      elseif (nrem .eq. 2) then
+         do ir=1,m
+            c(ir,n-1) = a(ir,1)*b(1,n-1) + a(ir,2)*b(2,n-1)
+            c(ir,n)   = a(ir,1)*b(1,n)   + a(ir,2)*b(2,n)
+         end do
+      elseif (nrem .eq. 1) then
+         do ir=1,m
+            c(ir,n)   = a(ir,1)*b(1,n)   + a(ir,2)*b(2,n)
+         end do
+      endif
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxmtest(s,nn,cn,mxmt,name,k,ivb)
+c     Time the kernel mxmt and store the measured rates in s(n,1:2).
+      real        s(nn,2)   ! MFLOPS
+      character*5 cn        ! name
+      character*5 name
+      external mxmt
+c     SIZE supplies lx1,ly1,lz1,lelt used here (and presumably nid).
+      include 'SIZE'
+      parameter (lt=4*lx1*ly1*lz1*lelt)
+      common /scrns/ a(lt)
+      common /scruz/ b(lt)
+      common /scrmg/ c(lt)
+c     ll: saved start offset of the mem-test window; icalld: init flag
+      integer ll,icalld
+      save    ll,icalld
+      data    ll,icalld  /1,0/
+
+      if (icalld.eq.0) then    !     Initialize matrices:
+         icalld = icalld + 1
+         time1 = dnekclock()
+         call initab(a,b,lt)
+         time2 = dnekclock()-time1
+         if (nid.eq.0) write(6,*) 'mxm test init:',lt,time2,name
+      endif
+
+c     hand the kernel name back to the caller
+      cn = name
+
+c     Rectangular matrix tests
+c     ivb = 0: test the single order n = lx1; otherwise n = 1..nn
+      nn0 = 1
+      nn1 = nn
+      if (ivb.eq.0) then
+         nn0 = lx1
+         nn1 = lx1
+      endif
+c     k = 1 gives n1 = n*n, k = 3 gives n3 = n*n, otherwise n1=n2=n3=n
+      m = k
+      do n=nn0,nn1
+         n1 = n
+         n2 = n
+         n3 = n
+         if (m.eq.1) n1 = n*n
+         if (m.eq.3) n3 = n*n
+         if (lt.gt.n1*n3) then
+          n13 = max(n1,n3)
+          loop = 250000/(n1*n2*n3) + 500
+          if (name.eq.'madd ') loop = 200000/(n1*n3) + 5000
+c         (n13 is set but not used below)
+c-------------------------------------------------------
+c         mem test
+c-------------------------------------------------------
+c         every call works on a new window a(ll),b(ll),c(ll)
+          t0    = dnekclock()
+          overh = dnekclock()-t0
+          time1 = dnekclock()
+          do l=1,loop
+            if (ll.ge.lt-n1*n3) ll = 1
+            call mxmt(a(ll),n1,b(ll),n2,c(ll),n3)
+            ll = ll+n1*n3
+          enddo
+          time2 = dnekclock()
+          time = time2-time1 - overh
+          iops=loop*n1*n3*(2*n2-1)
+          if (name.eq.'madd ') iops = loop*n1*n3
+c         write(6,*) loop,time,time2,time1,overh
+          flops=iops/(1.0e6*time)
+          s(n,1) = flops
+c
+          timel = time/loop
+          if (nid.eq.0) write(6,199) n,n1,n2,n3,flops,timel,name
+  199     format(i3,'m',1x,3i6,f10.4,e16.5,3x,a5,' mem')
+c
+c-------------------------------------------------------
+c         fast test
+c-------------------------------------------------------
+c         same operands on every call, after one untimed call
+          call mxmt(a,n1,b,n2,c,n3)
+          t0    = dnekclock()
+          overh = dnekclock()-t0
+          time1 = dnekclock()
+          do l=1,loop
+            call mxmt(a,n1,b,n2,c,n3)
+          enddo
+          time2 = dnekclock()
+          time = time2-time1 - overh
+          iops=loop*n1*n3*(2*n2-1)
+          if (name.eq.'madd ') iops = loop*n1*n3
+          flops=iops/(1.0e6*time)
+          s(n,2) = flops
+          timel = time/loop
+c
+          if (nid.eq.0) write(6,198) n,n1,n2,n3,flops,timel,name
+  198     format(i3,'f',1x,3i6,f10.4,e16.5,3x,a5,' fast')
+c
+        endif
+       enddo
+c
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine mxm_analyze(s,a,nn,c,nt,ivb)
+      include 'SIZE'
+c     For every matrix order n, find the fastest kernel of the mem test.
+      character*5 c(3,nt)
+      real        s(nn,2,nt,3)  ! Measured Mflops, 3 cases
+      real        a(nn,2,nt,3)
+c                   ^  ^ ^  |__ N^2xN, NxN, NxN^2
+c  matrix order N __|  | |__________which mxm
+c                      |
+c                      |__cached vs. noncached data
+c     a(n,1,it,1) <- harmonic mean of the positive s(n,1,it,1:3), or 0
+c     if none.  Kernels whose name c(2,it) is 'madd ' are never picked.
+      integer itmax(200)
+c     NOTE(review): itmax has room for orders n <= 200 only.
+      nn0 = 1
+      nn1 = nn
+      if (ivb.eq.0) then
+         nn0 = lx1
+         nn1 = lx1
+      endif
+      do n = nn0,nn1
+         fmax = 0.   ! Peak mflops
+         itmax(n) = 1   ! defined even if no kernel qualifies
+         do it=1,nt
+            ai = 0.
+            di = 0.
+            do k=1,3
+               if (s(n,1,it,k).gt.0) then  ! Take harmonic means of
+                  ai = ai + 1./s(n,1,it,k) ! case I II and III for 
+                  di = di + 1.             ! mem test, s(n,1...).
+               endif
+            enddo
+            if (ai.gt.0) ai = di/ai
+            a(n,1,it,1) = ai   ! the mean itself; 0 when no data
+            if (ai.gt.fmax.and.c(2,it).ne.'madd ') then
+               fmax     = ai
+               itmax(n) = it
+            endif
+         enddo
+         it = itmax(n)
+         if (nid.eq.0) write(6,3) n,it,c(2,it),(s(n,1,it,k),k=1,3),fmax
+    3    format(i3,i2,1x,a5,4f12.0,' Peak harmonic')
+      enddo
+      call out_anal(s,a,nn,c,nt,itmax,'Harmonic',1,ivb)
+c
+c     Case by case
+c     (itmax(n) keeps its previous value if no rate exceeds 0)
+      do k=1,3
+         do n = nn0,nn1
+            fmax = 0.   ! Peak mflops
+            do it=1,nt
+               ai = s(n,1,it,k)
+               if (ai.gt.fmax.and.c(2,it).ne.'madd ') then
+                  fmax     = ai
+                  itmax(n) = it
+               endif
+            enddo
+         enddo
+         if (k.eq.1) call out_anal(s,a,nn,c,nt,itmax,'Case N2N',k,ivb)
+         if (k.eq.2) call out_anal(s,a,nn,c,nt,itmax,'Case NxN',k,ivb)
+         if (k.eq.3) call out_anal(s,a,nn,c,nt,itmax,'Case NN2',k,ivb)
+      enddo
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine out_anal(s,a,nn,c,nt,itmax,name8,k,ivb)
+c     For each order n of the sweep print the rate s(n,1,it,k) of
+c     kernel it = itmax(n), its name c(2,it) and the label name8.
+c     Only the process with nid = 0 prints; a is not referenced.
+c     Sweep: n = 1..nn, or the single order lx1 when ivb = 0.
+      include 'SIZE'
+      character*8 name8
+      character*5 c(3,nt)
+      integer itmax(200)
+      real        s(nn,2,nt,3)
+      real        a(nn,2,nt,3)
+c     bounds of the sweep
+      if (ivb.eq.0) then
+         nn0 = lx1
+         nn1 = lx1
+      else
+         nn0 = 1
+         nn1 = nn
+      endif
+      if (nid.eq.0) then
+         do n=nn0,nn1
+            write(6,100) n,s(n,1,itmax(n),k),c(2,itmax(n)),name8
+         enddo
+      endif
+  100 format(i4,f14.0,4x,a5,4x,a8,'   MxM MFLOPS')
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/mxm_wrapper.f b/src/mxm_wrapper.f
new file mode 100644
index 0000000..7c3d111
--- /dev/null
+++ b/src/mxm_wrapper.f
@@ -0,0 +1,165 @@
+      subroutine mxm(a,n1,b,n2,c,n3)
+c     Matrix product wrapper; the backend is chosen at compile time.
+#if defined(XSMM_DISPATCH)
+      USE :: LIBXSMM
+#endif
+
+#define LIBXSMM_DMM1(N, a, b, c)  LIBXSMM_DMM1_str(N, a, b, c) 
+#define LIBXSMM_DMM1_str(N, a, b, c)  libxsmm_dmm_##N##x##N##_##N##_##N(a, b, c)
+#define LIBXSMM_DMM2(N, a, b, c)   LIBXSMM_DMM2_str(N, a, b, c)
+#define LIBXSMM_DMM2_str(N, a, b, c)  libxsmm_dmm_##N##_##N##x##N##_##N(a, b, c)
+#define LIBXSMM_DMM3(N, a, b, c)  LIBXSMM_DMM3_str(N, a, b, c) 
+#define LIBXSMM_DMM3_str(N, a, b, c)  libxsmm_dmm_##N##_##N##_##N(a, b, c)
+
+c
+c     Compute matrix-matrix product C = A*B
+c     for contiguously packed matrices A,B, and C.
+c     a is n1 x n2, b is n2 x n3 and c is n1 x n3.
+#if defined (MKL)
+#     include  "mkl_direct_call.fi"
+#endif
+      real a(n1,n2),b(n2,n3),c(n1,n3)
+      real alpha, beta
+c
+      include 'SIZE'
+      include 'TOTAL'
+c
+      integer aligned
+      integer K10_mxm
+      integer init, prevn2
+
+#if defined(XSMM_DISPATCH)
+      TYPE(LIBXSMM_DMMFUNCTION) :: xmm1,xmm2,xmm3
+#endif
+c     Handles are rebuilt only when n2 changes: keep them saved.
+      data init /0/, prevn2 /0/
+      save init, prevn2
+#if defined(XSMM_DISPATCH)
+      save xmm1, xmm2, xmm3
+#endif
+
+c     write(*,*) "in", init, prevn2, LOC(xmm1), LOC(xmm2), LOC(xmm3)
+
+#if defined (MKL)
+      alpha = 1.0
+      beta = 0.0
+      call dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1)
+#elif defined (BLAS_MXM)
+      alpha = 1.0
+      beta = 0.0
+      call dgemm('N','N',n1,n3,n2,alpha,a,n1,b,n2,beta,c,n1)
+#elif defined (BG)
+      call bg_aligned3(a,b,c,aligned)
+      if (n2.eq.2) then
+         call mxm44_2(a,n1,b,n2,c,n3)
+      else if ((aligned.eq.1) .and.
+     $         (n1.ge.8) .and. (n2.ge.8) .and. (n3.ge.8) .and.
+     $         (modulo(n1,2).eq.0) .and. (modulo(n2,2).eq.0) ) then
+         if (modulo(n3,4).eq.0) then
+            call bg_mxm44(a,n1,b,n2,c,n3)
+         else
+            call bg_mxm44_uneven(a,n1,b,n2,c,n3)
+         endif
+      else if((aligned.eq.1) .and.
+     $        (modulo(n1,6).eq.0) .and. (modulo(n3,6).eq.0) .and.
+     $        (n2.ge.4) .and. (modulo(n2,2).eq.0) ) then
+         call bg_mxm3(a,n1,b,n2,c,n3)
+      else
+         call mxm44_0(a,n1,b,n2,c,n3)
+      endif
+#elif defined (K10_MXM)
+      ! fow now only supported for lx1=8
+      ! tuned for AMD K10
+      ierr = K10_mxm(a,n1,b,n2,c,n3) 
+      if (ierr.gt.0) call mxmf2(a,n1,b,n2,c,n3)
+#elif defined (XSMM_DISPATCH)
+      if (init == 0) then
+        CALL libxsmm_init()
+        init = 1
+        write(*,*) "initializing libxsmm"
+      end if 
+
+      if (prevn2 /= n2) then
+        prevn2 = n2
+
+        CALL libxsmm_dispatch(xmm1, n2, n2, n2*n2, alpha=1D0, beta=0D0)
+c       write(*,*) "initialized xmm1"
+        IF (.NOT. libxsmm_available(xmm1)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+
+        CALL libxsmm_dispatch(xmm2, n2, n2, n2, alpha=1D0, beta=0D0)
+c       write(*,*) "initialized xmm2"
+        IF (.NOT. libxsmm_available(xmm2)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+
+        CALL libxsmm_dispatch(xmm3, n2*n2, n2, n2, alpha=1D0, beta=0D0)
+c       write(*,*) "initialized xmm3"
+        IF (.NOT. libxsmm_available(xmm3)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+      end if
+
+      if (n1 .eq. n2*n2) then
+c       write(*,*) "call to xmm3", n1, n2, n3
+        IF (.NOT. libxsmm_available(xmm3)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+        call libxsmm_call(xmm3, C_LOC(a), C_LOC(b), C_LOC(c))
+c       call libxsmm_dmm_256_16_16(a, b, c)
+      else if (n3 .eq. n2*n2) then
+c       write(*,*) "call to xmm1", n1, n2, n3
+        IF (.NOT. libxsmm_available(xmm1)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+        call libxsmm_call(xmm1, C_LOC(a), C_LOC(b), C_LOC(c))
+c       call libxsmm_dmm_16_256_16(a, b, c)
+      else
+c       write(*,*) "call to xmm2", n1, n2, n3
+        IF (.NOT. libxsmm_available(xmm2)) THEN
+          write(*,*) "  ** Error: unable to dispatch libxsmm call"
+          STOP 
+        END IF
+        call libxsmm_call(xmm2, C_LOC(a), C_LOC(b), C_LOC(c))
+c       call libxsmm_dmm_16_16_16(a, b, c)
+      end if
+#elif defined (XSMM_FIXED)
+      if (n2 .eq. NPOLY) then
+        if (n1 .eq. n2*n2) then
+          call LIBXSMM_DMM1(NPOLY, a, b, c)
+        else if (n3 .eq. n2*n2) then
+          call LIBXSMM_DMM2(NPOLY, a, b, c)
+        else
+          call LIBXSMM_DMM3(NPOLY, a, b, c)
+        end if
+      else
+        write(*,*) "Invalid matrix size"
+        stop
+      end if
+#elif defined (XSMM)
+      alpha = 1.0
+      beta = 0.0
+      CALL libxsmm_dgemm('N','N',n1,n3,n2,alpha,A,n1,B,n2,beta,C,n1)
+#elif defined (MXMBASIC)
+      do j=1,n3
+        do i=1,n1
+          c(i,j) = 0.0
+          do k=1,n2
+            c(i,j) = c(i,j) + a(i,k)*b(k,j)
+          enddo
+        enddo
+      enddo
+#else
+      call mxmf2(a,n1,b,n2,c,n3)
+#endif
+
+c     write(*,*) "out", init, prevn2, xmm1, xmm2, xmm3
+
+      return
+      end
diff --git a/src/omp.f b/src/omp.f
new file mode 100644
index 0000000..1591214
--- /dev/null
+++ b/src/omp.f
@@ -0,0 +1,128 @@
+c     Timer macros: clock reads when TIMERS is defined, else empty.
+#ifdef TIMERS
+#define NBTIMER(a) a = dnekclock()
+#define STIMER(a) a = dnekclock_sync()
+#define ACCUMTIMER(b,a) b = b + (dnekclock()- a)
+#else
+#define NBTIMER(a)
+#define STIMER(a)
+#define ACCUMTIMER(a,b)
+#endif
+
+      subroutine rzeroi(a,n,start,fin)
+c       Zero a(start:fin); n is only the declared length of a.
+        implicit none
+        integer n, i, start, fin
+c       n is typed above because it dimensions a under implicit none
+        real a(n)
+
+        do i = start, fin
+          a(i) = 0.0
+        end do
+        return
+      end subroutine
+
+c----------------------------------------------------------
+
+      subroutine copyi(a,b,n, start, fin)
+c       Copy b(start:fin) into a(start:fin); n is the declared length.
+        implicit none
+        integer n, i, start, fin
+c       n is typed before it is used as an array bound
+        real a(n),b(n)
+
+        do i=start,fin
+          a(i)=b(i)
+        enddo
+        return
+      end subroutine
+
+c----------------------------------------------------------
+
+      subroutine glsc3i(val,a,b,mult,n,find,lind)
+c     Each caller sums a(i)*b(i)*mult(i) for i=find..lind into the
+c     saved accumulator psum; the master thread then passes psum
+c     through gop, stores it in val and resets psum to zero.
+      implicit none
+
+      include 'TIMER'
+c     n is typed before it is used as an array bound
+      integer n,find,lind
+      real val,a(n),b(n),mult(n)
+      real tsum,psum,work(1)
+      integer i, tmt, thread
+      integer omp_get_thread_num
+      save psum
+      data psum /0.0/
+c     NOTE(review): gop is presumably a cross-rank sum - confirm
+      thread = 0
+#ifdef _OPENMP
+      thread = omp_get_thread_num()
+#endif
+      tmt = thread + 1
+
+      tsum = 0.0
+      do i=find, lind
+         tsum = tsum + a(i)*b(i)*mult(i)
+      end do
+c     psum is saved, so all threads add to it: hence the atomic
+c$OMP ATOMIC update
+      psum = psum + tsum
+c$OMP END ATOMIC
+c     Barriers: all partial sums land before gop; val set before exit
+c$OMP BARRIER
+      NBTIMER(ttemp4)
+c$OMP MASTER
+      call gop(psum,work,'+  ',1)
+      val = psum
+      psum = 0.0
+c$OMP END MASTER
+c$OMP BARRIER
+      ACCUMTIMER(tgop(gopi(tmt),tmt), ttemp4)
+      return
+      end subroutine
+
+c----------------------------------------------------------
+
+      subroutine solveMi(z,r,n,start,fin)
+c     Sets z(start:fin) = r(start:fin) by delegating to copyi.
+      implicit none
+c     n is typed before it is used as an array bound
+      integer n,start,fin
+      real z(n),r(n)
+
+      call copyi(z,r,n,start,fin)
+      return
+      end
+
+c----------------------------------------------------------
+
+      subroutine add2s1i(a,b,c1,n,start,fin)
+c     a(i) = c1*a(i) + b(i) for i = start..fin
+      implicit none
+      integer n,start,fin
+      integer i
+c     n is typed before it is used as an array bound
+      real a(n),b(n),c1
+
+      do i= start, fin
+        a(i)=c1*a(i)+b(i)
+      end do
+      return
+      end subroutine
+
+c----------------------------------------------------------
+
+      subroutine add2s2i(a,b,c1,n,start,fin)
+c     a(i) = a(i) + c1*b(i) for i = start..fin
+      implicit none
+      integer n,start,fin
+      integer i
+c     n is typed before it is used as an array bound
+      real a(n),b(n),c1
+
+      do i= start,fin
+        a(i)=a(i)+c1*b(i)
+      end do
+      return
+      end subroutine
diff --git a/src/prox_dssum.f b/src/prox_dssum.f
new file mode 100644
index 0000000..c3c0402
--- /dev/null
+++ b/src/prox_dssum.f
@@ -0,0 +1,174 @@
+c-----------------------------------------------------------------------
+      subroutine dssum(f)
+      include 'SIZE'
+      include 'TOTAL'
+      real f(1)
+c     Pass f through gs_op using the handle gsh (not declared here).
+c     call nekgsync()
+      call gs_op(gsh,f,1,1,0)  ! Gather-scatter operation  ! w   = QQ  w
+c     NOTE(review): 1,1,0 are presumably type, op, transpose - confirm
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine proxy_setupds(gs_handle)
+c     Build the global numbering and set up gather-scatter handle
+c     gs_handle; rank 0 reports the elapsed time and the point count.
+      include 'SIZE'
+      include 'INPUT'
+      include 'PARALLEL'
+      integer gs_handle
+      integer*8 dof                ! 64-bit: ntot*mp can exceed 2**31
+      integer*8 glo_num(lx1*ly1*lz1*lelt)
+      common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
+
+      t0 = dnekclock()
+      call set_vert_box(glo_num) ! Set global-to-local map
+
+      ntot      = nx1*ny1*nz1*nelt
+      call gs_setup(gs_handle,glo_num,ntot,nekcomm,mp) ! Initialize gather-scatter
+      dof = ntot
+      dof = dof*mp                 ! promote before multiplying
+      t1 = dnekclock() - t0
+      if (nid.eq.0) then
+         write(6,1) t1,gs_handle,nx1,dof
+    1    format('   setupds time',1pe11.4,' seconds ',2i3,i12)
+      endif
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine set_vert_box(glo_num)
+c     Assign a global vertex number to every local grid point of a
+c     structured box of elements (shared points get the same id).
+      include 'SIZE'
+      include 'PARALLEL'
+
+c     64-bit row/plane strides keep the index arithmetic promoted
+      integer*8 glo_num(1),ngx8,ngxy8,kg,jg,ig
+      integer e,ex,ey,ez,eg
+
+      nn    = nx1-1                ! polynomial order
+      ngx8  = nn*nelx+1            ! global points along x
+      ngxy8 = ngx8*(nn*nely+1)     ! global points per xy-plane
+
+      do e=1,nelt
+        eg = lglel(e)
+        call get_exyz(ex,ey,ez,eg,nelx,nely,nelz)
+        lbase = 1 + nx1*ny1*nz1*(e-1)
+        do k=0,nn
+          kg = nn*(ez-1) + k
+          do j=0,nn
+            jg = nn*(ey-1) + j
+            do i=0,nn
+              ig = nn*(ex-1) + i
+              glo_num(lbase + i + nx1*j + nx1*ny1*k) = 1 + ig
+     $                                   + jg*ngx8 + kg*ngxy8
+            enddo
+          enddo
+        enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine get_exyz(ex,ey,ez,eg,nelx,nely,nelz)
+c     Split element number eg into box indices (ex,ey,ez); nelz is
+c     not used. mod1 is external (presumably a 1-based modulo).
+      integer ex,ey,ez,eg
+
+      nplane = nelx*nely
+      ex     = mod1 (eg,nelx)
+      inpl   = mod1 (eg,nplane)
+      ey     = 1 +  (inpl-1)/nelx
+      ez     = 1 +  (eg-1)/nplane
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat_glo_num(glo_num)
+c     Print the global numbering of local elements 1..nelt, one
+c     outmat_e_i8 call per element.
+      include 'SIZE'
+      include 'INPUT'
+      include 'PARALLEL'
+
+      integer*8 glo_num(lx1*ly1*lz1,lelt)
+      integer iel
+
+      do iel=1,nelt
+         call outmat_e_i8(glo_num(1,iel),iel)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat_e_i8(gn,e)
+c     Debug dump of one element of the global numbering: planes
+c     k=3,4 first, then k=1,2, rows printed from j=ny1 down to 1.
+c     Only i=1..4 and k=1..4 are printed, so this assumes lx1 and
+c     lz1 are at least 4.
+      include 'SIZE'
+      include 'INPUT'
+      include 'PARALLEL'
+
+      integer*8 gn(lx1,ly1,lz1)
+      integer e
+
+    1 format('gn: ',4i8,3x,4i8)
+    2 format('gn: element: ',i4)
+
+      write(6,*)
+      write(6,2) e
+      write(6,*)
+      do kpair=2,1,-1
+         klo = 2*kpair-1
+         do jrow=ny1,1,-1
+            write(6,1) (gn(i,jrow,klo),i=1,4),(gn(i,jrow,klo+1),i=1,4)
+         enddo
+         write(6,*)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat_r(x,name5)
+c     Print the real field x for local elements 1..nelt, one
+c     outmat_e_r call per element, each tagged with name5.
+      include 'SIZE'
+      include 'INPUT'
+      include 'PARALLEL'
+
+      character*5 name5
+      real x(lx1*ly1*lz1,lelt)
+      integer iel
+
+      do iel=1,nelt
+         call outmat_e_r(x(1,iel),name5,iel)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat_e_r(x,name5,e)
+c     Debug dump of one element of a real field: planes k=3,4
+c     first, then k=1,2, rows printed from j=ny1 down to 1.
+c     Only i=1..4 and k=1..4 are printed, so this assumes lx1 and
+c     lz1 are at least 4.
+      include 'SIZE'
+      include 'INPUT'
+      include 'PARALLEL'
+      character*5 name5
+
+      real x(lx1,ly1,lz1)
+      integer e
+
+    1 format('mat: ',4f8.3,3x,4f8.3)
+    2 format('mat: element: ',i4,2x,a5)
+
+      write(6,*)
+      write(6,2) e,name5
+      write(6,*)
+      do kpair=2,1,-1
+         klo = 2*kpair-1
+         do jrow=ny1,1,-1
+            write(6,1) (x(i,jrow,klo),i=1,4),(x(i,jrow,klo+1),i=1,4)
+         enddo
+         write(6,*)
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/prox_setup.f b/src/prox_setup.f
new file mode 100644
index 0000000..fabe8c5
--- /dev/null
+++ b/src/prox_setup.f
@@ -0,0 +1,113 @@
+c-----------------------------------------------------------------------
+      subroutine proxy_setup(a,b,c,d,z,w,g)
+c     Build the 1D operators with semhat, then use copy (presumably
+c     dst,src,n - confirm) and transpose to fill dxm1, dxtm1, zgm1
+c     and wxm1, and finally fill g through setup_g.
+      include 'SIZE'
+      include 'TOTAL'
+
+      real a(lx1*lx1),b(lx1),c(lx1*lx1),d(lx1*lx1),z(lx1)
+     $               , w(lx1*2),g(6,lx1*ly1*lz1*lelt)
+
+      call semhat(a,b,c,d,z,w,nx1-1)
+
+      n = nx1*nx1
+      call copy(dxm1,d,n)
+      call transpose(dxtm1,nx1,dxm1,nx1)
+
+      call copy(zgm1,z,nx1)   ! GLL points
+      call copy(wxm1,b,nx1)   ! GLL weights
+
+      call setup_g(g)
+c     m = nx1*ny1*nz1*nelt
+c     call outmat(g,6,m,'gxyz 1',m)
+      return
+      end
+c-------------------------------------------------------------------------
+      subroutine setup_g(g)
+c     At every grid point, zero the six entries of g and then set
+c     entries 1, 4 and 6 to the product of the three 1D weights
+c     wxm1(i)*wxm1(j)*wxm1(k); entries 2, 3 and 5 stay zero.
+      include 'SIZE'
+      include 'TOTAL'
+      real g(6,nx1,ny1,nz1,nelt)
+      integer e
+
+      n = nx1*ny1*nz1*nelt
+
+      do e=1,nelt
+      do k=1,nz1
+      do j=1,ny1
+      do i=1,nx1
+         call rzero(g(1,i,j,k,e),6)
+c        entry 6 used to be stored twice; one store is sufficient
+         g(1,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k)
+         g(4,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k)
+         g(6,i,j,k,e) = wxm1(i)*wxm1(j)*wxm1(k)
+      enddo
+      enddo
+      enddo
+      enddo
+      return
+      end
+c-------------------------------------------------------------------------
+      subroutine transpose(a,lda,b,ldb)
+c     a = transpose of b, where a is lda x ldb and b is ldb x lda
+      real a(lda,1),b(ldb,1)
+      do jcol=1,ldb
+         do irow=1,lda
+            a(irow,jcol) = b(jcol,irow)
+         enddo
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat(a,m,n,name6,ie)
+c     Print a(1:m,1:min(n,10)) on unit 6, tagged with ie and name6
+      real a(m,n)
+      character*6 name6
+    6 format(i3,1x,a6,1p10e12.4)
+      ncol = min(n,10)
+      write(6,*) 
+      write(6,*) ie,' matrix: ',name6,m,n
+      do irow=1,m
+         write(6,6) ie,name6,(a(irow,jcol),jcol=1,ncol)
+      enddo
+      write(6,*) 
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine outmat1(a,m,n,name6,ie)
+c     Print a(1:m,1:min(n,10)) on unit ie, tagged with ie and name6
+      real a(m,n)
+      character*6 name6
+    6 format(i3,1x,a6,1p10e12.4)
+      ncol = min(n,10)
+      write(ie,*) 
+      write(ie,*) ie,' matrix: ',name6,m,n
+      do irow=1,m
+         write(ie,6) ie,name6,(a(irow,jcol),jcol=1,ncol)
+      enddo
+      write(ie,*) 
+      return
+      end
+c-----------------------------------------------------------------------
+      function randx(seed)
+c     Returns sin(1.e9*cos(1.e9*seed)) and overwrites seed with the
+c     returned value (the second, duplicate seed store was dropped).
+#ifdef BGQ
+#define M_SIN(X) _sin((X))
+#define M_COS(X) _cos((X))
+#else
+#define M_SIN(X) sin((X))
+#define M_COS(X) cos((X))
+#endif
+
+      arg   = 1.e9*seed
+      arg   = 1.e9*M_COS(arg)
+      randx = M_SIN(arg)
+      seed  = randx
+
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/semhat.f b/src/semhat.f
new file mode 100644
index 0000000..1ad8d23
--- /dev/null
+++ b/src/semhat.f
@@ -0,0 +1,94 @@
+c-----------------------------------------------------------------------
+      subroutine semhat(a,b,c,d,z,w,n)
+c
+c     Generate matrices for single element, 1D operators:
+c
+c        a    = Laplacian
+c        b    = diagonal mass matrix    (GLL weights)
+c        c    = convection operator b*d
+c        d    = derivative matrix
+c        z    = GLL points
+c        w    = work space of 2*(n+1) reals, n = polynomial order
+      real a(0:n,0:n),b(0:n),c(0:n,0:n),d(0:n,0:n),z(0:n)
+      real w(0:2*n+1)
+ 
+      np = n+1
+ 
+      call zwgll (z,b,np)
+c     w is filled as c(0:n,0:1); first-derivative weights start at np
+      do i=0,n
+         call fd_weights_full(z(i),z,n,1,w)
+         do j=0,n
+            d(i,j) = w(j+np)                   !  Derivative matrix
+         enddo
+      enddo
+
+      call rzero(a,np*np)
+      do j=0,n
+      do i=0,n
+         do k=0,n
+            a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j)
+         enddo
+         c(i,j) = b(i)*d(i,j)
+      enddo
+      enddo
+
+      return
+      end
+c-----------------------------------------------------------------------
+      subroutine fd_weights_full(xx,x,n,m,c)
+c
+c     This routine evaluates the derivative based on all points
+c     in the stencils.  It is more memory efficient than "fd_weights"
+c
+c     This set of routines comes from the appendix of 
+c     A Practical Guide to Pseudospectral Methods, B. Fornberg
+c     Cambridge Univ. Press, 1996.   (pff)
+c
+c     Input parameters:
+c       xx -- point at which the approximations are to be accurate
+c       x  -- array of x-ordinates:   x(0:n)
+c       n  -- polynomial degree of interpolant (# of points := n+1)
+c       m  -- highest order of derivative to be approximated at xx
+c
+c     Output:
+c       c  -- set of coefficients c(0:n,0:m).
+c             c(j,k) is to be applied at x(j) when
+c             the kth derivative is approximated by a 
+c             stencil extending over x(0),x(1),...x(n).
+c
+c
+      real x(0:n),c(0:n,0:m)
+c     c1 and c2 accumulate products of node differences x(i)-x(j)
+      c1       = 1.
+      c4       = x(0) - xx
+c     Start from the one-point stencil: c(0,0) = 1, all else zero
+      do k=0,m
+      do j=0,n
+         c(j,k) = 0.
+      enddo
+      enddo
+      c(0,0) = 1.
+c     Add one node x(i) at a time, updating all weights in place
+      do i=1,n
+         mn = min(i,m)
+         c2 = 1.
+         c5 = c4
+         c4 = x(i)-xx
+         do j=0,i-1
+            c3 = x(i)-x(j)
+            c2 = c2*c3
+            do k=mn,1,-1
+               c(i,k) = c1*(k*c(i-1,k-1)-c5*c(i-1,k))/c2
+            enddo
+            c(i,0) = -c1*c5*c(i-1,0)/c2
+            do k=mn,1,-1
+               c(j,k) = (c4*c(j,k)-k*c(j,k-1))/c3
+            enddo
+            c(j,0) = c4*c(j,0)/c3
+         enddo
+         c1 = c2
+      enddo
+      return
+      end
+c-----------------------------------------------------------------------
diff --git a/src/speclib.f b/src/speclib.f
new file mode 100644
index 0000000..6c86b64
--- /dev/null
+++ b/src/speclib.f
@@ -0,0 +1,1176 @@
+C==============================================================================
+C
+C     LIBRARY ROUTINES FOR SPECTRAL METHODS
+C
+C     March 1989
+C
+C     For questions, comments or suggestions, please contact:
+C
+C     Einar Malvin Ronquist
+C     Room 3-243
+C     Department of Mechanical Engineering
+C     Massachusetts Institute of Technology
+C     77 Massachusetts Avenue
+C     Cambridge, MA 0299
+C     U.S.A.
+C
+C------------------------------------------------------------------------------
+C
+C     ABBREVIATIONS:
+C
+C     M   - Set of mesh points
+C     Z   - Set of collocation/quadrature points
+C     W   - Set of quadrature weights
+C     H   - Lagrangian interpolant
+C     D   - Derivative operator
+C     I   - Interpolation operator
+C     GL  - Gauss Legendre
+C     GLL - Gauss-Lobatto Legendre
+C     GJ  - Gauss Jacobi
+C     GLJ - Gauss-Lobatto Jacobi
+C
+C
+C     MAIN ROUTINES:
+C
+C     Points and weights:
+C
+C     ZWGL      Compute Gauss Legendre points and weights
+C     ZWGLL     Compute Gauss-Lobatto Legendre points and weights
+C     ZWGJ      Compute Gauss Jacobi points and weights (general)
+C     ZWGLJ     Compute Gauss-Lobatto Jacobi points and weights (general)
+C
+C     Lagrangian interpolants:
+C
+C     HGL       Compute Gauss Legendre Lagrangian interpolant
+C     HGLL      Compute Gauss-Lobatto Legendre Lagrangian interpolant
+C     HGJ       Compute Gauss Jacobi Lagrangian interpolant (general)
+C     HGLJ      Compute Gauss-Lobatto Jacobi Lagrangian interpolant (general)
+C
+C     Derivative operators:
+C
+C     DGLL      Compute Gauss-Lobatto Legendre derivative matrix
+C     DGLLGL    Compute derivative matrix for a staggered mesh (GLL->GL)
+C     DGJ       Compute Gauss Jacobi derivative matrix (general)
+C     DGLJ      Compute Gauss-Lobatto Jacobi derivative matrix (general)
+C     DGLJGJ    Compute derivative matrix for a staggered mesh (GLJ->GJ) (general)
+C
+C     Interpolation operators:
+C
+C     IGLM      Compute interpolation operator GL  -> M
+C     IGLLM     Compute interpolation operator GLL -> M
+C     IGJM      Compute interpolation operator GJ  -> M  (general)
+C     IGLJM     Compute interpolation operator GLJ -> M  (general)
+C
+C     Other:
+C
+C     PNLEG     Compute Legendre polynomial of degree N
+C     PNDLEG    Compute derivative of Legendre polynomial of degree N
+C
+C     Comments:
+C
+C     Note that many of the above routines exist in both single and
+C     double precision. If the name of the single precision routine is
+C     SUB, the double precision version is called SUBD. In most cases
+C     all the "low-level" arithmetic is done in double precision, even
+C     for the single precision versions.
+C
+C     Useful references:
+C
+C [1] Gabor Szego: Orthogonal Polynomials, American Mathematical Society,
+C     Providence, Rhode Island, 1939.
+C [2] Abramowitz & Stegun: Handbook of Mathematical Functions,
+C     Dover, New York, 1972.
+C [3] Canuto, Hussaini, Quarteroni & Zang: Spectral Methods in Fluid
+C     Dynamics, Springer-Verlag, 1988.
+C
+C
+C==============================================================================
+C
+C--------------------------------------------------------------------
+      SUBROUTINE ZWGL (Z,W,NP)
+C--------------------------------------------------------------------
+C
+C     Gauss Legendre rule: returns the NP points Z and weights W.
+C     Legendre is the Jacobi case alpha = beta = 0, so the work is
+C     delegated to ZWGJ with both parameters set to zero.
+C     The polynomial degree is N = NP-1.  ZWGJ does its arithmetic
+C     in double precision and copies the result into Z and W.
+C
+C--------------------------------------------------------------------
+      REAL Z(1),W(1)
+      AZERO = 0.
+      BZERO = 0.
+      CALL ZWGJ (Z,W,NP,AZERO,BZERO)
+      RETURN
+      END
+C
+      SUBROUTINE ZWGLL (Z,W,NP)
+C--------------------------------------------------------------------
+C
+C     Gauss-Lobatto Legendre rule: returns NP points Z, weights W.
+C     Legendre is the Jacobi case alpha = beta = 0, so the work is
+C     delegated to ZWGLJ with both parameters set to zero.
+C     The polynomial degree is N = NP-1.  ZWGLJ does its arithmetic
+C     in double precision and copies the result into Z and W.
+C
+C--------------------------------------------------------------------
+      REAL Z(1),W(1)
+      AZERO = 0.
+      BZERO = 0.
+      CALL ZWGLJ (Z,W,NP,AZERO,BZERO)
+      RETURN
+      END
+C
+      SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA)
+C--------------------------------------------------------------------
+C
+C     Generate NP GAUSS JACOBI points (Z) and weights (W)
+C     associated with Jacobi polynomial P(N)(alpha>-1,beta>-1).
+C     The polynomial degree N=NP-1.
+C     Single precision version.
+C     Wrapper: computes in REAL*8 through ZWGJD, then copies to Z, W.
+C--------------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZD = NMAX)
+      REAL*8  ZD(NZD),WD(NZD),ALPHAD,BETAD
+      REAL Z(1),W(1),ALPHA,BETA
+C     ALPHAD and BETAD are REAL*8 to match the ZWGJD dummy arguments
+      NPMAX = NZD
+      IF (NP.GT.NPMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in ZWGJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here NP=',NP
+         call exitt
+      ENDIF
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD)
+      DO 100 I=1,NP
+         Z(I) = ZD(I)
+         W(I) = WD(I)
+ 100  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA)
+C--------------------------------------------------------------------
+C
+C     Generate NP GAUSS JACOBI points (Z) and weights (W)
+C     associated with Jacobi polynomial P(N)(alpha>-1,beta>-1).
+C     The polynomial degree N=NP-1.
+C     Double precision version.
+C
+C--------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  Z(1),W(1),ALPHA,BETA
+C     DN is set below but not referenced again in this routine.
+      N     = NP-1
+      DN    = ((N))
+      ONE   = 1.
+      TWO   = 2.
+      APB   = ALPHA+BETA
+C     Reject invalid input (exitt is defined elsewhere).
+      IF (NP.LE.0) THEN
+         WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np
+         call exitt
+      ENDIF
+      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+         WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C     Single point: closed-form node and weight, no root search.
+      IF (NP.EQ.1) THEN
+         Z(1) = (BETA-ALPHA)/(APB+TWO)
+         W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO)
+     $          * TWO**(APB+ONE)
+         RETURN
+      ENDIF
+C     General case: JACG returns the NP nodes in Z.
+      CALL JACG (Z,NP,ALPHA,BETA)
+C     Weights use JACOBF called with degree NP+1 at each node.
+      NP1   = N+1
+      NP2   = N+2
+      DNP1  = ((NP1))
+      DNP2  = ((NP2))
+      FAC1  = DNP1+ALPHA+BETA+ONE
+      FAC2  = FAC1+DNP1
+      FAC3  = FAC2+ONE
+      FNORM = PNORMJ(NP1,ALPHA,BETA)
+      RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2)
+      DO 100 I=1,NP
+         CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I))
+         W(I) = -RCOEF/(P*PDM1)
+ 100  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA)
+C--------------------------------------------------------------------
+C
+C     Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W)
+C     associated with Jacobi polynomial P(N)(alpha>-1,beta>-1).
+C     The polynomial degree N=NP-1.
+C     Single precision version.
+C     Wrapper: computes in REAL*8 through ZWGLJD, then copies to Z, W.
+C--------------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZD = NMAX)
+      REAL*8  ZD(NZD),WD(NZD),ALPHAD,BETAD
+      REAL Z(1),W(1),ALPHA,BETA
+C     ALPHAD and BETAD are REAL*8 to match the ZWGLJD dummy arguments
+      NPMAX = NZD
+      IF (NP.GT.NPMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in ZWGLJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here NP=',NP
+         call exitt
+      ENDIF
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD)
+      DO 100 I=1,NP
+         Z(I) = ZD(I)
+         W(I) = WD(I)
+ 100  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA)
+C--------------------------------------------------------------------
+C
+C     Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W)
+C     associated with Jacobi polynomial P(N)(alpha>-1,beta>-1).
+C     The polynomial degree N=NP-1.
+C     Double precision version.
+C     Interior weights are the ZWGJD weights divided by 1-Z**2.
+C--------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  Z(NP),W(NP),ALPHA,BETA
+C     NM1 = NP-2 is the number of interior points.
+      N     = NP-1
+      NM1   = N-1
+      ONE   = 1.
+      TWO   = 2.
+C     At least the two end points are required.
+      IF (NP.LE.1) THEN
+       WRITE (6,*) 'ZWGLJD: Minimum number of Gauss-Lobatto points is 2'
+       WRITE (6,*) 'ZWGLJD: alpha,beta:',alpha,beta,np
+       call exitt
+      ENDIF
+      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+         WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C     Interior nodes: NM1-point ZWGJD rule with ALPHA+1 and BETA+1.
+      IF (NM1.GT.0) THEN
+         ALPG  = ALPHA+ONE
+         BETG  = BETA+ONE
+         CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG)
+      ENDIF
+      Z(1)  = -ONE
+      Z(NP) =  ONE
+      DO 100  I=2,NP-1
+         W(I) = W(I)/(ONE-Z(I)**2)
+ 100  CONTINUE
+      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1))
+      W(1)  = ENDW1 (N,ALPHA,BETA)/(TWO*PD)
+      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP))
+      W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD)
+C     End weights: ENDW1 and ENDW2 divided by 2*PD at Z(1) and Z(NP).
+      RETURN
+      END
+C     ENDW1: factor that ZWGLJD divides by 2*PD to get weight W(1).
+      REAL*8  FUNCTION ENDW1 (N,ALPHA,BETA)
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  ALPHA,BETA
+      ZERO  = 0.
+      ONE   = 1.
+      TWO   = 2.
+      THREE = 3.
+      FOUR  = 4.
+      APB   = ALPHA+BETA
+      IF (N.EQ.0) THEN
+         ENDW1 = ZERO
+         RETURN
+      ENDIF
+      F1   = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
+      F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
+      IF (N.EQ.1) THEN
+         ENDW1 = F1
+         RETURN
+      ENDIF
+      FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
+      FINT1 = FINT1*TWO**(APB+TWO)
+      FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
+      FINT2 = FINT2*TWO**(APB+THREE)
+      F2    = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2)
+     $        * (APB+THREE)/FOUR
+      IF (N.EQ.2) THEN
+         ENDW1 = F2
+         RETURN
+      ENDIF
+      DO 100 I=3,N                 ! three-term recurrence on F1,F2
+         DI   = ((I-1))
+         ABN  = ALPHA+BETA+DI
+         ABNN = ABN+DI
+         A1   = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
+         A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
+         A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
+         F3   =  -(A2*F2+A1*F1)/A3
+         F1   = F2
+         F2   = F3
+ 100  CONTINUE
+      ENDW1  = F3
+      RETURN
+      END
+C     ENDW2: factor that ZWGLJD divides by 2*PD to get weight W(NP).
+      REAL*8  FUNCTION ENDW2 (N,ALPHA,BETA)
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  ALPHA,BETA
+      ZERO  = 0.
+      ONE   = 1.
+      TWO   = 2.
+      THREE = 3.
+      FOUR  = 4.
+      APB   = ALPHA+BETA
+      IF (N.EQ.0) THEN
+         ENDW2 = ZERO
+         RETURN
+      ENDIF
+      F1   = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
+      F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
+      IF (N.EQ.1) THEN
+         ENDW2 = F1
+         RETURN
+      ENDIF
+      FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
+      FINT1 = FINT1*TWO**(APB+TWO)
+      FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
+      FINT2 = FINT2*TWO**(APB+THREE)
+      F2    = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2)
+     $        * (APB+THREE)/FOUR
+      IF (N.EQ.2) THEN
+         ENDW2 = F2
+         RETURN
+      ENDIF
+      DO 100 I=3,N                 ! three-term recurrence on F1,F2
+         DI   = ((I-1))
+         ABN  = ALPHA+BETA+DI
+         ABNN = ABN+DI
+         A1   =  -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
+         A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
+         A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
+         F3   =  -(A2*F2+A1*F1)/A3
+         F1   = F2
+         F2   = F3
+ 100  CONTINUE
+      ENDW2  = F3
+      RETURN
+      END
+C     Gamma function by table lookup for the arguments used by this
+C     library (half-integers -0.5..3.5 and integers 1..6).
+C     NOTE(review): any other X silently returns 1 - confirm intent.
+      REAL*8  FUNCTION GAMMAF (X)
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  X
+      HALF = 0.5
+      ONE  = 1.0
+      TWO  = 2.0
+      FOUR = 4.0
+      SQRTPI = SQRT(FOUR*ATAN(ONE))
+C     X = 1 and X = 2 keep the default value ONE.
+      GAMMAF = ONE
+      IF (X.EQ.-HALF) GAMMAF = -TWO*SQRTPI
+      IF (X.EQ. HALF) GAMMAF =  SQRTPI
+      IF (X.EQ. 1.5 ) GAMMAF =  SQRTPI/2.
+      IF (X.EQ. 2.5 ) GAMMAF =  1.5*SQRTPI/2.
+      IF (X.EQ. 3.5 ) GAMMAF =  0.5*(2.5*(1.5*SQRTPI))
+      IF (X.EQ. 3.  ) GAMMAF =  2.
+      IF (X.EQ. 4.  ) GAMMAF =  6.
+      IF (X.EQ. 5.  ) GAMMAF =  24.
+      IF (X.EQ. 6.  ) GAMMAF =  120.
+      RETURN
+      END
+C     PNORMJ: normalization factor used by ZWGJD (stored in FNORM).
+      REAL*8  FUNCTION PNORMJ (N,ALPHA,BETA)
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  ALPHA,BETA
+      ONE   = 1.
+      TWO   = 2.
+      DN    = ((N))
+      CONST = ALPHA+BETA+ONE
+      IF (N.LE.1) THEN             ! direct GAMMAF table evaluation
+         PROD   = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA)
+         PROD   = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA))
+         PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
+         RETURN
+      ENDIF
+      PROD  = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)
+      PROD  = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE))
+      PROD  = PROD*(ONE+ALPHA)*(TWO+ALPHA)
+      PROD  = PROD*(ONE+BETA)*(TWO+BETA)
+      DO 100 I=3,N                 ! extend the product up to N
+         DINDX = ((I))
+         FRAC  = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA))
+         PROD  = PROD*FRAC
+ 100  CONTINUE
+      PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
+      RETURN
+      END
+C
+      SUBROUTINE JACG (XJAC,NP,ALPHA,BETA)
+C--------------------------------------------------------------------
+C     XJAC is returned sorted in ascending order.
+C     Compute NP Gauss points XJAC, which are the zeros of the
+C     Jacobi polynomial J(NP) with parameters ALPHA and BETA.
+C     ALPHA and BETA determines the specific type of Gauss points.
+C     Examples:
+C     ALPHA = BETA =  0.0  ->  Legendre points
+C     ALPHA = BETA = -0.5  ->  Chebyshev points
+C     Roots come from Newton steps with deflation, then get sorted.
+C--------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  XJAC(1)
+      DATA KSTOP /10/
+      DATA EPS/1.0e-12/
+      N   = NP-1
+      one = 1.
+      DTH = 4.*ATAN(one)/(2.*((N))+2.)
+      DO 40 J=1,NP
+         IF (J.EQ.1) THEN
+            X = COS((2.*(((J))-1.)+1.)*DTH)
+         ELSE
+            X1 = COS((2.*(((J))-1.)+1.)*DTH)
+            X2 = XLAST
+            X  = (X1+X2)/2.
+         ENDIF
+         DO 30 K=1,KSTOP            ! at most KSTOP Newton updates
+            CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X)
+            RECSUM = 0.
+            JM = J-1
+            DO 29 I=1,JM            ! deflate roots already found
+               RECSUM = RECSUM+1./(X-XJAC(NP-I+1))
+ 29         CONTINUE
+            DELX = -P/(PD-RECSUM*P)
+            X    = X+DELX
+            IF (ABS(DELX) .LT. EPS) GOTO 31
+ 30      CONTINUE
+ 31      CONTINUE
+         XJAC(NP-J+1) = X
+         XLAST        = X
+ 40   CONTINUE
+      DO 200 I=1,NP                 ! selection sort, ascending
+         XMIN = 2.
+         DO 100 J=I,NP
+            IF (XJAC(J).LT.XMIN) THEN
+               XMIN = XJAC(J)
+               JMIN = J
+            ENDIF
+ 100     CONTINUE
+         IF (JMIN.NE.I) THEN
+            SWAP = XJAC(I)
+            XJAC(I) = XJAC(JMIN)
+            XJAC(JMIN) = SWAP
+         ENDIF
+ 200  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2,
+     $                   N,ALP,BET,X)
+C--------------------------------------------------------------------
+C     Computes the Jacobi polynomial (POLY) and its derivative (PDER)
+C     of degree N at X.
+C     POLYM1/PDERM1 and POLYM2/PDERM2 return the degree N-1 and N-2
+C     values; they are assigned only when N.GE.2.
+C--------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      APB  = ALP+BET
+      POLY = 1.
+      PDER = 0.
+      IF (N .EQ. 0) RETURN
+      POLYL = POLY
+      PDERL = PDER
+      POLY  = (ALP-BET+(APB+2.)*X)/2.
+      PDER  = (APB+2.)/2.
+      IF (N .EQ. 1) RETURN
+      DO 20 K=2,N
+         DK = ((K))
+         A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.)
+         A2 = (2.*DK+APB-1.)*(ALP**2-BET**2)
+         B3 = (2.*DK+APB-2.)
+         A3 = B3*(B3+1.)*(B3+2.)
+         A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB)
+         POLYN  = ((A2+A3*X)*POLY-A4*POLYL)/A1
+         PDERN  = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1
+         PSAVE  = POLYL
+         PDSAVE = PDERL
+         POLYL  = POLY
+         POLY   = POLYN
+         PDERL  = PDER
+         PDER   = PDERN
+ 20   CONTINUE
+      POLYM1 = POLYL
+      PDERM1 = PDERL
+      POLYM2 = PSAVE
+      PDERM2 = PDSAVE
+      RETURN
+      END
+C
+      REAL FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant HGJ through
+C     the NP Gauss Jacobi points ZGJ at the point Z.
+C     Single precision version: converts to REAL*8 and calls HGJD.
+C     HGJD, ALPHAD, BETAD are typed REAL*8 to match HGJD's interface.
+C---------------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZD = NMAX)
+      REAL*8  ZD,ZGJD(NZD),HGJD,ALPHAD,BETAD
+      REAL Z,ZGJ(1),ALPHA,BETA
+      NPMAX = NZD
+      IF (NP.GT.NPMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in HGJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here NP=',NP
+         call exitt
+      ENDIF
+      ZD = Z
+      DO 100 I=1,NP
+         ZGJD(I) = ZGJ(I)
+ 100  CONTINUE
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      HGJ    = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD)
+      RETURN
+      END
+C
+      REAL*8  FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant HGJD through
+C     the NP Gauss Jacobi points ZGJ at the point Z.
+C     Double precision version.
+C     Returns 1 when ABS(Z-ZGJ(II)) is below 1.e-5.
+C---------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  Z,ZGJ(1),ALPHA,BETA
+      EPS = 1.e-5
+      ONE = 1.
+      ZI  = ZGJ(II)
+      DZ  = Z-ZI
+      IF (ABS(DZ).LT.EPS) THEN
+         HGJD = ONE
+         RETURN
+      ENDIF
+      CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI)
+      CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z)
+      HGJD  = PZ/(PDZI*(Z-ZI))
+      RETURN
+      END
+C
+      REAL FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant HGLJ through
+C     the NP Gauss-Lobatto Jacobi points ZGLJ at the point Z.
+C     Single precision version: converts to REAL*8 and calls HGLJD.
+C     HGLJD, ALPHAD, BETAD are typed REAL*8 to match HGLJD's interface.
+C---------------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZD = NMAX)
+      REAL*8  ZD,ZGLJD(NZD),HGLJD,ALPHAD,BETAD
+      REAL Z,ZGLJ(1),ALPHA,BETA
+      NPMAX = NZD
+      IF (NP.GT.NPMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in HGLJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here NP=',NP
+         call exitt
+      ENDIF
+      ZD = Z
+      DO 100 I=1,NP
+         ZGLJD(I) = ZGLJ(I)
+ 100  CONTINUE
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      HGLJ   = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD)
+      RETURN
+      END
+C
+      REAL*8  FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant HGLJD through
+C     the NP Gauss-Lobatto Jacobi points ZGLJ at the point Z.
+C     Double precision version.
+C     Local PI/PDI hold JACOBF output at ZI (not the constant pi).
+C---------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  Z,ZGLJ(1),ALPHA,BETA
+      EPS = 1.e-5
+      ONE = 1.
+      ZI  = ZGLJ(I)
+      DZ  = Z-ZI
+      IF (ABS(DZ).LT.EPS) THEN
+         HGLJD = ONE
+         RETURN
+      ENDIF
+      N      = NP-1
+      DN     = ((N))
+      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+      CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI)
+      CONST  = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI
+      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z)
+      HGLJD  = (ONE-Z**2)*PD/(CONST*(Z-ZI))
+      RETURN
+      END
+C
+      SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
+C-----------------------------------------------------------------
+C
+C     Compute the derivative matrix D and its transpose DT
+C     associated with the Nth order Lagrangian interpolants
+C     through the NZ Gauss Jacobi points Z.
+C     Note: D and DT are square matrices.
+C     Single precision version: converts to REAL*8, calls DGJD.
+C     ALPHAD/BETAD are typed REAL*8 to match DGJD's dummy arguments.
+C-----------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZDD = NMAX)
+      REAL*8  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
+      REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+C
+      IF (NZ.LE.0) THEN
+         WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1'
+         call exitt
+      ENDIF
+      IF (NZ .GT. NMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in DGJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here Nz=',Nz
+         call exitt
+      ENDIF
+      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+         WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      DO 100 I=1,NZ
+         ZD(I) = Z(I)
+ 100  CONTINUE
+      CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
+      DO 200 I=1,NZ
+      DO 200 J=1,NZ
+         D(I,J)  = DD(I,J)
+         DT(I,J) = DTD(I,J)
+ 200  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
+C-----------------------------------------------------------------
+C
+C     Compute the derivative matrix D and its transpose DT
+C     associated with the Nth order Lagrangian interpolants
+C     through the NZ Gauss Jacobi points Z.
+C     Note: D and DT are square matrices.
+C     Double precision version.
+C     Calls exitt unless NZ.GE.2 and ALPHA,BETA are greater than -1.
+C-----------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+      N    = NZ-1
+      DN   = ((N))
+      ONE  = 1.
+      TWO  = 2.
+C
+      IF (NZ.LE.1) THEN
+       WRITE (6,*) 'DGJD: Minimum number of Gauss-Lobatto points is 2'
+       call exitt
+      ENDIF
+      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+         WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C
+      DO 200 I=1,NZ
+      DO 200 J=1,NZ
+         CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I))
+         CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J))
+         IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J)))
+         IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/
+     $                        (TWO*(ONE-Z(I)**2))
+         DT(J,I) = D(I,J)
+ 200  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
+C-----------------------------------------------------------------
+C
+C     Compute the derivative matrix D and its transpose DT
+C     associated with the Nth order Lagrangian interpolants
+C     through the NZ Gauss-Lobatto Jacobi points Z.
+C     Note: D and DT are square matrices.
+C     Single precision version: converts to REAL*8, calls DGLJD.
+C     ALPHAD/BETAD are typed REAL*8 to match DGLJD's dummy arguments.
+C-----------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      PARAMETER (NZDD = NMAX)
+      REAL*8  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
+      REAL D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+C
+      IF (NZ.LE.1) THEN
+       WRITE (6,*) 'DGLJ: Minimum number of Gauss-Lobatto points is 2'
+       call exitt
+      ENDIF
+      IF (NZ .GT. NMAX) THEN
+         WRITE (6,*) 'Too large polynomial degree in DGLJ'
+         WRITE (6,*) 'Maximum polynomial degree is',NMAX
+         WRITE (6,*) 'Here NZ=',NZ
+         call exitt
+      ENDIF
+      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+         WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      DO 100 I=1,NZ
+         ZD(I) = Z(I)
+ 100  CONTINUE
+      CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
+      DO 200 I=1,NZ
+      DO 200 J=1,NZ
+         D(I,J)  = DD(I,J)
+         DT(I,J) = DTD(I,J)
+ 200  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
+C-----------------------------------------------------------------
+C
+C     Compute the derivative matrix D and its transpose DT
+C     associated with the Nth order Lagrangian interpolants
+C     through the NZ Gauss-Lobatto Jacobi points Z.
+C     Note: D and DT are square matrices.
+C     Double precision version.
+C     Calls exitt unless NZ.GE.2 and ALPHA,BETA are greater than -1.
+C-----------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+      N    = NZ-1
+      DN   = ((N))
+      ONE  = 1.
+      TWO  = 2.
+      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+C
+      IF (NZ.LE.1) THEN
+       WRITE (6,*) 'DGLJD: Minimum number of Gauss-Lobatto points is 2'
+       call exitt
+      ENDIF
+      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+         WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C
+      DO 200 I=1,NZ
+      DO 200 J=1,NZ
+         CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I))
+         CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J))
+         CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI
+         CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ
+         IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J)))
+         IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ))
+     $   D(I,J) = (ALPHA*(ONE+Z(I))-BETA*(ONE-Z(I)))/
+     $            (TWO*(ONE-Z(I)**2))
+         IF ((I.EQ.J).AND.(I.EQ.1))
+     $   D(I,J) =  (EIGVAL+ALPHA)/(TWO*(BETA+TWO))
+         IF ((I.EQ.J).AND.(I.EQ.NZ))
+     $   D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO))
+         DT(J,I) = D(I,J)
+ 200  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGLL (D,DT,Z,NZ,NZD)
+C-----------------------------------------------------------------
+C
+C     Compute the derivative matrix D and its transpose DT
+C     associated with the Nth order Lagrangian interpolants
+C     through the NZ Gauss-Lobatto Legendre points Z.
+C     Note: D and DT are square matrices.
+C     NZ.GT.NMAX only prints a warning; the routine continues.
+C-----------------------------------------------------------------
+      PARAMETER (NMAX=84)
+      REAL D(NZD,NZD),DT(NZD,NZD),Z(1)
+      N  = NZ-1
+      IF (NZ .GT. NMAX) THEN
+         WRITE (6,*) 'Subroutine DGLL'
+         WRITE (6,*) 'Maximum polynomial degree =',NMAX
+         WRITE (6,*) 'Polynomial degree         =',NZ
+      ENDIF
+      IF (NZ .EQ. 1) THEN
+         D(1,1) = 0.
+         RETURN
+      ENDIF
+      FN = (N)
+      d0 = FN*(FN+1.)/4.
+      DO 200 I=1,NZ
+      DO 200 J=1,NZ
+         D(I,J) = 0.
+         IF  (I.NE.J) D(I,J) = PNLEG(Z(I),N)/
+     $                        (PNLEG(Z(J),N)*(Z(I)-Z(J)))
+         IF ((I.EQ.J).AND.(I.EQ.1))  D(I,J) = -d0
+         IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) =  d0
+         DT(J,I) = D(I,J)
+ 200  CONTINUE
+      RETURN
+      END
+C
+      REAL FUNCTION HGLL (I,Z,ZGLL,NZ)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant L through
+C     the NZ Gauss-Lobatto Legendre points ZGLL at the point Z.
+C     Returns 1. when ABS(Z-ZGLL(I)) is below 1.E-5.
+C---------------------------------------------------------------------
+      REAL ZGLL(1)
+      EPS = 1.E-5
+      DZ = Z - ZGLL(I)
+      IF (ABS(DZ) .LT. EPS) THEN
+         HGLL = 1.
+         RETURN
+      ENDIF
+      N = NZ - 1
+      ALFAN = (N)*((N)+1.)
+      HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/
+     $         (ALFAN*PNLEG(ZGLL(I),N)*(Z-ZGLL(I)))
+      RETURN
+      END
+C
+      REAL FUNCTION HGL (I,Z,ZGL,NZ)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Lagrangian interpolant HGL through
+C     the NZ Gauss Legendre points ZGL at the point Z.
+C     Returns 1. when ABS(Z-ZGL(I)) is below 1.E-5.
+C---------------------------------------------------------------------
+      REAL ZGL(1)
+      EPS = 1.E-5
+      DZ = Z - ZGL(I)
+      IF (ABS(DZ) .LT. EPS) THEN
+         HGL = 1.
+         RETURN
+      ENDIF
+      N = NZ-1
+      HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I)))
+      RETURN
+      END
+C
+      REAL FUNCTION PNLEG (Z,N)
+C---------------------------------------------------------------------
+C
+C     Compute the value of the Nth order Legendre polynomial at Z.
+C     (Simpler than JACOBF)
+C     Based on the recursion formula for the Legendre polynomials.
+C     NOTE: Z itself is reset to 0.0 when ABS(Z) is below 1.0E-25.
+C---------------------------------------------------------------------
+C
+C     This next statement is to overcome the underflow bug in the i860.
+C     It can be removed at a later date.  11 Aug 1990   pff.
+C
+      IF(ABS(Z) .LT. 1.0E-25) Z = 0.0
+C
+      P1   = 1.
+      IF (N.EQ.0) THEN
+         PNLEG = P1
+         RETURN
+      ENDIF
+      P2   = Z
+      P3   = P2
+      DO 10 K = 1, N-1
+         FK  = (K)
+         P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
+         P1  = P2
+         P2  = P3
+ 10   CONTINUE
+      PNLEG = P3
+      if (n.eq.0) pnleg = 1.
+      RETURN
+      END
+C
+      REAL FUNCTION PNDLEG (Z,N)
+C----------------------------------------------------------------------
+C
+C     Compute the derivative of the Nth order Legendre polynomial at Z.
+C     (Simpler than JACOBF)
+C     Based on the recursion formula for the Legendre polynomials.
+C     Returns 0. for N=0; for N=1 the loop is skipped and 1. results.
+C----------------------------------------------------------------------
+      P1   = 1.
+      P2   = Z
+      P1D  = 0.
+      P2D  = 1.
+      P3D  = 1.
+      DO 10 K = 1, N-1
+         FK  = (K)
+         P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
+         P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.)
+         P1  = P2
+         P2  = P3
+         P1D = P2D
+         P2D = P3D
+ 10   CONTINUE
+      PNDLEG = P3D
+      IF (N.eq.0) pndleg = 0.
+      RETURN
+      END
+C
+      SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2)
+C-----------------------------------------------------------------------
+C
+C     Compute the (one-dimensional) derivative matrix D and its
+C     transpose DT associated with taking the derivative of a variable
+C     expanded on a Gauss-Lobatto Legendre mesh (M1), and evaluate its
+C     derivative on a Gauss Legendre mesh (M2).
+C     Need the one-dimensional interpolation operator IM12
+C     (see subroutine IGLLGL).
+C     Note: D and DT are rectangular matrices.
+C     D is zeroed where both ZM2(IP) and ZM1(JQ) lie within 1.E-6 of 0.
+C-----------------------------------------------------------------------
+      REAL D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1)
+      IF (NZM1.EQ.1) THEN
+        D (1,1) = 0.
+        DT(1,1) = 0.
+        RETURN
+      ENDIF
+      EPS = 1.E-6
+      NM1 = NZM1-1
+      DO 10 IP = 1, NZM2
+         DO 10 JQ = 1, NZM1
+            ZP = ZM2(IP)
+            ZQ = ZM1(JQ)
+            IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN
+                D(IP,JQ) = 0.
+            ELSE
+                D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1)
+     $                     -IM12(IP,JQ))/(ZP-ZQ)
+            ENDIF
+            DT(JQ,IP) = D(IP,JQ)
+ 10   CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
+C-----------------------------------------------------------------------
+C
+C     Compute the (one-dimensional) derivative matrix D and its
+C     transpose DT associated with taking the derivative of a variable
+C     expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its
+C     derivative on a Gauss Jacobi mesh (M2).
+C     Need the one-dimensional interpolation operator IGLG
+C     (see subroutine IGLJGJ).
+C     Note: D and DT are rectangular matrices.
+C     Single precision version.
+C     NOTE(review): only NPGL is checked against NMAX, NPG is not.
+C-----------------------------------------------------------------------
+      REAL D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1)
+      PARAMETER (NMAX=84)
+      PARAMETER (NDD = NMAX)
+      REAL*8  DD(NDD,NDD), DTD(NDD,NDD)
+      REAL*8  ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD)
+      REAL*8  ALPHAD, BETAD
+C
+      IF (NPGL.LE.1) THEN
+       WRITE(6,*) 'DGLJGJ: Minimum number of Gauss-Lobatto points is 2'
+       call exitt
+      ENDIF
+      IF (NPGL.GT.NMAX) THEN
+         WRITE(6,*) 'Polynomial degree too high in DGLJGJ'
+         WRITE(6,*) 'Maximum polynomial degree is',NMAX
+         WRITE(6,*) 'Here NPGL=',NPGL
+         call exitt
+      ENDIF
+      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+         WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C
+      ALPHAD = ALPHA
+      BETAD  = BETA
+      DO 100 I=1,NPG
+         ZGD(I) = ZG(I)
+         DO 100 J=1,NPGL
+            IGLGD(I,J) = IGLG(I,J)
+ 100  CONTINUE
+      DO 200 I=1,NPGL
+         ZGLD(I) = ZGL(I)
+ 200  CONTINUE
+      CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD)
+      DO 300 I=1,NPG
+      DO 300 J=1,NPGL
+         D(I,J)  = DD(I,J)
+         DT(J,I) = DTD(J,I)
+ 300  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
+C-----------------------------------------------------------------------
+C
+C     Compute the (one-dimensional) derivative matrix D and its
+C     transpose DT associated with taking the derivative of a variable
+C     expanded on a Gauss-Lobatto Jacobi mesh (M1), and evaluate its
+C     derivative on a Gauss Jacobi mesh (M2).
+C     Need the one-dimensional interpolation operator IGLG
+C     (see subroutine IGLJGJ).
+C     Note: D and DT are rectangular matrices.
+C     Double precision version.
+C     Entries with ABS(ZG(I)-ZGL(J)) below 1.e-6 use a separate formula.
+C-----------------------------------------------------------------------
+      IMPLICIT REAL*8  (A-H,O-Z)
+      REAL*8  D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2)
+      REAL*8  IGLG(ND2,ND1), ALPHA, BETA
+C
+      IF (NPGL.LE.1) THEN
+       WRITE(6,*) 'DGLJGJD: Minimum number of Gauss-Lobatto points is 2'
+       call exitt
+      ENDIF
+      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+         WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1'
+         call exitt
+      ENDIF
+C
+      EPS    = 1.e-6
+      ONE    = 1.
+      TWO    = 2.
+      NGL    = NPGL-1
+      DN     = ((NGL))
+      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+C
+      DO 100 I=1,NPG
+      DO 100 J=1,NPGL
+         DZ = ABS(ZG(I)-ZGL(J))
+         IF (DZ.LT.EPS) THEN
+            D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/
+     $               (TWO*(ONE-ZG(I)**2))
+         ELSE
+            CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I))
+            CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J))
+            FACI   = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I))
+            FACJ   = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J))
+            CONST  = EIGVAL*PJ+FACJ*PDJ
+            D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J))
+     $               -(ONE-ZG(I)**2)*PDI)/(CONST*(ZG(I)-ZGL(J))**2)
+         ENDIF
+         DT(J,I) = D(I,J)
+ 100  CONTINUE
+      RETURN
+      END
+C
+      SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
+C----------------------------------------------------------------------
+C     Build the one-dimensional interpolation matrix I12 and its
+C     transpose IT12, taking a variable from a Gauss Legendre
+C     mesh (1) to another mesh M (2); each entry is one HGL value.
+C     Z1 : NZ1 Gauss Legendre points.
+C     Z2 : NZ2 points on mesh M.
+C--------------------------------------------------------------------
+      REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2)
+      IF (NZ1 .NE. 1) THEN
+         DO 20 IM=1,NZ2
+C           HGL is handed a local copy of the point, not Z2 itself.
+            ZM = Z2(IM)
+            DO 10 JG=1,NZ1
+               I12 (IM,JG) = HGL(JG,ZM,Z1,NZ1)
+               IT12(JG,IM) = I12(IM,JG)
+ 10         CONTINUE
+ 20      CONTINUE
+      ELSE
+         I12 (1,1) = 1.
+         IT12(1,1) = 1.
+      ENDIF
+      RETURN
+      END
+c
+      SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
+C----------------------------------------------------------------------
+C     Build the one-dimensional interpolation matrix I12 and its
+C     transpose IT12, taking a variable from a Gauss-Lobatto Legendre
+C     mesh (1) to another mesh M (2); each entry is one HGLL value.
+C     Z1 : NZ1 Gauss-Lobatto Legendre points.
+C     Z2 : NZ2 points on mesh M.
+C--------------------------------------------------------------------
+      REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2)
+      IF (NZ1 .NE. 1) THEN
+         DO 20 IM=1,NZ2
+C           HGLL is handed a local copy of the point, not Z2 itself.
+            ZM = Z2(IM)
+            DO 10 JG=1,NZ1
+               I12 (IM,JG) = HGLL(JG,ZM,Z1,NZ1)
+               IT12(JG,IM) = I12(IM,JG)
+ 10         CONTINUE
+ 20      CONTINUE
+      ELSE
+         I12 (1,1) = 1.
+         IT12(1,1) = 1.
+      ENDIF
+      RETURN
+      END
+C
+      SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
+C----------------------------------------------------------------------
+C     Build the one-dimensional interpolation matrix I12 and its
+C     transpose IT12, taking a variable from a Gauss Jacobi
+C     mesh (1) to another mesh M (2); each entry is one HGJ value.
+C     Z1 : NZ1 Gauss Jacobi points.
+C     Z2 : NZ2 points on mesh M.
+C     Single precision version.
+C--------------------------------------------------------------------
+      REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2)
+      IF (NZ1 .NE. 1) THEN
+         DO 20 IM=1,NZ2
+C           HGJ is handed a local copy of the point, not Z2 itself.
+            ZM = Z2(IM)
+            DO 10 JG=1,NZ1
+               I12 (IM,JG) = HGJ(JG,ZM,Z1,NZ1,ALPHA,BETA)
+               IT12(JG,IM) = I12(IM,JG)
+ 10         CONTINUE
+ 20      CONTINUE
+      ELSE
+         I12 (1,1) = 1.
+         IT12(1,1) = 1.
+      ENDIF
+      RETURN
+      END
+c
+      SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
+C----------------------------------------------------------------------
+C     Build the one-dimensional interpolation matrix I12 and its
+C     transpose IT12, taking a variable from a Gauss-Lobatto Jacobi
+C     mesh (1) to another mesh M (2); each entry is one HGLJ value.
+C     Z1 : NZ1 Gauss-Lobatto Jacobi points.
+C     Z2 : NZ2 points on mesh M.
+C     Single precision version.
+C--------------------------------------------------------------------
+      REAL I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2)
+      IF (NZ1 .NE. 1) THEN
+         DO 20 IM=1,NZ2
+C           HGLJ is handed a local copy of the point, not Z2 itself.
+            ZM = Z2(IM)
+            DO 10 JG=1,NZ1
+               I12 (IM,JG) = HGLJ(JG,ZM,Z1,NZ1,ALPHA,BETA)
+               IT12(JG,IM) = I12(IM,JG)
+ 10         CONTINUE
+ 20      CONTINUE
+      ELSE
+         I12 (1,1) = 1.
+         IT12(1,1) = 1.
+      ENDIF
+      RETURN
+      END
diff --git a/src/timers.c b/src/timers.c
new file mode 100644
index 0000000..f40bf91
--- /dev/null
+++ b/src/timers.c
@@ -0,0 +1,24 @@
+
+#include <time.h>  /* for struct timespec */
+
+double fclock_gettime(void) {
+  /* Seconds read from CLOCK_MONOTONIC, returned as a double. */
+  struct timespec now;
+
+  clock_gettime(CLOCK_MONOTONIC, &now);
+
+  return (double) (now.tv_sec + now.tv_nsec*1.0E-9);
+}
+
+double fclock_gettime_( void ) __attribute__((alias("fclock_gettime")));
+
+#ifdef BGQTIMER
+#include <hwi/include/bqc/A2_inlines.h>
+/* Return GetTimeBase() converted to double (BGQTIMER builds only). */
+double ReadTimeBase_Double( void ) {
+  return (double) GetTimeBase();
+}
+/* Lower-case aliases, with and without a trailing underscore. */
+double readtimebase_double( void ) __attribute__((alias("ReadTimeBase_Double")));
+double readtimebase_double_( void ) __attribute__((alias("ReadTimeBase_Double")));
+#endif
diff --git a/test/example1/SIZE b/test/example1/SIZE
new file mode 100644
index 0000000..941aedc
--- /dev/null
+++ b/test/example1/SIZE
@@ -0,0 +1,17 @@
+C     Dimension file to be included
+C     (compile-time parameters plus the /dimn/ common block)
+      parameter (ldim=3)                      ! dimension
+      parameter (lx1=12,ly1=lx1,lz1=lx1)      ! polynomial order
+
+      parameter (lp =49152)                 ! max number of processors
+      parameter (lelt= 512)                    ! max number of elements, per proc
+
+      parameter (lelg=lelt*lp)                ! max total elements in a test
+      parameter (lelx=lelg,lely=1,lelz=1)     ! max elements in each direction
+
+      parameter (ldimt=1,ldimt1=ldimt+1)      ! used in 'include' files
+
+
+      common /dimn/ nelx,nely,nelz,nelt       ! local element common block
+     $            , nx1,ny1,nz1,ndim,nfield,nid
+
diff --git a/test/example1/data.rea b/test/example1/data.rea
new file mode 100644
index 0000000..36077f4
--- /dev/null
+++ b/test/example1/data.rea
@@ -0,0 +1,5 @@
+.true. = ifbrick               ! brick or linear geometry
+512 512 1  = iel0,ielN,ielD (per processor)  ! range of number of elements per proc.
+ 9  12 3 = nx0,nxN,nxD         ! poly. order range for nx1
+ 1  1  1 = npx, npy, npz       ! processor distribution in x,y,z
+ 1  1  1 = mx, my, mz          ! local element distribution in x,y,z
diff --git a/test/example1/makefile.template b/test/example1/makefile.template
new file mode 100644
index 0000000..81d8805
--- /dev/null
+++ b/test/example1/makefile.template
@@ -0,0 +1,140 @@
+BINNAME=nekbone
+CASENAME=
+CASEDIR=
+S=
+J:=$S/jl
+OPT_INCDIR:=./
+OBJDIR=obj
+IFMPI= 
+F77=
+CC=
+P=
+PPPO=
+PPS=
+G=
+OPT_FLAGS_STD=
+USR=
+USR_LFLAGS=
+
+################################################################################
+# Derived settings (the empty variables above are presumably filled in by makenek).
+lFLAGS = $(USR_LFLAGS)
+
+PPS_F = $(patsubst %,$(PPPO)-D%,$(PPS))
+PPS_C = $(patsubst %,-D%,$(PPS))
+
+#NEW #########################################################################
+EXTRA = cg.o driver.o math.o omp.o mxm_wrapper.o prox_dssum.o prox_setup.o semhat.o \
+speclib.o timers.o
+
+################################################################################
+# MXM 
+MXM = mxm_std.o
+
+# JL Routines ###################################################################
+JO  = jl_
+JL := -DPREFIX=jl_
+
+JLCORE = $(JO)gs.o $(JO)sort.o $(JO)sarray_transfer.o $(JO)sarray_sort.o \
+$(JO)gs_local.o $(JO)crystal.o $(JO)comm.o $(JO)tensor.o $(JO)fail.o \
+$(JO)fcrystal.o
+
+COMM_MPI := comm_mpi.o
+ifeq ($(IFMPI),false)
+  COMM_MPI := ${COMM_MPI} mpi_dummy.o
+endif
+# Parse-time side effect: IFMPI=false copies mpi_dummy.h to mpif.h, otherwise mpif.h is removed.
+ifeq ($(IFMPI),false)
+	DUMMY:= $(shell cp $S/mpi_dummy.h $S/mpif.h) 
+else
+	DUMMY:= $(shell rm -rf $S/mpif.h) 
+endif
+
+#####################################################################################
+TMP0 = $(EXTRA) $(COMM_MPI) $(MXM)
+NOBJS_F0 = $(patsubst %,$(OBJDIR)/%,$(TMP0))
+TMP0c = $(JLCORE)
+NOBJS_C0 = $(patsubst %,$(OBJDIR)/%,$(TMP0c))
+
+NOBJS0 = $(NOBJS_F0) $(NOBJS_C0)
+##############################################################################
+# Flag sets: L0 forces -O0; L2, L3 and L4 currently expand to the same flags.
+L0 = $(G) -O0
+L2 = $(G) $(OPT_FLAGS_STD)
+L3 = $(G) $(OPT_FLAGS_STD)
+L4 = $(L3)
+
+FL0   = $(L0) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR)
+FL2i4 = $(L2)      $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR)
+FL2   = $(L2) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR)
+FL3   = $(L3) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR)
+FL4   = $(L4) $(P) $(PPS_F) -I$(CASEDIR) -I$S -I$(OPT_INCDIR)
+
+cFL0   = $(L0) $(PPS_C) 
+cFL2   = $(L2) $(PPS_C) 
+cFL3   = $(L3) $(PPS_C) 
+cFL4   = $(L4) $(PPS_C) 
+################################################################################
+all : nekbone
+
+objdir: 
+	@mkdir -p $(OBJDIR)
+# Every object lists objdir as an order-only prerequisite (see below) so that
+nekbone: 	objdir $(NOBJS0)
+	$(F77) -o ${BINNAME} $G $(NOBJS0) $(lFLAGS)
+	@if test -f ${BINNAME}; then \
+	echo "#############################################################"; \
+	echo "#                  Compilation successful!                  #"; \
+	echo "#############################################################"; \
+        size ${BINNAME}; \
+        echo ""; \
+	else \
+	echo -e "\033[1;31;38m" "ERROR: Compilation failed!"; \
+	echo -e "\033[0m"; \
+	fi
+ifeq ($(IFMPI),false) 
+	@rm -rf $S/mpif.h
+endif
+
+clean:
+	rm -rf ./obj ${BINNAME}
+ifeq ($(IFMPI),false) 
+	@rm -rf $S/mpif.h
+endif
+$(NOBJS0) : | objdir   # parallel (-j) builds never compile before $(OBJDIR) exists
+$(NOBJS_F0) : SIZE
+# CORE      ############################################################################
+$(OBJDIR)/cg.o		:$S/cg.f;			$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/driver.o	:$S/driver.f;			$(F77) -c $(FL2) $< -o $@
+$(OBJDIR)/math.o	:$S/math.f;			$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/omp.o		:$S/omp.f;			$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/prox_dssum.o  :$S/prox_dssum.f;		$(F77) -c $(FL2) $< -o $@
+$(OBJDIR)/prox_setup.o	:$S/prox_setup.f;		$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/semhat.o	:$S/semhat.f;			$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/speclib.o	:$S/speclib.f;			$(F77) -c $(FL2) $< -o $@
+$(OBJDIR)/blas.o        :$S/blas.f; 		        $(F77) -c $(FL2i4) $< -o $@
+$(OBJDIR)/byte_mpi.o	:$S/byte_mpi.f;			$(F77) -c $(FL2) $< -o $@
+$(OBJDIR)/comm_mpi.o	:$S/comm_mpi.f;			$(F77) -c $(FL2) $< -o $@
+$(OBJDIR)/mpi_dummy.o	:$S/mpi_dummy.f;		$(F77) -c $(FL2) $< -o $@
+# MXM       ############################################################################
+$(OBJDIR)/mxm_wrapper.o	  :$S/mxm_wrapper.f;		$(F77) -c $(FL2) $< -o $@ 
+$(OBJDIR)/mxm_std.o	  :$S/mxm_std.f;		$(F77) -c $(FL4) $< -o $@
+$(OBJDIR)/bg_aligned3.o	  :$S/bg_aligned3.s;		$(CC) -c $< -o $@
+$(OBJDIR)/bg_mxm3.o	  :$S/bg_mxm3.s;		$(CC) -c $< -o $@
+$(OBJDIR)/bg_mxm44.o	  :$S/bg_mxm44.s;		$(CC) -c $< -o $@
+$(OBJDIR)/bg_mxm44_uneven.o :$S/bg_mxm44_uneven.s;	$(CC) -c $< -o $@
+$(OBJDIR)/k10_mxm.o	  :$S/k10_mxm.c;		$(CC)  -c $(cFL2) $(JL) $< -o $@
+# C Files ##################################################################################
+$(OBJDIR)/byte.o                 :$S/byte.c;              $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/chelpers.o             :$S/chelpers.c;          $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/timers.o               :$S/timers.c;            $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)fail.o            :$(J)/fail.c;            $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)tensor.o          :$(J)/tensor.c;          $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)sort.o            :$(J)/sort.c;            $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)sarray_sort.o     :$(J)/sarray_sort.c;     $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)comm.o            :$(J)/comm.c;            $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)crystal.o         :$(J)/crystal.c;         $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)sarray_transfer.o :$(J)/sarray_transfer.c; $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)fcrystal.o        :$(J)/fcrystal.c;        $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)gs.o              :$(J)/gs.c;              $(CC) -c $(cFL2) $(JL) $< -o $@
+$(OBJDIR)/$(JO)gs_local.o        :$(J)/gs_local.c;        $(CC) -c $(cFL2) $(JL) $< -o $@
diff --git a/test/example1/makenek b/test/example1/makenek
new file mode 100755
index 0000000..228bcd1
--- /dev/null
+++ b/test/example1/makenek
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Nek5000 build config file
+# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC
+
+# source path 
+SOURCE_ROOT="$HOME/Nekbone/src" 
+
+# Fortran compiler
+F77="mpif77"
+
+# C compiler
+CC="mpicc"
+
+# pre-processor symbol list 
+# (set PPLIST=? to get a list of available symbols)
+# NEKCOMM, NEKDLAY, BG, MGRID
+#PPLIST="?"
+
+
+# OPTIONAL SETTINGS
+# -----------------
+
+# enable MPI (default true)
+#IFMPI="false"
+
+# auxiliary files to compile
+# NOTE: source files have to be located in the same directory as makenek
+#       a makefile_usr.inc has to be provided containing the build rules 
+#USR="foo.o"
+
+# linking flags
+#USR_LFLAGS="-L/usr/lib -lfoo"
+
+# generic compiler flags
+#G="-g"
+
+# optimization flags
+#OPT_FLAGS_STD=""
+#OPT_FLAGS_MAG=""
+
+###############################################################################
+# DON'T TOUCH WHAT FOLLOWS !!!
+###############################################################################
+# assign version tag
+mver=1
+# overwrite source path with optional 2nd argument
+if [ -d $2 ] && [ $# -eq 2 ]; then
+  SOURCE_ROOT="$2"
+  echo "change source code directory to: ", $SOURCE_ROOT
+fi
+# do some checks and create makefile
+source $SOURCE_ROOT/makenek.inc
+# compile
+make -j4 -f makefile 2>&1 | tee compiler.out
+exit 0
diff --git a/test/example1/makenek-bgq b/test/example1/makenek-bgq
new file mode 100755
index 0000000..5210815
--- /dev/null
+++ b/test/example1/makenek-bgq
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Nek5000 build config file
+# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC
+
+# source path 
+SOURCE_ROOT="../../src"
+
+# Fortran compiler
+F77="mpixlf77_r -qsmp=omp -qnosave"
+
+# C compiler
+CC="mpixlc_r -qsmp=omp"
+
+# pre-processor symbol list 
+# (set PPLIST=? to get a list of available symbols)
+#PPLIST="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8"
+PPLIST="TIMERS BGQTIMERS"
+
+
+# OPTIONAL SETTINGS
+# -----------------
+
+# enable MPI (default true)
+#IFMPI="false"
+
+# auxiliary files to compile
+# NOTE: source files have to be located in the same directory as makenek
+#       a makefile_usr.inc has to be provided containing the build rules 
+#USR="foo.o"
+
+# linking flags
+#USR_LFLAGS="-L/usr/lib/ -lfoo"
+
+# generic compiler flags
+#G="-g"
+
+# optimization flags
+OPT_FLAGS_STD="-O3"
+OPT_FLAGS_MAG="-O3"
+
+###############################################################################
+# DON'T TOUCH WHAT FOLLOWS !!!
+###############################################################################
+# assign version tag
+mver=1
+# overwrite source path with optional 2nd argument
+if [ -d $2 ] && [ $# -eq 2 ]; then
+  SOURCE_ROOT="$2"
+  echo "change source code directory to: ", $SOURCE_ROOT
+fi
+# do some checks and create makefile
+source $SOURCE_ROOT/makenek.inc
+# compile
+make -j4 -f makefile 2>&1 | tee compiler.out
+exit 0
diff --git a/test/example1/makenek-cray-knl b/test/example1/makenek-cray-knl
new file mode 100755
index 0000000..c0658f2
--- /dev/null
+++ b/test/example1/makenek-cray-knl
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Nek5000 build config file
+# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC
+
+# source path 
+SOURCE_ROOT="../../src"
+
+# Fortran compiler
+F77="ftn"
+
+# C compiler
+CC="cc"
+
+# pre-processor symbol list 
+# (set PPLIST=? to get a list of available symbols)
+#PPLIST="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8"
+PPLIST="TIMERS CGTIMERS"
+
+
+# OPTIONAL SETTINGS
+# -----------------
+
+# enable MPI (default true)
+#IFMPI="false"
+
+# auxiliary files to compile
+# NOTE: source files have to be located in the same directory as makenek
+#       a makefile_usr.inc has to be provided containing the build rules 
+#USR="foo.o"
+
+# linking flags
+USR_LFLAGS="-qopenmp -dynamic -mcmodel=medium -shared-intel"
+
+# generic compiler flags
+#G="-g"
+
+# optimization flags
+OPT_FLAGS_STD="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel "
+OPT_FLAGS_MAG="-qopenmp -dynamic -O3 -g -xMIC-AVX512 -mcmodel=medium -shared-intel"
+
+###############################################################################
+# DON'T TOUCH WHAT FOLLOWS !!!
+###############################################################################
+# assign version tag
+mver=1
+# overwrite source path with optional 2nd argument
+if [ -d $2 ] && [ $# -eq 2 ]; then
+  SOURCE_ROOT="$2"
+  echo "change source code directory to: ", $SOURCE_ROOT
+fi
+# do some checks and create makefile
+source $SOURCE_ROOT/makenek.inc
+# compile
+make -j4 -f makefile 2>&1 | tee compiler.out
+exit 0
diff --git a/test/example1/makenek-intel b/test/example1/makenek-intel
new file mode 100755
index 0000000..209dfc3
--- /dev/null
+++ b/test/example1/makenek-intel
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Nek5000 build config file
+# (c) 2008,2009,2010 UCHICAGO ARGONNE, LLC
+
+# source path 
+SOURCE_ROOT="../../src"
+
+# Fortran compiler
+F77="mpiifort"
+
+# C compiler
+CC="mpiicc"
+
+# pre-processor symbol list 
+# (set PPLIST=? to get a list of available symbols)
+#PPLIST="BGQ BGP K10_MXM TIMERS MPITIMER BGQTIMER CGTTIMER NITER=20 LOG MPITHREADS XSMM MXMBASIC MKL BLAS_MXM XSMM_FIXED XSMM_DISPATCH NPOLY=8"
+PPLIST="TIMERS CGTIMERS"
+
+
+# OPTIONAL SETTINGS
+# -----------------
+
+# enable MPI (default true)
+#IFMPI="false"
+
+# auxiliary files to compile
+# NOTE: source files have to be located in the same directory as makenek
+#       a makefile_usr.inc has to be provided containing the build rules 
+#USR="foo.o"
+
+# linking flags
+USR_LFLAGS="-qopenmp -mcmodel=medium -shared-intel"
+
+# generic compiler flags
+#G="-g"
+
+# optimization flags
+OPT_FLAGS_STD="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel"
+OPT_FLAGS_MAG="-qopenmp -O3 -g -xHost -mcmodel=medium -shared-intel"
+
+###############################################################################
+# DON'T TOUCH WHAT FOLLOWS !!!
+###############################################################################
+# assign version tag
+mver=1
+# overwrite source path with optional 2nd argument
+if [ -d $2 ] && [ $# -eq 2 ]; then
+  SOURCE_ROOT="$2"
+  echo "change source code directory to: ", $SOURCE_ROOT
+fi
+# do some checks and create makefile
+source $SOURCE_ROOT/makenek.inc
+# compile
+make -j4 -f makefile 2>&1 | tee compiler.out
+exit 0
diff --git a/test/example1/nekpmpi b/test/example1/nekpmpi
new file mode 100755
index 0000000..1c4cc8a
--- /dev/null
+++ b/test/example1/nekpmpi
@@ -0,0 +1,4 @@
+rm -f logfile
+mv "$1.log.$2" "$1.log1.$2"
+mpiexec -np "$2" ./nekbone "$1" > "$1.log.$2"
+ln "$1.log.$2" logfile