From 947abbba457f198e71248962228a06486f3efc80 Mon Sep 17 00:00:00 2001 From: Marius Pachitariu Date: Tue, 5 May 2020 09:51:03 -0400 Subject: [PATCH] refinements --- configFiles/NP2_kilosortChanMap.mat | Bin 0 -> 2045 bytes .../NeuropixelsPhase3A301_kilosortChanMap.mat | Bin 0 -> 1789 bytes ...se3B10x28staggered0x29_kilosortChanMap.mat | Bin 1888 -> 2072 bytes .../neuropixPhase3B1_kilosortChanMap_all.mat | Bin 0 -> 2072 bytes mainLoop/runTemplates.m | 46 ++-- preProcess/benchmarks_manipulator.m | 97 +++++++ preProcess/clusterSingleBatches.m | 68 +---- preProcess/clusterSingleBatchesShift.m | 243 ++++++++++++++++++ preProcess/kernelD.m | 28 ++ preProcess/shift_batch_on_disk.m | 21 +- preProcess/shift_from_hist.m | 45 ++++ preProcess/sortBatches2.m | 1 + 12 files changed, 461 insertions(+), 88 deletions(-) create mode 100644 configFiles/NP2_kilosortChanMap.mat create mode 100644 configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat create mode 100644 configFiles/neuropixPhase3B1_kilosortChanMap_all.mat create mode 100644 preProcess/benchmarks_manipulator.m create mode 100644 preProcess/clusterSingleBatchesShift.m create mode 100644 preProcess/kernelD.m create mode 100644 preProcess/shift_from_hist.m diff --git a/configFiles/NP2_kilosortChanMap.mat b/configFiles/NP2_kilosortChanMap.mat new file mode 100644 index 0000000000000000000000000000000000000000..402458f770e3b676c016c7b12da4abfb4106611c GIT binary patch literal 2045 zcma*ndr;Ep8V7LDykAl?gVNKF+`v=_0TB@MLYenU-NLrBfhgIgrbM(AS>Dkgle|@0 z7-sI4)=F8OXiZB5<&xLXG>=MZ*+r$TQ#s?2+TF7=XU@!do_S{8f4+ac-}yua#zq7N z0p2Jy5E&SYWbRE)^aQph)7i}ZtP~Qkm9ir=3Xk&yD6B*}J23&+pGpE^4yFQuX)FMP z2QU~C7EQvT0Sp?02L8_h{W0Ls$fb+#BSs+5kCn*cgX|Sc9CMqCOvJLOhHc`b)(}=v}mWavA>p~n$>mru_94&a+D}vvui*$JzU6k~48O3;IH;=lq zC#M!CCL)tUJ;r!fyi=0GeFu9omyo&()NELB16%CtHx;`6NKgK+@5KJb5IG=(L|`b8 z4s_m`R`Xy?VHY1(`b&-1so4nHnllwj$6AblU5m{=tBl7(m2{N8LPH@dUCA9rPK~@a zvTU5(a>kRsu9K>cA*(F6&sl343;V9ok<(O-DZa5ll}z{0%#xpK)wY~DqxFp^hoz4? 
zrL9+`vT13{d1>3zSM>F7XHaivH>{q4eb#EgwsZEY^%fUdyoX%br1Pj-J3+mItFVY- z74}q)dQK8FCMZ@xC%5eh_u*}B`-fel?*22b{y z-r3J8yPdUM&?qdekN@^+-0;f&n`O7^8GoOKrW|7=(oyhrLhG;hUOL@Dz(@M74f&RI`$|!d> zX;|F9)wI-GplpOR8#9ZYpq5obEC0H?n(dx_;BcNP)#_KjM%XR+I12C-1tM~J)zreF zy>8}1$L&{0EF}YDmxMs{c1P6p`T&=mkI!cCX=|`?tw%WNC00Wd(A6I)@Dhu@T4L2y zXBr%x-FA(E*i6tMFU)0*=$`AI_g;d!ym*L~VXHP>OEk+Twntu=xv*Q8ci^G--Y~Nx z*pl=J25g^J4O6qS+oj`(7a^qRY`CuHB#fWvZben$ZCQEKVFI;*2jkOOPoB@j5 zfhH;T3s@9|u3O`CSRTHiRq@}VRC_*m+hXD#UCuZ(Dm`8?{LKh*n62LZ z`YdVx419ljj##Slbsi@m2#R~s_H|JQ@+4A?8CSlUt3+@+_HYfV?BJ?=D#ytqb1ox# z;}v)0Me~o=qV(A2wA`z^#!1a{t9G}8ORsQpxesA=^Ool4qbPE&z z$dl=Pe!#{%pJ;2{w^(l<+#0E{YT~RXa0E*+?|3wplr2PJC z#_rDpBVb$h>1@Fvo&}uLOSkn=By1*hZ##u=Gmw&g>QL6QDqX@ALLcr5aTuy*mn{W9 zHmjL7G|^}AzIsA<@YRzO6=w>-*3{~GQ<1Y>rWe8vF**7CGQ7N|HlV}{-O|g?0-J!g zbIb^axYwWY|G|+u>JN?-0z+`d-gKWK=<#O|4T{GD6QFn}Q*&DG95k~68X-aJ)=tz~ Qf(~bZgRMdE9HZ%f12(shoB#j- literal 0 HcmV?d00001 diff --git a/configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat b/configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat new file mode 100644 index 0000000000000000000000000000000000000000..eba6d3c0406aef8d766fca83778d9e4e2ac3ebf4 GIT binary patch literal 1789 zcma*mc{Cf?9tZF!(vJNNBGgn+t^LFn5?g7|n8dLJtwBRcWCryjRK;tnEuyt0DT)jg zf~alKqET8~RYO`;Eu(aarIe7guY(LPI?Ma(o%8NF_nv$1y}x_zx##=AxM9(5?hrdm zCO!+W0=AA!>vZP zTybf(K(Pm3a5E8&YVn%ONsqGuJ}G_0JrX_3h(dJ=kk&@DVg1@r(i6kLaH8mO^QFrQ zCJj{9HIt^RbZ?W!oYoOkaJal?p#Le&z`%eoWxE$7+VpbWO62EIRu9lJspN?rIm1)u zr+41L#QKtjrsP~u$h{!5#yih~99P3?ujC;$ zR!CyAeomf>%Rl|`4*$Y_$?~l^T@lw+H+i69hcq#l2Vc|MMZB#J*h!q$Ca8b*I_yz2 ztn-*=;cq|@P?YL(`dx$uU|m&mwYM)`Zr?*3 zRIPJVIvfCANk5h__9j{Ga@pG0gE+Y>5062>2Y_EseBFeCm86R}2v{j|`}O%bA%!B$ z2%eqoP;+xR01G5Xe4v4KOi6$$RA4;jBR4Xbu zBkAs|`GNq;&UZ!o!+(QR2pEB|SNMuEV@rvn-{p6UP!XJ#_Msz=x(R*o;+X`#`q``V zEuq!uLIh@-tPPFq%Yj>IB^sGd#5`pk&(}Xw)79)Kk+m)V`QYA%s{VzOIV=wbt_vx; zgc&%&QJg?{j@9tb?z~n_e|OK&)Jt92urQ{s?#57{cU?QGAEddOJrV=- z6V+8|^u}9}^g8pfOjGN?LU!r$k9C|PDMYg$oIl$=^)<|$k704}q2pr%2a7qP&$&#OEkuAT;e#-C z(cvXvQF|#(3mnz5PsL;-pCmf7@lf zGC%F3dc_uRv1gqvr7JKyy*-5gA@8bMB-$R+u?jI-RaIo^KoaYMs;Bp#dFPzQQZgTG1 z?2pmNH9T61d>#YpqG^p*d9>#QJwvGc{{CWiLH5^Jxc<%5_)n(Cc#j7Fzy{wQOJ5c0 zgPZD9hFC1}XMFD;02u+_#QjJGAWaPXrwU+-Qm!xTt z0MgP3?6a4v2(7766eT-~n8QNIU(c61oj4Fj}ZlIJS9#!}i#eFVW<{2Nh(r Uvj;gtc|*WVB_+Uwx`Nt&0LSGy2LJ#7 literal 0 HcmV?d00001 diff --git a/configFiles/NeuropixelsPhase3B10x28staggered0x29_kilosortChanMap.mat b/configFiles/NeuropixelsPhase3B10x28staggered0x29_kilosortChanMap.mat index 3e73840452ec6f3f21d28064d34f69e86ad78e00..5e9277fbd348ef5e726d20deb5925c166b331d44 100644 GIT binary patch delta 494 zcmaFBH$z~8i9~Q|o`PdRk%Ez#f{~Gxv9Xn@p@NZtk-@}3<%tQb8%riJF`7=k%cfAj z&g0y9eJ{A9WH`t0<3W(uZ1&Y2g>w<2J<>+EW|%G5*Zo^y`OeiQg;hOyp@4+k0)-`7mkTk0Jx zV)3MfZTg)HPdm-+I8G;4Hs^N91TH-(y8TN<+L0+4s!=C59=1!(np1pMwl+?}_^_5I zs;MWlHtjkor`l(!{J4papGV@;Iq{cKySBMDI{Rf5oaBw2SMj)cgWR?M42IgLCvN6u d>0@FvpFE3ApZnmIo+d_ChOjbri^(_GIslVI**pLM delta 336 zcmV-W0k8g;5a14wG#FH9bs#}-av(A@ATT*PGBY|gG9WTAGBA-*BavVQv2>CGe@p-X z0C=3^V_;ygU}j)o1JWD}5E=wnq5K9$1_mGkV|E~}NY2kMN-1Vw0V!ez5ehWLCm3GP z6c2UxPhi}@NJ|{SRKY|`Jk;F}0HL(M9uEKj0IdK30C=43(9I12Q4qlKdAq)n-F*oa zPyt6LwgEJN3pZpFPCgEkU<_rRM12p zW6Tir{v|fpi6dKh7f1=iT%hzsseTIrzo|L_0X2m8MO9^uyS iv&RIE0s%XdRtGN+LDh^jz(^vXwfO*0*9I7qkOzo==Y*vI diff --git a/configFiles/neuropixPhase3B1_kilosortChanMap_all.mat b/configFiles/neuropixPhase3B1_kilosortChanMap_all.mat new file mode 100644 index 0000000000000000000000000000000000000000..d03bfa4b04a97a831b4630466f7d1f7d939578a2 GIT binary patch literal 2072 
zcma*ocTm%58VB$MFa!hyL<|c0i!4PTA%#>zji5+VKsUBEArM#vDMkd1O6VW~sZtdT zQqI+LiGpZ6q=f_sK~M}TvJ?YYsNQ)Xvn-dw6eHTc1+_fnv)LIcM0L)eMrGv)A!OQ+M|B&uhV? z?pd(mOg+9R6ozXBv|L@L16*5lP#UWF`^esRRSgUbE~>t1`plUeKD&B;)Y(fn2?7&c zVEKp>9jMT$YXV4FS#EyGm3;R|)=o>TOy4=OV*8c{sX4q46M#h%)PkTl0k>Te`+_z7 z(pn8FEjuiIH1_&s^ykBK>+|`PO|^Rr3TnMJ{l$;>?pcNT{aSC6cYfKS?>16ZxAdpc zwhSay+wn<=P!H>|v>IG^sHgqQ0j?UkKfRX?&$+|q+dE8o*q$3m{o5zLlPXLM&kM2`~7$3Y|6YMcOA7 z(>f@kaY{>`4P$3Ft=FF-)jYVMC9BVP{u2Z7o+g9f>(ixS3~Sju<(0fwCuTv#w*K<4 zu)SN@dP^vJFKj6lcD(w)u$`Sl&dxh-pM!sq%fLDddfOK@u1BY}aYU2Gqn4e--pw3| zhEH_HK-s9xBvG~w8J*tE>3G3uYvgo(K00dUG-v8Gk8Pf_Zl1TlG-vVRLGSSusrIRb z4Xe?YU#)bmzSa;P7r=tGZMuZ2K z%{Mgg!ox$(+ZWMy+Yb#P&0ju071NmN@G&^zi~PmKf1Hh1H3`a1^o~iRnkXXujYs$F zCcQ*glLzfONpCcK*tvP-V*xeAGwcxJz@c7z+oLm0{J$#&Cj7TR_t{`(JN?04u1g z%C&-3%45vxC7Gd0d1YU`e(som;F!N@uD?Dux@FolEpu1(6MbWVD%Ph9avYr3EHWl` z5aPt1j;)gaw^(WO*3x&Km0x;SK&935S|9Bb`YwnQ{;3al`DHKLRls?nUK>mLGZnj@ zCKTSgp=4YyOyK1JgH6Vt(W zQA@U9EL&9BDi@pM1B<$Sjb2>#Jl&N#XyE#1;oURa#(&P6WDWPnfGO}PYvZ%WGfjE1m-3+;i zZH`R7b#$EEys(wJHslqap!chhC8f zasB588@bxyLEjt_x(plc72FPI9^oq;%Afj(<^)ZM z0SM%!3aL>Baeo|)0q*SSmD%py$)YE8{doF_XZ9Gk4Mevh#VW(`;IIEdAADaRLl^YR z@whJk>&ppTXB12elBk@Z5LGTcQUO(Cv1g#KGr9D&6SahvwF#=0@NK91^`2VdsN?L+ z>aXSHJs3hEXI#hNdYcqk zj<%V_dLiRs*N*Ab?yGStJ-;n3xYfd=tiANZ{;?t~a%%2LJbadugS_c^#DF-F1iAXU z?`=B45!%g&OAOC2-hWfFsqs@Rt35x|?6BnLHpk!-Wv8Vk7k+?Y+pIkPx2xnt{X@w% rvnBA2j+_+Sq&`62)v1_s8iFdaRu7c3k-8sJj+?1HQ literal 0 HcmV?d00001 diff --git a/mainLoop/runTemplates.m b/mainLoop/runTemplates.m index 7f588a1f..ebdf6f6a 100644 --- a/mainLoop/runTemplates.m +++ b/mainLoop/runTemplates.m @@ -73,26 +73,28 @@ Nchan = rez.ops.Nchan; nt0 = rez.ops.nt0; -nKeep = min(Nchan*3,20); % how many PCs to keep -rez.W_a = zeros(nt0 * Nrank, nKeep, Nfilt, 'single'); -rez.W_b = zeros(Nbatches, nKeep, Nfilt, 'single'); -rez.U_a = zeros(Nchan* Nrank, nKeep, Nfilt, 'single'); -rez.U_b = zeros(Nbatches, nKeep, Nfilt, 'single'); -for j = 1:Nfilt - % do this for every template separately - WA = reshape(rez.WA(:, j, :, :), [], Nbatches); - WA = gpuArray(WA); % svd on the GPU was faster for this, but the Python randomized CPU version might be faster still - [A, B, C] = svdecon(WA); - % W_a times W_b results in a reconstruction of the time components - rez.W_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); - rez.W_b(:,:,j) = gather(C(:, 1:nKeep)); +if 0 + nKeep = min(Nchan*3,20); % how many PCs to keep + rez.W_a = zeros(nt0 * Nrank, nKeep, Nfilt, 'single'); + rez.W_b = zeros(Nbatches, nKeep, Nfilt, 'single'); + rez.U_a = zeros(Nchan* Nrank, nKeep, Nfilt, 'single'); + rez.U_b = zeros(Nbatches, nKeep, Nfilt, 'single'); + for j = 1:Nfilt + % do this for every template separately + WA = reshape(rez.WA(:, j, :, :), [], Nbatches); + WA = gpuArray(WA); % svd on the GPU was faster for this, but the Python randomized CPU version might be faster still + [A, B, C] = svdecon(WA); + % W_a times W_b results in a reconstruction of the time components + rez.W_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); + rez.W_b(:,:,j) = gather(C(:, 1:nKeep)); + + UA = reshape(rez.UA(:, j, :, :), [], Nbatches); + UA = gpuArray(UA); + [A, B, C] = svdecon(UA); + % U_a times U_b results in a reconstruction of the time components + rez.U_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); + rez.U_b(:,:,j) = gather(C(:, 1:nKeep)); + end - UA = reshape(rez.UA(:, j, :, :), [], Nbatches); - UA = gpuArray(UA); - [A, B, C] = svdecon(UA); - % U_a times U_b results in a reconstruction of the time components - rez.U_a(:,:,j) = gather(A(:, 1:nKeep) * 
B(1:nKeep, 1:nKeep)); - rez.U_b(:,:,j) = gather(C(:, 1:nKeep)); -end - -fprintf('Finished compressing time-varying templates \n') \ No newline at end of file + fprintf('Finished compressing time-varying templates \n') +end \ No newline at end of file diff --git a/preProcess/benchmarks_manipulator.m b/preProcess/benchmarks_manipulator.m new file mode 100644 index 00000000..84cc6f93 --- /dev/null +++ b/preProcess/benchmarks_manipulator.m @@ -0,0 +1,97 @@ +% benchmark manipulator +addpath(genpath('D:\GitHub\KiloSort2')) % path to kilosort folder +addpath('D:\GitHub\npy-matlab') + +pathToYourConfigFile = 'D:\GitHub\KiloSort2\configFiles'; % take from Github folder and put it somewhere else (together with the master_file) +run(fullfile(pathToYourConfigFile, 'configFile384.m')) +rootH = 'H:\'; +ops.fproc = fullfile(rootH, 'temp_wh.dat'); % proc file on a fast SSD + +root0 = 'F:\Spikes\manipulator'; +chanmaps = {'neuropixPhase3B1_kilosortChanMap_all.mat', 'NP2_kilosortChanMap.mat'}; +for j = 1:2 + chanmaps{j} = fullfile(root0, chanmaps{j}); +end + +addpath('D:\Drive\CODE\KS2') +%% +dt = 1; +root0 = 'F:\Spikes\manipulator'; +dr = []; +cc = []; +cc2 = {}; +%% +for iprobe = 2 + for isess = [3] + for ishift = [1 2 3] + rootZ = fullfile(root0, sprintf('drift%d', isess), sprintf('p%d', iprobe)); + if ishift<3 + fname = fullfile(rootZ, sprintf('rez2_%d.mat', ishift-1)); + load(fname) + dr{iprobe, isess} = rez.row_shifts; + else + fname = fullfile(rootZ, 'rez_datashift.mat'); + load(fname) + end + + bin = ops.fs * dt; + st = rez.st3(:,1); + clu = rez.st3(:,2); + nspikes = length(st); + S = sparse(clu, ceil(st/bin), ones(nspikes,1)); + igood = get_good_units(rez); + S = S(igood, :); + nbins = size(S,2); + + root_drift = fullfile(root0, sprintf('drift%d', isess)); + dy = readNPY(fullfile(root_drift, 'manip.positions.npy')); + ts = readNPY(fullfile(root_drift, sprintf('manip.timestamps_p%d.npy', iprobe))); + dy_samps = interp1(ts, dy, dt/2 + [0:dt:dt*(nbins-1)]); + dy_samps(isnan(dy_samps)) = 0; + + ix = ceil(ts(2)/dt):ceil(ts(end)/dt); + ix2 = [1:ceil(ts(2)/dt) ceil(ts(end)/dt):nbins]; + + ix2(length(ix)+1:end) = []; + + cc{iprobe, isess, ishift} = corr(dy_samps(ix)', S(:, ix)'); + cc2{iprobe, isess, ishift} = corr(dy_samps(ix(1:length(ix2)))', S(:, ix2)'); + end + end +end +%% +iprobe = 2; +isess = 3; +ishift = 1; +icc1 = cc{iprobe, isess, ishift}; +icc2 = cc2{iprobe, isess, ishift}; + +cq = quantile(icc2, [.025, .975]); +NN = length(icc1); +nbad = sum(icc1cq(2)); +disp([NN-nbad, nbad, nbad/NN]) +%% +csd = cellfun(@(x) mean(abs(x)), cc); +c2sd = cellfun(@(x) mean(abs(x)), cc2); +%% +sq(mean(csd, 2)) +sq(mean(c2sd, 2)) + +%% +iprobe = 2; +isess = 3; +ishift = 1; +rootZ = fullfile(root0, sprintf('drift%d', isess), sprintf('p%d', iprobe)); +fname = fullfile(rootZ, sprintf('rez2_%d.mat', ishift-1)); +load(fname) +%% + +imagesc(rez.ccb, [-5, 5]) +%% +cellfun(@(x) numel(x), cc) + + + +%% + + diff --git a/preProcess/clusterSingleBatches.m b/preProcess/clusterSingleBatches.m index db3ea93a..3eb0d466 100644 --- a/preProcess/clusterSingleBatches.m +++ b/preProcess/clusterSingleBatches.m @@ -100,61 +100,10 @@ fprintf('time %2.2f, pre clustered %d / %d batches \n', toc, ibatch, nBatches) end end -%% -% find Z offsets -% anothr one of these Params variables transporting parameters to the C++ code -Params = [1 NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; - -if isfield(ops, 'midpoint') - splits = [0, ceil(ops.midpoint/ops.NT), nBatches]; -else - splits = [0, nBatches]; -end -for k = 1:length(splits)-1 - ib = 
splits(k)+1:splits(k+1); - Params(1) = size(Ws,3) * length(ib); % the total number of templates is the number of templates per batch times the number of batches - [iminy{k}, ww{k}, Ns{k}] = find_integer_shifts(Params, Whs(:, ib),Ws(:,:,:,ib),... - mus(:, ib), ns(:,ib), iC, Nchan, Nfilt); -end - -if isfield(ops, 'midpoint') - iChan = 1:Nchan; - iUp = mod(iChan + 2-1, Nchan)+1; - iDown = mod(iChan - 2-1, Nchan)+1; - iMap = [iUp(iUp); iUp; iChan; iDown; iDown(iDown)]; - - - mu1 = 1e-5 + sq(sum(sum(ww{1}.^2, 1),2)).^.5; - mu2 = 1e-5 + sq(sum(sum(ww{2}.^2, 1),2)).^.5; - - CC = gpuArray.zeros(Nfilt, Nfilt, 5); - for k = 1:5 - W0 = ww{1}; - for t = 1:Nfilt - W0(:,iMap(k,:),t) = W0(:,:,t); - end - X1 = reshape(W0, [nPCs * Nchan, Nfilt]); - X2 = reshape(ww{2}, [nPCs * Nchan, Nfilt]); - CC(:,:, k) = X1' * X2 ./ (mu1 * mu2'); - % CC(:,:, k) = 2 * X1' * X2 - mu1.^2 - mu2'.^2; - end - - csum = sq(mean(max(CC.* Ns{2}' , [], 1), 2)); - % csum = sq(mean(max(CC , [], 1), 2)); - [cmax, imax] = max(csum); - imin = cat(2, iminy{1}, iminy{2} + (imax-3)); - imin = min(5, max(1, imin)); - - disp(imax) -else - imin = iminy{1}; -end + %% Params(1) = size(Ws,3) * size(Ws,4); % the total number of templates is the number of templates per batch times the number of batches -Whs2 = mod(Whs + 2 * int32(imin-3) - 1, Nchan) + 1; -rez.row_shifts = imin - 3; - tic % initialize dissimilarity matrix @@ -162,7 +111,7 @@ for ibatch = 1:nBatches % for every batch, compute in parallel its dissimilarity to ALL other batches - Wh0 = single(Whs2(:, ibatch)); % this one is the primary batch + Wh0 = single(Whs(:, ibatch)); % this one is the primary batch mu = mus(:, ibatch); % embed the templates from the primary batch back into a full, sparse representation @@ -175,7 +124,7 @@ iMatch = sq(min(abs(single(iC) - reshape(Wh0, 1, 1, [])), [], 1))<.1; % compute dissimilarities for iMatch = 1 - [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs2-1, mus, mu); + [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs-1, mus, mu); % ds are squared Euclidian distances ds = reshape(ds, Nfilt, []); % this should just be an Nfilt-long vector @@ -215,13 +164,6 @@ rez.iorig = gather(iorig); rez.ccbsort = gather(ccbsort); +% rez.iorig = randperm(nBatches); + fprintf('time %2.2f, Re-ordered %d batches. \n', toc, nBatches) -%% -nup = 0; -for ibatch = 1:nBatches - if abs(imin(ibatch) - 3) > 0 - shift_batch_on_disk(rez, ibatch, imin(ibatch) - 3); - nup = nup + 1; - end -end -fprintf('time %2.2f, Shifted up/down %d batches. 
\n', toc, nup) diff --git a/preProcess/clusterSingleBatchesShift.m b/preProcess/clusterSingleBatchesShift.m new file mode 100644 index 00000000..0f317653 --- /dev/null +++ b/preProcess/clusterSingleBatchesShift.m @@ -0,0 +1,243 @@ +function rez = clusterSingleBatchesShift(rez) +% outputs an ordering of the batches according to drift +% for each batch, it extracts spikes as threshold crossings and clusters them with kmeans +% the resulting cluster means are then compared for all pairs of batches, and a dissimilarity score is assigned to each pair +% the matrix of similarity scores is then re-ordered so that low dissimilaity is along the diagonal + + +rng('default'); rng(1); + +ops = rez.ops; + +if getOr(ops, 'reorder', 0)==0 + rez.iorig = 1:rez.temp.Nbatch; % if reordering is turned off, return consecutive order + return; +end + +nPCs = getOr(rez.ops, 'nPCs', 3); +Nfilt = ceil(rez.ops.Nchan/2); +tic +wPCA = extractPCfromSnippets(rez, nPCs); % extract PCA waveforms pooled over channels +fprintf('Obtained 7 PC waveforms in %2.2f seconds \n', toc) % 7 is the default, and I don't think it needs to be able to change + +Nchan = rez.ops.Nchan; +niter = 10; % iterations for k-means. we won't run it to convergence to save time + +nBatches = rez.temp.Nbatch; +NchanNear = min(Nchan, 2*8+1); + +% initialize big arrays on the GPU to hold the results from each batch +Ws = gpuArray.zeros(nPCs , NchanNear, Nfilt, nBatches, 'single'); % this holds the unit norm templates +mus = gpuArray.zeros(Nfilt, nBatches, 'single'); % this holds the scalings +ns = gpuArray.zeros(Nfilt, nBatches, 'single'); % this holds the number of spikes for that cluster +Whs = gpuArray.ones(Nfilt, nBatches, 'int32'); % this holds the center channel for each template + +i0 = 0; + +NrankPC = 3; % I am not sure if this gets used, but it goes into the function + +iC = getClosestChannels(rez, ops.sigmaMask, NchanNear); % return an array of closest channels for each channel + +tic +for ibatch = 1:nBatches + [uproj, call] = extractPCbatch2(rez, wPCA, min(nBatches-1, ibatch), iC); % extract spikes using PCA waveforms + % call contains the center channels for each spike + + if sum(isnan(uproj(:)))>0 %sum(mus(:,ibatch)<.1)>30 + break; % I am not sure what case this safeguards against.... + end + + if size(uproj,2)>Nfilt + % if a batch has at least as many spikes as templates we request, then cluster it + [W, mu, Wheights, irand] = initializeWdata2(call, uproj, Nchan, nPCs, Nfilt, iC); % this initialize the k-means + + % Params is a whole bunch of parameters sent to the C++ scripts inside a float64 vector + Params = [size(uproj,2) NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; + + for i = 1:niter + Wheights = reshape(Wheights, 1,1,[]); % this gets reshaped for broadcasting purposes + % we only compute distances to clusters on the same channels + iMatch = sq(min(abs(single(iC) - Wheights), [], 1))<.1; % this tells us which spikes and which clusters might match + + % get iclust and update W + [dWU, iclust, dx, nsp, dV] = mexClustering2(Params, uproj, W, mu, ... 
+ call-1, iMatch, iC-1); % CUDA script to efficiently compute distances for pairs in which iMatch is 1 + + dWU = dWU./(1e-5 + single(nsp')); % divide the cumulative waveform by the number of spikes + + mu = sum(dWU.^2,1).^.5; % norm of cluster template + W = dWU./(1e-5 + mu); % unit normalize templates + + W = reshape(W, nPCs, Nchan, Nfilt); + nW = sq(W(1, :, :).^2); % compute best channel from the square of the first PC feature + W = reshape(W, Nchan * nPCs, Nfilt); + + [~, Wheights] = max(nW,[], 1); % the new best channel of each cluster template + end + + % carefully keep track of cluster templates in dense format + W = reshape(W, nPCs, Nchan, Nfilt); + W0 = gpuArray.zeros(nPCs, NchanNear, Nfilt, 'single'); + for t = 1:Nfilt + W0(:, :, t) = W(:, iC(:, Wheights(t)), t); + end + W0 = W0 ./ (1e-5 + sum(sum(W0.^2,1),2).^.5); % I don't really know why this needs another normalization + end + + if exist('W0', 'var') + % if a batch doesn't have enough spikes, it gets the cluster templates of the previous batch + Ws(:, :, :, ibatch) = W0; + mus(:, ibatch) = mu; + ns(:, ibatch) = nsp; + Whs(:, ibatch) = int32(Wheights); + else + % if the first batch doesn't have enough spikes, then it is skipped completely + warning('data batch #%d only had %d spikes \n', ibatch, size(uproj,2)) + end + i0 = i0 + Nfilt; + + if rem(ibatch, 500)==1 + fprintf('time %2.2f, pre clustered %d / %d batches \n', toc, ibatch, nBatches) + end +end +%% + +if ops.shift_data + % find Z offsets + % anothr one of these Params variables transporting parameters to the C++ code + Params = [1 NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; + + if isfield(ops, 'midpoint') + splits = [0, ceil(ops.midpoint/ops.NT), nBatches]; + else + splits = [0, nBatches]; + end + for k = 1:length(splits)-1 + ib = splits(k)+1:splits(k+1); + Params(1) = size(Ws,3) * length(ib); % the total number of templates is the number of templates per batch times the number of batches + [iminy{k}, ww{k}, Ns{k}] = find_integer_shifts(Params, Whs(:, ib),Ws(:,:,:,ib),... 
+ mus(:, ib), ns(:,ib), iC, Nchan, Nfilt); + end + + if isfield(ops, 'midpoint') + iChan = 1:Nchan; + iUp = mod(iChan + 2-1, Nchan)+1; + iDown = mod(iChan - 2-1, Nchan)+1; + iMap = [iUp(iUp); iUp; iChan; iDown; iDown(iDown)]; + + + mu1 = 1e-5 + sq(sum(sum(ww{1}.^2, 1),2)).^.5; + mu2 = 1e-5 + sq(sum(sum(ww{2}.^2, 1),2)).^.5; + + CC = gpuArray.zeros(Nfilt, Nfilt, 5); + for k = 1:5 + W0 = ww{1}; + for t = 1:Nfilt + W0(:,iMap(k,:),t) = W0(:,:,t); + end + X1 = reshape(W0, [nPCs * Nchan, Nfilt]); + X2 = reshape(ww{2}, [nPCs * Nchan, Nfilt]); + CC(:,:, k) = X1' * X2 ./ (mu1 * mu2'); + % CC(:,:, k) = 2 * X1' * X2 - mu1.^2 - mu2'.^2; + end + + csum = sq(mean(max(CC.* Ns{2}' , [], 1), 2)); + % csum = sq(mean(max(CC , [], 1), 2)); + [cmax, imax] = max(csum); + imin = cat(2, iminy{1}, iminy{2} + (imax-3)); + imin = min(5, max(1, imin)); + + disp(imax) + else + imin = iminy{1}; + end + + imin = imin - 3; + figure(263); + plot(imin); + drawnow +else + imin = zeros(1, nBatches); +end + +% imin(:) = 0; + +%% +Params(1) = size(Ws,3) * size(Ws,4); % the total number of templates is the number of templates per batch times the number of batches + +Whs2 = mod(Whs + 2 * int32(imin) - 1, Nchan) + 1; +rez.row_shifts = imin; + + +tic + +% initialize dissimilarity matrix +ccb = gpuArray.zeros(nBatches, 'single'); + +for ibatch = 1:nBatches + % for every batch, compute in parallel its dissimilarity to ALL other batches + Wh0 = single(Whs2(:, ibatch)); % this one is the primary batch + mu = mus(:, ibatch); + + % embed the templates from the primary batch back into a full, sparse representation + W = gpuArray.zeros(nPCs , Nchan, Nfilt, 'single'); + for t = 1:Nfilt + W(:, iC(:, Wh0(t)), t) = Ws(:, :, t, ibatch); + end + + % pairs of templates that live on the same channels are potential "matches" + iMatch = sq(min(abs(single(iC) - reshape(Wh0, 1, 1, [])), [], 1))<.1; + + % compute dissimilarities for iMatch = 1 + [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs2-1, mus, mu); + + % ds are squared Euclidian distances + ds = reshape(ds, Nfilt, []); % this should just be an Nfilt-long vector + ds = max(0, ds); + ccb(ibatch,:) = mean(sqrt(ds) .* ns, 1)./mean(ns,1); % weigh the distances according to number of spikes in cluster + + if rem(ibatch, 500)==1 + fprintf('time %2.2f, compared %d / %d batches \n', toc, ibatch, nBatches) + end +end + +% some normalization steps are needed: zscoring, and symmetrizing ccb +ccb0 = zscore(ccb, 1, 1); +ccb0 = ccb0 + ccb0'; + +rez.ccb = gather(ccb0); + +% sort by manifold embedding algorithm +% iorig is the sorting of the batches +% ccbsort is the resorted matrix (useful for diagnosing drift) +[ccbsort, iorig] = sortBatches2(ccb0); + +%% some mandatory diagnostic plots to understand drift in this dataset +figure; +subplot(1,2,1) +imagesc(ccb0, [-5 5]); drawnow +xlabel('batches') +ylabel('batches') +title('batch to batch distance') + +subplot(1,2,2) +imagesc(ccbsort, [-5 5]); drawnow +xlabel('sorted batches') +ylabel('sorted batches') +title('AFTER sorting') + +rez.iorig = gather(iorig); +rez.ccbsort = gather(ccbsort); + +% rez.iorig = randperm(nBatches); + +fprintf('time %2.2f, Re-ordered %d batches. \n', toc, nBatches) +%% +nup = 0; +for ibatch = 1:nBatches + if abs(imin(ibatch)) > 0 + shift_batch_on_disk(rez, ibatch, imin(ibatch)); + nup = nup + 1; + end +end +fprintf('time %2.2f, Shifted up/down %d batches. 
\n', toc, nup) diff --git a/preProcess/kernelD.m b/preProcess/kernelD.m new file mode 100644 index 00000000..f9c24a3d --- /dev/null +++ b/preProcess/kernelD.m @@ -0,0 +1,28 @@ +function K = kernelD(xp0,yp0,len) + +D = size(xp0,1); +N = size(xp0,2); +M = size(yp0,2); + +% split M into chunks if on GPU to reduce memory usage +if isa(xp0,'gpuArray') + K=gpuArray.zeros(N,M); + cs = 60; +elseif N > 10000 + K = zeros(N,M); + cs = 10000; +else + K= zeros(N,M); + cs = M; +end + +for i = 1:ceil(M/cs) + ii = [((i-1)*cs+1):min(M,i*cs)]; + mM = length(ii); + xp = repmat(xp0,1,1,mM); + yp = reshape(repmat(yp0(:,ii),N,1),D,N,mM); + + Kn = exp( -sum(bsxfun(@times,(xp - yp).^2,1./(len.^2))/2,1)); + K(:,ii) = squeeze(Kn); + +end diff --git a/preProcess/shift_batch_on_disk.m b/preProcess/shift_batch_on_disk.m index 11f2a047..cd5935f1 100644 --- a/preProcess/shift_batch_on_disk.m +++ b/preProcess/shift_batch_on_disk.m @@ -18,10 +18,25 @@ function shift_batch_on_disk(rez, ibatch, shift) fseek(fid, offset, 'bof'); dat = fread(fid, [NT ops.Nchan], '*int16'); -iChan = 1:ops.Nchan; -iChanS = mod(iChan + 2 * shift - 1, ops.Nchan) + 1; -dat(:, iChanS) = dat; +ishift = round(shift); +shift = shift - ishift; +dat = reshape(dat, [NT, 2, ops.Nchan/2]); +dat = circshift(dat, ishift, 3); + +if shift > 0 + alpha = shift; + datup = circshift(dat, 1, 3); + dat = dat * (1-alpha) + alpha * datup; +else + alpha = -shift; + datup = circshift(dat, -1, 3); + dat = dat * (1-alpha) + alpha * datup; +end + +if alpha<0 || alpha>1 + disp(alpha) +end fseek(fid, offset, 'bof'); fwrite(fid, dat, 'int16'); % write this batch to binary file diff --git a/preProcess/shift_from_hist.m b/preProcess/shift_from_hist.m new file mode 100644 index 00000000..120ab58b --- /dev/null +++ b/preProcess/shift_from_hist.m @@ -0,0 +1,45 @@ +function [st3, imin] = shift_from_hist(rez, flag) + +ymin = min(rez.yc); +ymax = max(rez.yc); +xmin = min(rez.xc); + +if flag + rez.ops.yup = ymin:7.5:ymax; % centers of the upsampled y positions + rez.ops.xup = xmin + [0 16 32]; % centers of the upsampled x positions +else + rez.ops.yup = ymin:10:ymax; % centers of the upsampled y positions + rez.ops.xup = [11 27 43 59]; %[0 16 32]; % centers of the upsampled x positions +end + +spkTh = 10; % same as "template amplitude" with generic templates + +st3 = standalone_detector(rez, spkTh); + +dd = 5; +dep = st3(:,2); +dmin = ymin - 1; +dep = dep - dmin; + +dmax = 1 + ceil(max(dep)/dd); +ops = rez.ops; +dt = ops.NT; +NT = max(st3(:,1)); +Nbatches = ceil(NT/dt); + +batch_id = ceil(st3(:,1)/dt); + +F = zeros(dmax, 20, Nbatches); +for t = 1:Nbatches + ix = find(batch_id==t); + dep = st3(ix,2) - dmin; + amp = log10(min(99, st3(ix,3))) - log10(spkTh); + amp = amp / (log10(100) - log10(spkTh)); + + M = sparse(ceil(dep/dd), ceil(1e-5 + amp * 20), ones(numel(ix), 1), dmax, 20); + F(:, :, t) = log2(1+M); +end + +[imin, F0] = align_block(F); + +imin = imin * 5; diff --git a/preProcess/sortBatches2.m b/preProcess/sortBatches2.m index 15d4a36a..c11539d9 100644 --- a/preProcess/sortBatches2.m +++ b/preProcess/sortBatches2.m @@ -8,6 +8,7 @@ ccb0 = gpuArray(ccb0); % compute its svd on the GPU (this might also be fast enough on CPU) +% [u, s, v] = svd(ccb0); [u, s, v] = svdecon(ccb0); % initialize the positions xs of the batch embeddings to be very small but proportional to the first PC
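
The new shift_batch_on_disk.m applies fractional row shifts: the integer part of the requested shift is handled by circshift along the channel-row dimension, and the fractional remainder by linearly blending each row with its neighbour in the direction of the shift. A minimal, self-contained sketch of that scheme, with illustrative sizes and synthetic data (not taken from this patch), assuming two sites per row as in the reshape above:

% Toy illustration of the sub-row shift applied in shift_batch_on_disk.m.
NT = 100; Nchan = 8;                        % illustrative sizes only
dat = int16(randi([-100 100], NT, Nchan));  % stand-in for one batch of data
shift = 1.25;                               % requested shift in rows (can be fractional)

ishift = round(shift);                      % integer part, handled by circshift
frac   = shift - ishift;                    % fractional remainder in [-0.5, 0.5]

dat = reshape(dat, [NT, 2, Nchan/2]);       % group channels into rows of 2 sites
dat = circshift(dat, ishift, 3);            % integer shift along the row dimension

alpha = abs(frac);                          % blend weight for the neighbouring row
datup = circshift(dat, sign(frac), 3);      % neighbour in the direction of the shift
dat   = dat * (1 - alpha) + alpha * datup;  % linear interpolation (stays int16)

dat = reshape(dat, [NT, Nchan]);            % back to samples x channels

kernelD.m evaluates a Gaussian (squared-exponential) kernel between two sets of D-dimensional points, K(i,j) = exp(-sum_d (x(d,i) - y(d,j))^2 / (2*len(d)^2)), splitting the second set into chunks to limit memory on gpuArrays or very large CPU inputs. A small check of that quantity against a naive double loop, with illustrative sizes:

% Compare kernelD against a naive double loop on small CPU arrays.
D = 2; N = 5; M = 7;                        % illustrative sizes only
xp0 = rand(D, N); yp0 = rand(D, M);
len = [20; 30];                             % per-dimension length scales

Knaive = zeros(N, M);
for i = 1:N
    for j = 1:M
        Knaive(i,j) = exp(-sum(((xp0(:,i) - yp0(:,j)).^2) ./ (len.^2)) / 2);
    end
end
K = kernelD(xp0, yp0, len);                 % chunked implementation from this patch
max(abs(K(:) - Knaive(:)))                  % should be ~0

shift_from_hist.m summarizes each batch as a depth x amplitude occupancy map and passes those maps to align_block (not added by this patch) to estimate one shift per batch, which is then scaled by the depth bin size (dd = 5). A sketch of the map construction on synthetic spikes, where the st3 columns are assumed to be (sample, depth, amplitude) and dmin stands in for min(rez.yc) - 1:

% Build per-batch drift fingerprints F from synthetic spikes.
NT = 65000; Nbatches = 10; spkTh = 10; dd = 5;        % illustrative values
nspk = 5000;
st3 = [randi(NT*Nbatches, nspk, 1), ...               % spike times (samples)
       3840*rand(nspk, 1), ...                        % depths
       spkTh + 90*rand(nspk, 1)];                     % amplitudes

dmin = min(st3(:,2)) - 1;
dmax = 1 + ceil(max(st3(:,2) - dmin)/dd);
batch_id = ceil(st3(:,1)/NT);

F = zeros(dmax, 20, Nbatches);
for t = 1:Nbatches
    ix  = find(batch_id == t);
    dep = st3(ix,2) - dmin;
    amp = log10(min(99, st3(ix,3))) - log10(spkTh);   % log amplitude above threshold
    amp = amp / (log10(100) - log10(spkTh));          % normalized to [0, 1)
    M = sparse(ceil(dep/dd), ceil(1e-5 + amp*20), ones(numel(ix),1), dmax, 20);
    F(:,:,t) = log2(1 + M);                           % compress spike counts
end
% F(:,:,t) is the drift fingerprint of batch t; align_block (not in this patch)
% aligns these maps to estimate one shift per batch.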