From 947abbba457f198e71248962228a06486f3efc80 Mon Sep 17 00:00:00 2001 From: Marius Pachitariu Date: Tue, 5 May 2020 09:51:03 -0400 Subject: [PATCH] refinements --- configFiles/NP2_kilosortChanMap.mat | Bin 0 -> 2045 bytes .../NeuropixelsPhase3A301_kilosortChanMap.mat | Bin 0 -> 1789 bytes ...se3B10x28staggered0x29_kilosortChanMap.mat | Bin 1888 -> 2072 bytes .../neuropixPhase3B1_kilosortChanMap_all.mat | Bin 0 -> 2072 bytes mainLoop/runTemplates.m | 46 ++-- preProcess/benchmarks_manipulator.m | 97 +++++++ preProcess/clusterSingleBatches.m | 68 +---- preProcess/clusterSingleBatchesShift.m | 243 ++++++++++++++++++ preProcess/kernelD.m | 28 ++ preProcess/shift_batch_on_disk.m | 21 +- preProcess/shift_from_hist.m | 45 ++++ preProcess/sortBatches2.m | 1 + 12 files changed, 461 insertions(+), 88 deletions(-) create mode 100644 configFiles/NP2_kilosortChanMap.mat create mode 100644 configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat create mode 100644 configFiles/neuropixPhase3B1_kilosortChanMap_all.mat create mode 100644 preProcess/benchmarks_manipulator.m create mode 100644 preProcess/clusterSingleBatchesShift.m create mode 100644 preProcess/kernelD.m create mode 100644 preProcess/shift_from_hist.m diff --git a/configFiles/NP2_kilosortChanMap.mat b/configFiles/NP2_kilosortChanMap.mat new file mode 100644 index 0000000000000000000000000000000000000000..402458f770e3b676c016c7b12da4abfb4106611c GIT binary patch literal 2045 zcma*ndr;Ep8V7LDykAl?gVNKF+`v=_0TB@MLYenU-NLrBfhgIgrbM(AS>Dkgle|@0 z7-sI4)=F8OXiZB5<&xLXG>=MZ*+r$TQ#s?2+TF7=XU@!do_S{8f4+ac-}yua#zq7N z0p2Jy5E&SYWbRE)^aQph)7i}ZtP~Qkm9ir=3Xk&yD6B*}J23&+pGpE^4yFQuX)FMP z2QU~C7EQvT0Sp?02L8_h{W0Ls$fb+#BSs+5kCn*cgX|Sc9CMqCOvJLOhHc`b)(}=v}mWavA>p~n$>mru_94&a+D}vvui*$JzU6k~48O3;IH;=lq zC#M!CCL)tUJ;r!fyi=0GeFu9omyo&()NELB16%CtHx;`6NKgK+@5KJb5IG=(L|`b8 z4s_m`R`Xy?VHY1(`b&-1so4nHnllwj$6AblU5m{=tBl7(m2{N8LPH@dUCA9rPK~@a zvTU5(a>kRsu9K>cA*(F6&sl343;V9ok<(O-DZa5ll}z{0%#xpK)wY~DqxFp^hoz4? 
zrL9+`vT13{d1>3zSM>F7XHaivH>{q4eb#EgwsZEY^%fUdyoX%br1Pj-J3+mItFVY- z74}q)dQK8FCMZ@xC%5eh_u*}B`-fel?*22b{y z-r3J8yPdUM&?qdekN@^+-0;f&n`O7^8GoOKrW|7=(oyhrLhG;hUOL@Dz(@M74f&RI`$|!d> zX;|F9)wI-GplpOR8#9ZYpq5obEC0H?n(dx_;BcNP)#_KjM%XR+I12C-1tM~J)zreF zy>8}1$L&{0EF}YDmxMs{c1P6p`T&=mkI!cCX=|`?tw%WNC00Wd(A6I)@Dhu@T4L2y zXBr%x-FA(E*i6tMFU)0*=$`AI_g;d!ym*L~VXHP>OEk+Twntu=xv*Q8ci^G--Y~Nx z*pl=J25g^J4O6qS+oj`(7a^qRY`CuHB#fWvZben$ZCQEKVFI;*2jkOOPoB@j5 zfhH;T3s@9|u3O`CSRTHiRq@}VRC_*m+hXD#UCuZ(Dm`8?{LKh*n62LZ z`YdVx419ljj##Slbsi@m2#R~s_H|JQ@+4A?8CSlUt3+@+_HYfV?BJ?=D#ytqb1ox# z;}v)0Me~o=qV(A2wA`z^#!1a{t9G}8ORsQpxesA=^Ool4qbPE&z z$dl=Pe!#{%pJ;2{w^(l<+#0E{YT~RXa0E*+?|3wplr2PJC z#_rDpBVb$h>1@Fvo&}uLOSkn=By1*hZ##u=Gmw&g>QL6QDqX@ALLcr5aTuy*mn{W9 zHmjL7G|^}AzIsA<@YRzO6=w>-*3{~GQ<1Y>rWe8vF**7CGQ7N|HlV}{-O|g?0-J!g zbIb^axYwWY|G|+u>JN?-0z+`d-gKWK=<#O|4T{GD6QFn}Q*&DG95k~68X-aJ)=tz~ Qf(~bZgRMdE9HZ%f12(shoB#j- literal 0 HcmV?d00001 diff --git a/configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat b/configFiles/NeuropixelsPhase3A301_kilosortChanMap.mat new file mode 100644 index 0000000000000000000000000000000000000000..eba6d3c0406aef8d766fca83778d9e4e2ac3ebf4 GIT binary patch literal 1789 zcma*mc{Cf?9tZF!(vJNNBGgn+t^LFn5?g7|n8dLJtwBRcWCryjRK;tnEuyt0DT)jg zf~alKqET8~RYO`;Eu(aarIe7guY(LPI?Ma(o%8NF_nv$1y}x_zx##=AxM9(5?hrdm zCO!+W0=AA!>vZP zTybf(K(Pm3a5E8&YVn%ONsqGuJ}G_0JrX_3h(dJ=kk&@DVg1@r(i6kLaH8mO^QFrQ zCJj{9HIt^RbZ?W!oYoOkaJal?p#Le&z`%eoWxE$7+VpbWO62EIRu9lJspN?rIm1)u zr+41L#QKtjrsP~u$h{!5#yih~99P3?ujC;$ zR!CyAeomf>%Rl|`4*$Y_$?~l^T@lw+H+i69hcq#l2Vc|MMZB#J*h!q$Ca8b*I_yz2 ztn-*=;cq|@P?YL(`dx$uU|m&mwYM)`Zr?*3 zRIPJVIvfCANk5h__9j{Ga@pG0gE+Y>5062>2Y_EseBFeCm86R}2v{j|`}O%bA%!B$ z2%eqoP;+xR01G5Xe4v4KOi6$$RA4;jBR4Xbu zBkAs|`GNq;&UZ!o!+(QR2pEB|SNMuEV@rvn-{p6UP!XJ#_Msz=x(R*o;+X`#`q``V zEuq!uLIh@-tPPFq%Yj>IB^sGd#5`pk&(}Xw)79)Kk+m)V`QYA%s{VzOIV=wbt_vx; zgc&%&QJg?{j@9tb?z~n_e|OK&)Jt92urQ{s?#57{cU?QGAEddOJrV=- z6V+8|^u}9}^g8pfOjGN?LU!r$k9C|PDMYg$oIl$=^)<|$k704}q2pr%2a7qP&$&#OEkuAT;e#-C z(cvXvQF|#(3mnz5PsL;-pCmf7@lf zGC%F3dc_uRv1gqvr7JKyy*-5gA@8bMB-$R+u?jI-RaIo^KoaYMs;Bp#dFPzQQZgTG1 z?2pmNH9T61d>#YpqG^p*d9>#QJwvGc{{CWiLH5^Jxc<%5_)n(Cc#j7Fzy{wQOJ5c0 zgPZD9hFC1}XMFD;02u+_#QjJGAWaPXrwU+-Qm!xTt z0MgP3?6a4v2(7766eT-~n8QNIU(c61oj4Fj}ZlIJS9#!}i#eFVW<{2Nh(r Uvj;gtc|*WVB_+Uwx`Nt&0LSGy2LJ#7 literal 0 HcmV?d00001 diff --git a/configFiles/NeuropixelsPhase3B10x28staggered0x29_kilosortChanMap.mat b/configFiles/NeuropixelsPhase3B10x28staggered0x29_kilosortChanMap.mat index 3e73840452ec6f3f21d28064d34f69e86ad78e00..5e9277fbd348ef5e726d20deb5925c166b331d44 100644 GIT binary patch delta 494 zcmaFBH$z~8i9~Q|o`PdRk%Ez#f{~Gxv9Xn@p@NZtk-@}3<%tQb8%riJF`7=k%cfAj z&g0y9eJ{A9WH`t0<3W(uZ1&Y2g>w<2J<>+EW|%G5*Zo^y`OeiQg;hOyp@4+k0)-`7mkTk0Jx zV)3MfZTg)HPdm-+I8G;4Hs^N91TH-(y8TN<+L0+4s!=C59=1!(np1pMwl+?}_^_5I zs;MWlHtjkor`l(!{J4papGV@;Iq{cKySBMDI{Rf5oaBw2SMj)cgWR?M42IgLCvN6u d>0@FvpFE3ApZnmIo+d_ChOjbri^(_GIslVI**pLM delta 336 zcmV-W0k8g;5a14wG#FH9bs#}-av(A@ATT*PGBY|gG9WTAGBA-*BavVQv2>CGe@p-X z0C=3^V_;ygU}j)o1JWD}5E=wnq5K9$1_mGkV|E~}NY2kMN-1Vw0V!ez5ehWLCm3GP z6c2UxPhi}@NJ|{SRKY|`Jk;F}0HL(M9uEKj0IdK30C=43(9I12Q4qlKdAq)n-F*oa zPyt6LwgEJN3pZpFPCgEkU<_rRM12p zW6Tir{v|fpi6dKh7f1=iT%hzsseTIrzo|L_0X2m8MO9^uyS iv&RIE0s%XdRtGN+LDh^jz(^vXwfO*0*9I7qkOzo==Y*vI diff --git a/configFiles/neuropixPhase3B1_kilosortChanMap_all.mat b/configFiles/neuropixPhase3B1_kilosortChanMap_all.mat new file mode 100644 index 0000000000000000000000000000000000000000..d03bfa4b04a97a831b4630466f7d1f7d939578a2 GIT binary patch literal 2072 
zcma*ocTm%58VB$MFa!hyL<|c0i!4PTA%#>zji5+VKsUBEArM#vDMkd1O6VW~sZtdT zQqI+LiGpZ6q=f_sK~M}TvJ?YYsNQ)Xvn-dw6eHTc1+_fnv)LIcM0L)eMrGv)A!OQ+M|B&uhV? z?pd(mOg+9R6ozXBv|L@L16*5lP#UWF`^esRRSgUbE~>t1`plUeKD&B;)Y(fn2?7&c zVEKp>9jMT$YXV4FS#EyGm3;R|)=o>TOy4=OV*8c{sX4q46M#h%)PkTl0k>Te`+_z7 z(pn8FEjuiIH1_&s^ykBK>+|`PO|^Rr3TnMJ{l$;>?pcNT{aSC6cYfKS?>16ZxAdpc zwhSay+wn<=P!H>|v>IG^sHgqQ0j?UkKfRX?&$+|q+dE8o*q$3m{o5zLlPXLM&kM2`~7$3Y|6YMcOA7 z(>f@kaY{>`4P$3Ft=FF-)jYVMC9BVP{u2Z7o+g9f>(ixS3~Sju<(0fwCuTv#w*K<4 zu)SN@dP^vJFKj6lcD(w)u$`Sl&dxh-pM!sq%fLDddfOK@u1BY}aYU2Gqn4e--pw3| zhEH_HK-s9xBvG~w8J*tE>3G3uYvgo(K00dUG-v8Gk8Pf_Zl1TlG-vVRLGSSusrIRb z4Xe?YU#)bmzSa;P7r=tGZMuZ2K z%{Mgg!ox$(+ZWMy+Yb#P&0ju071NmN@G&^zi~PmKf1Hh1H3`a1^o~iRnkXXujYs$F zCcQ*glLzfONpCcK*tvP-V*xeAGwcxJz@c7z+oLm0{J$#&Cj7TR_t{`(JN?04u1g z%C&-3%45vxC7Gd0d1YU`e(som;F!N@uD?Dux@FolEpu1(6MbWVD%Ph9avYr3EHWl` z5aPt1j;)gaw^(WO*3x&Km0x;SK&935S|9Bb`YwnQ{;3al`DHKLRls?nUK>mLGZnj@ zCKTSgp=4YyOyK1JgH6Vt(W zQA@U9EL&9BDi@pM1B<$Sjb2>#Jl&N#XyE#1;oURa#(&P6WDWPnfGO}PYvZ%WGfjE1m-3+;i zZH`R7b#$EEys(wJHslqap!chhC8f zasB588@bxyLEjt_x(plc72FPI9^oq;%Afj(<^)ZM z0SM%!3aL>Baeo|)0q*SSmD%py$)YE8{doF_XZ9Gk4Mevh#VW(`;IIEdAADaRLl^YR z@whJk>&ppTXB12elBk@Z5LGTcQUO(Cv1g#KGr9D&6SahvwF#=0@NK91^`2VdsN?L+ z>aXSHJs3hEXI#hNdYcqk zj<%V_dLiRs*N*Ab?yGStJ-;n3xYfd=tiANZ{;?t~a%%2LJbadugS_c^#DF-F1iAXU z?`=B45!%g&OAOC2-hWfFsqs@Rt35x|?6BnLHpk!-Wv8Vk7k+?Y+pIkPx2xnt{X@w% rvnBA2j+_+Sq&`62)v1_s8iFdaRu7c3k-8sJj+?1HQ literal 0 HcmV?d00001 diff --git a/mainLoop/runTemplates.m b/mainLoop/runTemplates.m index 7f588a1f..ebdf6f6a 100644 --- a/mainLoop/runTemplates.m +++ b/mainLoop/runTemplates.m @@ -73,26 +73,28 @@ Nchan = rez.ops.Nchan; nt0 = rez.ops.nt0; -nKeep = min(Nchan*3,20); % how many PCs to keep -rez.W_a = zeros(nt0 * Nrank, nKeep, Nfilt, 'single'); -rez.W_b = zeros(Nbatches, nKeep, Nfilt, 'single'); -rez.U_a = zeros(Nchan* Nrank, nKeep, Nfilt, 'single'); -rez.U_b = zeros(Nbatches, nKeep, Nfilt, 'single'); -for j = 1:Nfilt - % do this for every template separately - WA = reshape(rez.WA(:, j, :, :), [], Nbatches); - WA = gpuArray(WA); % svd on the GPU was faster for this, but the Python randomized CPU version might be faster still - [A, B, C] = svdecon(WA); - % W_a times W_b results in a reconstruction of the time components - rez.W_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); - rez.W_b(:,:,j) = gather(C(:, 1:nKeep)); +if 0 + nKeep = min(Nchan*3,20); % how many PCs to keep + rez.W_a = zeros(nt0 * Nrank, nKeep, Nfilt, 'single'); + rez.W_b = zeros(Nbatches, nKeep, Nfilt, 'single'); + rez.U_a = zeros(Nchan* Nrank, nKeep, Nfilt, 'single'); + rez.U_b = zeros(Nbatches, nKeep, Nfilt, 'single'); + for j = 1:Nfilt + % do this for every template separately + WA = reshape(rez.WA(:, j, :, :), [], Nbatches); + WA = gpuArray(WA); % svd on the GPU was faster for this, but the Python randomized CPU version might be faster still + [A, B, C] = svdecon(WA); + % W_a times W_b results in a reconstruction of the time components + rez.W_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); + rez.W_b(:,:,j) = gather(C(:, 1:nKeep)); + + UA = reshape(rez.UA(:, j, :, :), [], Nbatches); + UA = gpuArray(UA); + [A, B, C] = svdecon(UA); + % U_a times U_b results in a reconstruction of the time components + rez.U_a(:,:,j) = gather(A(:, 1:nKeep) * B(1:nKeep, 1:nKeep)); + rez.U_b(:,:,j) = gather(C(:, 1:nKeep)); + end - UA = reshape(rez.UA(:, j, :, :), [], Nbatches); - UA = gpuArray(UA); - [A, B, C] = svdecon(UA); - % U_a times U_b results in a reconstruction of the time components - rez.U_a(:,:,j) = gather(A(:, 1:nKeep) * 
B(1:nKeep, 1:nKeep)); - rez.U_b(:,:,j) = gather(C(:, 1:nKeep)); -end - -fprintf('Finished compressing time-varying templates \n') \ No newline at end of file + fprintf('Finished compressing time-varying templates \n') +end \ No newline at end of file diff --git a/preProcess/benchmarks_manipulator.m b/preProcess/benchmarks_manipulator.m new file mode 100644 index 00000000..84cc6f93 --- /dev/null +++ b/preProcess/benchmarks_manipulator.m @@ -0,0 +1,97 @@ +% benchmark manipulator +addpath(genpath('D:\GitHub\KiloSort2')) % path to kilosort folder +addpath('D:\GitHub\npy-matlab') + +pathToYourConfigFile = 'D:\GitHub\KiloSort2\configFiles'; % take from Github folder and put it somewhere else (together with the master_file) +run(fullfile(pathToYourConfigFile, 'configFile384.m')) +rootH = 'H:\'; +ops.fproc = fullfile(rootH, 'temp_wh.dat'); % proc file on a fast SSD + +root0 = 'F:\Spikes\manipulator'; +chanmaps = {'neuropixPhase3B1_kilosortChanMap_all.mat', 'NP2_kilosortChanMap.mat'}; +for j = 1:2 + chanmaps{j} = fullfile(root0, chanmaps{j}); +end + +addpath('D:\Drive\CODE\KS2') +%% +dt = 1; +root0 = 'F:\Spikes\manipulator'; +dr = []; +cc = []; +cc2 = {}; +%% +for iprobe = 2 + for isess = [3] + for ishift = [1 2 3] + rootZ = fullfile(root0, sprintf('drift%d', isess), sprintf('p%d', iprobe)); + if ishift<3 + fname = fullfile(rootZ, sprintf('rez2_%d.mat', ishift-1)); + load(fname) + dr{iprobe, isess} = rez.row_shifts; + else + fname = fullfile(rootZ, 'rez_datashift.mat'); + load(fname) + end + + bin = ops.fs * dt; + st = rez.st3(:,1); + clu = rez.st3(:,2); + nspikes = length(st); + S = sparse(clu, ceil(st/bin), ones(nspikes,1)); + igood = get_good_units(rez); + S = S(igood, :); + nbins = size(S,2); + + root_drift = fullfile(root0, sprintf('drift%d', isess)); + dy = readNPY(fullfile(root_drift, 'manip.positions.npy')); + ts = readNPY(fullfile(root_drift, sprintf('manip.timestamps_p%d.npy', iprobe))); + dy_samps = interp1(ts, dy, dt/2 + [0:dt:dt*(nbins-1)]); + dy_samps(isnan(dy_samps)) = 0; + + ix = ceil(ts(2)/dt):ceil(ts(end)/dt); + ix2 = [1:ceil(ts(2)/dt) ceil(ts(end)/dt):nbins]; + + ix2(length(ix)+1:end) = []; + + cc{iprobe, isess, ishift} = corr(dy_samps(ix)', S(:, ix)'); + cc2{iprobe, isess, ishift} = corr(dy_samps(ix(1:length(ix2)))', S(:, ix2)'); + end + end +end +%% +iprobe = 2; +isess = 3; +ishift = 1; +icc1 = cc{iprobe, isess, ishift}; +icc2 = cc2{iprobe, isess, ishift}; + +cq = quantile(icc2, [.025, .975]); +NN = length(icc1); +nbad = sum(icc1cq(2)); +disp([NN-nbad, nbad, nbad/NN]) +%% +csd = cellfun(@(x) mean(abs(x)), cc); +c2sd = cellfun(@(x) mean(abs(x)), cc2); +%% +sq(mean(csd, 2)) +sq(mean(c2sd, 2)) + +%% +iprobe = 2; +isess = 3; +ishift = 1; +rootZ = fullfile(root0, sprintf('drift%d', isess), sprintf('p%d', iprobe)); +fname = fullfile(rootZ, sprintf('rez2_%d.mat', ishift-1)); +load(fname) +%% + +imagesc(rez.ccb, [-5, 5]) +%% +cellfun(@(x) numel(x), cc) + + + +%% + + diff --git a/preProcess/clusterSingleBatches.m b/preProcess/clusterSingleBatches.m index db3ea93a..3eb0d466 100644 --- a/preProcess/clusterSingleBatches.m +++ b/preProcess/clusterSingleBatches.m @@ -100,61 +100,10 @@ fprintf('time %2.2f, pre clustered %d / %d batches \n', toc, ibatch, nBatches) end end -%% -% find Z offsets -% anothr one of these Params variables transporting parameters to the C++ code -Params = [1 NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; - -if isfield(ops, 'midpoint') - splits = [0, ceil(ops.midpoint/ops.NT), nBatches]; -else - splits = [0, nBatches]; -end -for k = 1:length(splits)-1 - ib = 
splits(k)+1:splits(k+1); - Params(1) = size(Ws,3) * length(ib); % the total number of templates is the number of templates per batch times the number of batches - [iminy{k}, ww{k}, Ns{k}] = find_integer_shifts(Params, Whs(:, ib),Ws(:,:,:,ib),... - mus(:, ib), ns(:,ib), iC, Nchan, Nfilt); -end - -if isfield(ops, 'midpoint') - iChan = 1:Nchan; - iUp = mod(iChan + 2-1, Nchan)+1; - iDown = mod(iChan - 2-1, Nchan)+1; - iMap = [iUp(iUp); iUp; iChan; iDown; iDown(iDown)]; - - - mu1 = 1e-5 + sq(sum(sum(ww{1}.^2, 1),2)).^.5; - mu2 = 1e-5 + sq(sum(sum(ww{2}.^2, 1),2)).^.5; - - CC = gpuArray.zeros(Nfilt, Nfilt, 5); - for k = 1:5 - W0 = ww{1}; - for t = 1:Nfilt - W0(:,iMap(k,:),t) = W0(:,:,t); - end - X1 = reshape(W0, [nPCs * Nchan, Nfilt]); - X2 = reshape(ww{2}, [nPCs * Nchan, Nfilt]); - CC(:,:, k) = X1' * X2 ./ (mu1 * mu2'); - % CC(:,:, k) = 2 * X1' * X2 - mu1.^2 - mu2'.^2; - end - - csum = sq(mean(max(CC.* Ns{2}' , [], 1), 2)); - % csum = sq(mean(max(CC , [], 1), 2)); - [cmax, imax] = max(csum); - imin = cat(2, iminy{1}, iminy{2} + (imax-3)); - imin = min(5, max(1, imin)); - - disp(imax) -else - imin = iminy{1}; -end + %% Params(1) = size(Ws,3) * size(Ws,4); % the total number of templates is the number of templates per batch times the number of batches -Whs2 = mod(Whs + 2 * int32(imin-3) - 1, Nchan) + 1; -rez.row_shifts = imin - 3; - tic % initialize dissimilarity matrix @@ -162,7 +111,7 @@ for ibatch = 1:nBatches % for every batch, compute in parallel its dissimilarity to ALL other batches - Wh0 = single(Whs2(:, ibatch)); % this one is the primary batch + Wh0 = single(Whs(:, ibatch)); % this one is the primary batch mu = mus(:, ibatch); % embed the templates from the primary batch back into a full, sparse representation @@ -175,7 +124,7 @@ iMatch = sq(min(abs(single(iC) - reshape(Wh0, 1, 1, [])), [], 1))<.1; % compute dissimilarities for iMatch = 1 - [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs2-1, mus, mu); + [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs-1, mus, mu); % ds are squared Euclidian distances ds = reshape(ds, Nfilt, []); % this should just be an Nfilt-long vector @@ -215,13 +164,6 @@ rez.iorig = gather(iorig); rez.ccbsort = gather(ccbsort); +% rez.iorig = randperm(nBatches); + fprintf('time %2.2f, Re-ordered %d batches. \n', toc, nBatches) -%% -nup = 0; -for ibatch = 1:nBatches - if abs(imin(ibatch) - 3) > 0 - shift_batch_on_disk(rez, ibatch, imin(ibatch) - 3); - nup = nup + 1; - end -end -fprintf('time %2.2f, Shifted up/down %d batches. 
\n', toc, nup) diff --git a/preProcess/clusterSingleBatchesShift.m b/preProcess/clusterSingleBatchesShift.m new file mode 100644 index 00000000..0f317653 --- /dev/null +++ b/preProcess/clusterSingleBatchesShift.m @@ -0,0 +1,243 @@ +function rez = clusterSingleBatchesShift(rez) +% outputs an ordering of the batches according to drift +% for each batch, it extracts spikes as threshold crossings and clusters them with kmeans +% the resulting cluster means are then compared for all pairs of batches, and a dissimilarity score is assigned to each pair +% the matrix of similarity scores is then re-ordered so that low dissimilaity is along the diagonal + + +rng('default'); rng(1); + +ops = rez.ops; + +if getOr(ops, 'reorder', 0)==0 + rez.iorig = 1:rez.temp.Nbatch; % if reordering is turned off, return consecutive order + return; +end + +nPCs = getOr(rez.ops, 'nPCs', 3); +Nfilt = ceil(rez.ops.Nchan/2); +tic +wPCA = extractPCfromSnippets(rez, nPCs); % extract PCA waveforms pooled over channels +fprintf('Obtained 7 PC waveforms in %2.2f seconds \n', toc) % 7 is the default, and I don't think it needs to be able to change + +Nchan = rez.ops.Nchan; +niter = 10; % iterations for k-means. we won't run it to convergence to save time + +nBatches = rez.temp.Nbatch; +NchanNear = min(Nchan, 2*8+1); + +% initialize big arrays on the GPU to hold the results from each batch +Ws = gpuArray.zeros(nPCs , NchanNear, Nfilt, nBatches, 'single'); % this holds the unit norm templates +mus = gpuArray.zeros(Nfilt, nBatches, 'single'); % this holds the scalings +ns = gpuArray.zeros(Nfilt, nBatches, 'single'); % this holds the number of spikes for that cluster +Whs = gpuArray.ones(Nfilt, nBatches, 'int32'); % this holds the center channel for each template + +i0 = 0; + +NrankPC = 3; % I am not sure if this gets used, but it goes into the function + +iC = getClosestChannels(rez, ops.sigmaMask, NchanNear); % return an array of closest channels for each channel + +tic +for ibatch = 1:nBatches + [uproj, call] = extractPCbatch2(rez, wPCA, min(nBatches-1, ibatch), iC); % extract spikes using PCA waveforms + % call contains the center channels for each spike + + if sum(isnan(uproj(:)))>0 %sum(mus(:,ibatch)<.1)>30 + break; % I am not sure what case this safeguards against.... + end + + if size(uproj,2)>Nfilt + % if a batch has at least as many spikes as templates we request, then cluster it + [W, mu, Wheights, irand] = initializeWdata2(call, uproj, Nchan, nPCs, Nfilt, iC); % this initialize the k-means + + % Params is a whole bunch of parameters sent to the C++ scripts inside a float64 vector + Params = [size(uproj,2) NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; + + for i = 1:niter + Wheights = reshape(Wheights, 1,1,[]); % this gets reshaped for broadcasting purposes + % we only compute distances to clusters on the same channels + iMatch = sq(min(abs(single(iC) - Wheights), [], 1))<.1; % this tells us which spikes and which clusters might match + + % get iclust and update W + [dWU, iclust, dx, nsp, dV] = mexClustering2(Params, uproj, W, mu, ... 
+ call-1, iMatch, iC-1); % CUDA script to efficiently compute distances for pairs in which iMatch is 1 + + dWU = dWU./(1e-5 + single(nsp')); % divide the cumulative waveform by the number of spikes + + mu = sum(dWU.^2,1).^.5; % norm of cluster template + W = dWU./(1e-5 + mu); % unit normalize templates + + W = reshape(W, nPCs, Nchan, Nfilt); + nW = sq(W(1, :, :).^2); % compute best channel from the square of the first PC feature + W = reshape(W, Nchan * nPCs, Nfilt); + + [~, Wheights] = max(nW,[], 1); % the new best channel of each cluster template + end + + % carefully keep track of cluster templates in dense format + W = reshape(W, nPCs, Nchan, Nfilt); + W0 = gpuArray.zeros(nPCs, NchanNear, Nfilt, 'single'); + for t = 1:Nfilt + W0(:, :, t) = W(:, iC(:, Wheights(t)), t); + end + W0 = W0 ./ (1e-5 + sum(sum(W0.^2,1),2).^.5); % I don't really know why this needs another normalization + end + + if exist('W0', 'var') + % if a batch doesn't have enough spikes, it gets the cluster templates of the previous batch + Ws(:, :, :, ibatch) = W0; + mus(:, ibatch) = mu; + ns(:, ibatch) = nsp; + Whs(:, ibatch) = int32(Wheights); + else + % if the first batch doesn't have enough spikes, then it is skipped completely + warning('data batch #%d only had %d spikes \n', ibatch, size(uproj,2)) + end + i0 = i0 + Nfilt; + + if rem(ibatch, 500)==1 + fprintf('time %2.2f, pre clustered %d / %d batches \n', toc, ibatch, nBatches) + end +end +%% + +if ops.shift_data + % find Z offsets + % anothr one of these Params variables transporting parameters to the C++ code + Params = [1 NrankPC Nfilt 0 size(W,1) 0 NchanNear Nchan]; + + if isfield(ops, 'midpoint') + splits = [0, ceil(ops.midpoint/ops.NT), nBatches]; + else + splits = [0, nBatches]; + end + for k = 1:length(splits)-1 + ib = splits(k)+1:splits(k+1); + Params(1) = size(Ws,3) * length(ib); % the total number of templates is the number of templates per batch times the number of batches + [iminy{k}, ww{k}, Ns{k}] = find_integer_shifts(Params, Whs(:, ib),Ws(:,:,:,ib),... 
+ mus(:, ib), ns(:,ib), iC, Nchan, Nfilt); + end + + if isfield(ops, 'midpoint') + iChan = 1:Nchan; + iUp = mod(iChan + 2-1, Nchan)+1; + iDown = mod(iChan - 2-1, Nchan)+1; + iMap = [iUp(iUp); iUp; iChan; iDown; iDown(iDown)]; + + + mu1 = 1e-5 + sq(sum(sum(ww{1}.^2, 1),2)).^.5; + mu2 = 1e-5 + sq(sum(sum(ww{2}.^2, 1),2)).^.5; + + CC = gpuArray.zeros(Nfilt, Nfilt, 5); + for k = 1:5 + W0 = ww{1}; + for t = 1:Nfilt + W0(:,iMap(k,:),t) = W0(:,:,t); + end + X1 = reshape(W0, [nPCs * Nchan, Nfilt]); + X2 = reshape(ww{2}, [nPCs * Nchan, Nfilt]); + CC(:,:, k) = X1' * X2 ./ (mu1 * mu2'); + % CC(:,:, k) = 2 * X1' * X2 - mu1.^2 - mu2'.^2; + end + + csum = sq(mean(max(CC.* Ns{2}' , [], 1), 2)); + % csum = sq(mean(max(CC , [], 1), 2)); + [cmax, imax] = max(csum); + imin = cat(2, iminy{1}, iminy{2} + (imax-3)); + imin = min(5, max(1, imin)); + + disp(imax) + else + imin = iminy{1}; + end + + imin = imin - 3; + figure(263); + plot(imin); + drawnow +else + imin = zeros(1, nBatches); +end + +% imin(:) = 0; + +%% +Params(1) = size(Ws,3) * size(Ws,4); % the total number of templates is the number of templates per batch times the number of batches + +Whs2 = mod(Whs + 2 * int32(imin) - 1, Nchan) + 1; +rez.row_shifts = imin; + + +tic + +% initialize dissimilarity matrix +ccb = gpuArray.zeros(nBatches, 'single'); + +for ibatch = 1:nBatches + % for every batch, compute in parallel its dissimilarity to ALL other batches + Wh0 = single(Whs2(:, ibatch)); % this one is the primary batch + mu = mus(:, ibatch); + + % embed the templates from the primary batch back into a full, sparse representation + W = gpuArray.zeros(nPCs , Nchan, Nfilt, 'single'); + for t = 1:Nfilt + W(:, iC(:, Wh0(t)), t) = Ws(:, :, t, ibatch); + end + + % pairs of templates that live on the same channels are potential "matches" + iMatch = sq(min(abs(single(iC) - reshape(Wh0, 1, 1, [])), [], 1))<.1; + + % compute dissimilarities for iMatch = 1 + [iclust, ds] = mexDistances2(Params, Ws, W, iMatch, iC-1, Whs2-1, mus, mu); + + % ds are squared Euclidian distances + ds = reshape(ds, Nfilt, []); % this should just be an Nfilt-long vector + ds = max(0, ds); + ccb(ibatch,:) = mean(sqrt(ds) .* ns, 1)./mean(ns,1); % weigh the distances according to number of spikes in cluster + + if rem(ibatch, 500)==1 + fprintf('time %2.2f, compared %d / %d batches \n', toc, ibatch, nBatches) + end +end + +% some normalization steps are needed: zscoring, and symmetrizing ccb +ccb0 = zscore(ccb, 1, 1); +ccb0 = ccb0 + ccb0'; + +rez.ccb = gather(ccb0); + +% sort by manifold embedding algorithm +% iorig is the sorting of the batches +% ccbsort is the resorted matrix (useful for diagnosing drift) +[ccbsort, iorig] = sortBatches2(ccb0); + +%% some mandatory diagnostic plots to understand drift in this dataset +figure; +subplot(1,2,1) +imagesc(ccb0, [-5 5]); drawnow +xlabel('batches') +ylabel('batches') +title('batch to batch distance') + +subplot(1,2,2) +imagesc(ccbsort, [-5 5]); drawnow +xlabel('sorted batches') +ylabel('sorted batches') +title('AFTER sorting') + +rez.iorig = gather(iorig); +rez.ccbsort = gather(ccbsort); + +% rez.iorig = randperm(nBatches); + +fprintf('time %2.2f, Re-ordered %d batches. \n', toc, nBatches) +%% +nup = 0; +for ibatch = 1:nBatches + if abs(imin(ibatch)) > 0 + shift_batch_on_disk(rez, ibatch, imin(ibatch)); + nup = nup + 1; + end +end +fprintf('time %2.2f, Shifted up/down %d batches. 
\n', toc, nup) diff --git a/preProcess/kernelD.m b/preProcess/kernelD.m new file mode 100644 index 00000000..f9c24a3d --- /dev/null +++ b/preProcess/kernelD.m @@ -0,0 +1,28 @@ +function K = kernelD(xp0,yp0,len) + +D = size(xp0,1); +N = size(xp0,2); +M = size(yp0,2); + +% split M into chunks if on GPU to reduce memory usage +if isa(xp0,'gpuArray') + K=gpuArray.zeros(N,M); + cs = 60; +elseif N > 10000 + K = zeros(N,M); + cs = 10000; +else + K= zeros(N,M); + cs = M; +end + +for i = 1:ceil(M/cs) + ii = [((i-1)*cs+1):min(M,i*cs)]; + mM = length(ii); + xp = repmat(xp0,1,1,mM); + yp = reshape(repmat(yp0(:,ii),N,1),D,N,mM); + + Kn = exp( -sum(bsxfun(@times,(xp - yp).^2,1./(len.^2))/2,1)); + K(:,ii) = squeeze(Kn); + +end diff --git a/preProcess/shift_batch_on_disk.m b/preProcess/shift_batch_on_disk.m index 11f2a047..cd5935f1 100644 --- a/preProcess/shift_batch_on_disk.m +++ b/preProcess/shift_batch_on_disk.m @@ -18,10 +18,25 @@ function shift_batch_on_disk(rez, ibatch, shift) fseek(fid, offset, 'bof'); dat = fread(fid, [NT ops.Nchan], '*int16'); -iChan = 1:ops.Nchan; -iChanS = mod(iChan + 2 * shift - 1, ops.Nchan) + 1; -dat(:, iChanS) = dat; +ishift = round(shift); +shift = shift - ishift; +dat = reshape(dat, [NT, 2, ops.Nchan/2]); +dat = circshift(dat, ishift, 3); + +if shift > 0 + alpha = shift; + datup = circshift(dat, 1, 3); + dat = dat * (1-alpha) + alpha * datup; +else + alpha = -shift; + datup = circshift(dat, -1, 3); + dat = dat * (1-alpha) + alpha * datup; +end + +if alpha<0 || alpha>1 + disp(alpha) +end fseek(fid, offset, 'bof'); fwrite(fid, dat, 'int16'); % write this batch to binary file diff --git a/preProcess/shift_from_hist.m b/preProcess/shift_from_hist.m new file mode 100644 index 00000000..120ab58b --- /dev/null +++ b/preProcess/shift_from_hist.m @@ -0,0 +1,45 @@ +function [st3, imin] = shift_from_hist(rez, flag) + +ymin = min(rez.yc); +ymax = max(rez.yc); +xmin = min(rez.xc); + +if flag + rez.ops.yup = ymin:7.5:ymax; % centers of the upsampled y positions + rez.ops.xup = xmin + [0 16 32]; % centers of the upsampled x positions +else + rez.ops.yup = ymin:10:ymax; % centers of the upsampled y positions + rez.ops.xup = [11 27 43 59]; %[0 16 32]; % centers of the upsampled x positions +end + +spkTh = 10; % same as "template amplitude" with generic templates + +st3 = standalone_detector(rez, spkTh); + +dd = 5; +dep = st3(:,2); +dmin = ymin - 1; +dep = dep - dmin; + +dmax = 1 + ceil(max(dep)/dd); +ops = rez.ops; +dt = ops.NT; +NT = max(st3(:,1)); +Nbatches = ceil(NT/dt); + +batch_id = ceil(st3(:,1)/dt); + +F = zeros(dmax, 20, Nbatches); +for t = 1:Nbatches + ix = find(batch_id==t); + dep = st3(ix,2) - dmin; + amp = log10(min(99, st3(ix,3))) - log10(spkTh); + amp = amp / (log10(100) - log10(spkTh)); + + M = sparse(ceil(dep/dd), ceil(1e-5 + amp * 20), ones(numel(ix), 1), dmax, 20); + F(:, :, t) = log2(1+M); +end + +[imin, F0] = align_block(F); + +imin = imin * 5; diff --git a/preProcess/sortBatches2.m b/preProcess/sortBatches2.m index 15d4a36a..c11539d9 100644 --- a/preProcess/sortBatches2.m +++ b/preProcess/sortBatches2.m @@ -8,6 +8,7 @@ ccb0 = gpuArray(ccb0); % compute its svd on the GPU (this might also be fast enough on CPU) +% [u, s, v] = svd(ccb0); [u, s, v] = svdecon(ccb0); % initialize the positions xs of the batch embeddings to be very small but proportional to the first PC
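
The new shift_batch_on_disk.m applies fractional row shifts: the integer part of the requested shift is handled by circshift along the channel-row dimension, and the fractional remainder by linearly blending each row with its neighbour in the direction of the shift. A minimal, self-contained sketch of that scheme, with illustrative sizes and synthetic data (not taken from this patch), assuming two sites per row as in the reshape above:

% Toy illustration of the sub-row shift applied in shift_batch_on_disk.m.
NT = 100; Nchan = 8;                        % illustrative sizes only
dat = int16(randi([-100 100], NT, Nchan));  % stand-in for one batch of data
shift = 1.25;                               % requested shift in rows (can be fractional)

ishift = round(shift);                      % integer part, handled by circshift
frac   = shift - ishift;                    % fractional remainder in [-0.5, 0.5]

dat = reshape(dat, [NT, 2, Nchan/2]);       % group channels into rows of 2 sites
dat = circshift(dat, ishift, 3);            % integer shift along the row dimension

alpha = abs(frac);                          % blend weight for the neighbouring row
datup = circshift(dat, sign(frac), 3);      % neighbour in the direction of the shift
dat   = dat * (1 - alpha) + alpha * datup;  % linear interpolation (stays int16)

dat = reshape(dat, [NT, Nchan]);            % back to samples x channels

kernelD.m evaluates a Gaussian (squared-exponential) kernel between two sets of D-dimensional points, K(i,j) = exp(-sum_d (x(d,i) - y(d,j))^2 / (2*len(d)^2)), splitting the second set into chunks to limit memory on gpuArrays or very large CPU inputs. A small check of that quantity against a naive double loop, with illustrative sizes:

% Compare kernelD against a naive double loop on small CPU arrays.
D = 2; N = 5; M = 7;                        % illustrative sizes only
xp0 = rand(D, N); yp0 = rand(D, M);
len = [20; 30];                             % per-dimension length scales

Knaive = zeros(N, M);
for i = 1:N
    for j = 1:M
        Knaive(i,j) = exp(-sum(((xp0(:,i) - yp0(:,j)).^2) ./ (len.^2)) / 2);
    end
end
K = kernelD(xp0, yp0, len);                 % chunked implementation from this patch
max(abs(K(:) - Knaive(:)))                  % should be ~0

shift_from_hist.m summarizes each batch as a depth x amplitude occupancy map and passes those maps to align_block (not added by this patch) to estimate one shift per batch, which is then scaled by the depth bin size (dd = 5). A sketch of the map construction on synthetic spikes, where the st3 columns are assumed to be (sample, depth, amplitude) and dmin stands in for min(rez.yc) - 1:

% Build per-batch drift fingerprints F from synthetic spikes.
NT = 65000; Nbatches = 10; spkTh = 10; dd = 5;        % illustrative values
nspk = 5000;
st3 = [randi(NT*Nbatches, nspk, 1), ...               % spike times (samples)
       3840*rand(nspk, 1), ...                        % depths
       spkTh + 90*rand(nspk, 1)];                     % amplitudes

dmin = min(st3(:,2)) - 1;
dmax = 1 + ceil(max(st3(:,2) - dmin)/dd);
batch_id = ceil(st3(:,1)/NT);

F = zeros(dmax, 20, Nbatches);
for t = 1:Nbatches
    ix  = find(batch_id == t);
    dep = st3(ix,2) - dmin;
    amp = log10(min(99, st3(ix,3))) - log10(spkTh);   % log amplitude above threshold
    amp = amp / (log10(100) - log10(spkTh));          % normalized to [0, 1)
    M = sparse(ceil(dep/dd), ceil(1e-5 + amp*20), ones(numel(ix),1), dmax, 20);
    F(:,:,t) = log2(1 + M);                           % compress spike counts
end
% F(:,:,t) is the drift fingerprint of batch t; align_block (not in this patch)
% aligns these maps to estimate one shift per batch.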