diff --git a/fairseq/criterions/wav2vec_criterion.py b/fairseq/criterions/wav2vec_criterion.py
index 85403cb428..52464ecfc1 100644
--- a/fairseq/criterions/wav2vec_criterion.py
+++ b/fairseq/criterions/wav2vec_criterion.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.

 import math
+import logging

 import torch
 import torch.nn.functional as F
@@ -12,6 +13,7 @@
 from fairseq.criterions import FairseqCriterion, register_criterion
 from fairseq.logging.meters import safe_round

+logger = logging.getLogger(__name__)

 @register_criterion('wav2vec')
 class Wav2vecCriterion(FairseqCriterion):
@@ -41,9 +43,19 @@ def forward(self, model, sample, reduce=True, log_pred=False):
         2) the sample size, which is used as the denominator for the gradient
         3) logging outputs to display while training
         """
+        #torch.set_printoptions(profile="full")
+        #logger.info("{}".format(sample['net_input']['source']))
+        #torch.set_printoptions(profile="default")
+
         net_output = model(**sample['net_input'])
         logits = model.get_logits(net_output).float()
         target = model.get_targets(sample, net_output)
+
+        torch.set_printoptions(profile="full")
+        logger.info("logits\n{}".format(logits))
+        minusinf = (logits == float("-inf")).sum(-1)
+        logger.info("minus infs\n{} / {}".format(minusinf, logits.size(-1)))
+        torch.set_printoptions(profile="default")

         weights = None
         if hasattr(model, 'get_target_weights') and not self.infonce:
@@ -55,6 +67,7 @@ def forward(self, model, sample, reduce=True, log_pred=False):
         if self.infonce:
             loss = F.cross_entropy(logits, target, reduction="sum" if reduce else "none",)
+            logger.info("cross entropy loss {}".format(loss))
         else:
             loss = F.binary_cross_entropy_with_logits(logits, target.float(), weights, reduction="sum" if reduce else "none",)
@@ -75,6 +88,14 @@ def forward(self, model, sample, reduce=True, log_pred=False):
                 loss += p
                 losses.append(p)

+        #llll = loss.item() if reduce else loss
+        #if llll / sample_size >= 3.0:
+        #    import ptvsd
+        #    ptvsd.enable_attach(('0.0.0.0', 7310))
+        #    print("Attach debugger now")
+        #    ptvsd.wait_for_attach()
+        #    logger.info("Loss per sample {} >= 3.0!\n".format(llll))
+
         logging_output = {
             'loss': loss.item() if reduce else loss,
             'ntokens': sample_size,
@@ -105,6 +126,8 @@ def forward(self, model, sample, reduce=True, log_pred=False):
             logging_output["correct"] = corr
             logging_output["count"] = count
+            logging_output["num_correct"] = net_output["num_correct"]
+            logging_output["num_all"] = net_output["num_all"]

         if log_pred:
             logging_output['logits'] = logits.cpu().numpy()
@@ -129,6 +152,12 @@ def reduce_metrics(logging_outputs) -> None:
         total = sum(log.get("count", 0) for log in logging_outputs)
         metrics.log_scalar("_total", total)

+        num_correct = sum(log.get("num_correct", 0) for log in logging_outputs)
+        metrics.log_scalar("num_correct", num_correct)
+
+        num_all = sum(log.get("num_all", 0) for log in logging_outputs)
+        metrics.log_scalar("num_all", num_all)
+
         if total > 0:
             metrics.log_derived(
@@ -137,8 +166,14 @@ def reduce_metrics(logging_outputs) -> None:
                 if meters["_total"].sum > 0
                 else float("nan"),
             )
+            metrics.log_derived(
+                "accuracy_2",
+                lambda meters: safe_round(meters["num_correct"].sum / meters["num_all"].sum, 5)
+                if meters["num_all"].sum > 0
+                else float("nan"),
+            )

-        builtin_keys = {'loss', 'ntokens', 'nsentences', 'sample_size', 'correct', 'count'}
+        builtin_keys = {'loss', 'ntokens', 'nsentences', 'sample_size', 'correct', 'count', 'num_correct', 'num_all'}

         for k in logging_outputs[0]:
             if k not in builtin_keys:
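A note on the `minusinf` dump above: `model.get_logits` lays the InfoNCE scores out with one row per masked position, the positive candidate in column 0 and the sampled negatives in the remaining columns. In upstream wav2vec 2.0, a negative identical to the positive gets its logit set to `-inf` in `compute_preds` (via the `neg_is_pos` mask visible in the next file), so the counter estimates how many candidates per row were masked out. A minimal, self-contained sketch of the counting with hypothetical values:

```python
import torch

# Hypothetical InfoNCE logits: one row per masked position; column 0 is the
# positive, the other columns are sampled negatives. Candidates identical to
# the positive have been masked with -inf upstream.
logits = torch.tensor([[2.3, 0.1, float("-inf")],
                       [1.7, float("-inf"), float("-inf")]])

# Same counting as the patched criterion: masked candidates per row.
minusinf = (logits == float("-inf")).sum(-1)
print("{} / {}".format(minusinf.tolist(), logits.size(-1)))  # [1, 2] / 3
```

Rows where every negative is `-inf` make the cross-entropy trivially small, which is presumably why the count is logged right next to the loss.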
diff --git a/fairseq/models/wav2vec/wav2vec2_scribblelens.py b/fairseq/models/wav2vec/wav2vec2_scribblelens.py
index 88c3798462..cb63d68328 100644
--- a/fairseq/models/wav2vec/wav2vec2_scribblelens.py
+++ b/fairseq/models/wav2vec/wav2vec2_scribblelens.py
@@ -29,6 +29,8 @@
 from fairseq.modules.transformer_sentence_encoder import init_bert_params
 from fairseq.utils import buffered_arange

+logger = logging.getLogger(__name__)
+
 @register_model("wav2vec2_scribblelens")
 class Wav2Vec2ModelSL(BaseFairseqModel):
     @staticmethod
@@ -502,6 +504,11 @@ def sample_negatives(self, y, num):
         if self.cross_sample_negatives > 0 and self.n_negatives > 0:
             neg_idxs = torch.cat([neg_idxs, cross_neg_idxs], dim=1)

+        torch.set_printoptions(profile="full")
+        logger.info("neg_idxs:\n{}".format(neg_idxs))
+        #logger.info("neg_idxs unique:\n{}".format(torch.unique(neg_idxs, sorted=False).size()))
+        torch.set_printoptions(profile="default")
+
         negs = y[neg_idxs.view(-1)]
         negs = negs.view(
             bsz, num, self.n_negatives + self.cross_sample_negatives, fsz
@@ -511,8 +518,15 @@ def sample_negatives(self, y, num):
         return negs, neg_idxs

     def compute_preds(self, x, y, negatives):
-
+
         neg_is_pos = (y == negatives).all(-1)
+        #torch.set_printoptions(profile="full")
+        #logger.info("y:\n{}".format(y))
+        #logger.info("negatives:\n{}".format(negatives))
+        #logger.info("neg_is_pos:\n{}".format(neg_is_pos))
+        #torch.set_printoptions(profile="default")
+
         y = y.unsqueeze(0)
         targets = torch.cat([y, negatives], dim=0)
@@ -557,6 +571,10 @@ def forward(self, source, padding_mask=None, mask=True, features_only=False):
             features = self.dropout_input(features)

         unmasked_features = self.dropout_features(unmasked_features)
+
+        #torch.set_printoptions(profile="full")
+        #logger.info("unmasked_features:\n{}".format(unmasked_features))
+        #torch.set_printoptions(profile="default")

         num_vars = None
         code_ppl = None
@@ -594,24 +612,32 @@ def forward(self, source, padding_mask=None, mask=True, features_only=False):
             return {"x": x, "padding_mask": padding_mask}

         if self.quantizer:
-            q = self.quantizer(y, produce_targets=False)
+            q = self.quantizer(y, produce_targets=True)
             y = q["x"]
             num_vars = q["num_vars"]
             code_ppl = q["code_perplexity"]
             prob_ppl = q["prob_perplexity"]
             curr_temp = q["temp"]
+            targets = q["targets"]
+
+            torch.set_printoptions(profile="full")
+            logger.info("quantizer targets:\n{}".format(targets))
+            torch.set_printoptions(profile="default")

             y = self.project_q(y)

             if self.negatives_from_everywhere:
+                logger.info("negatives_from_everywhere")
                 neg_cands, *_ = self.quantizer(unmasked_features, produce_targets=False)
                 negs, _ = self.sample_negatives(neg_cands, y.size(1))
                 negs = self.project_q(negs)
             else:
+                logger.info("negatives_from_everywhere else block")
                 negs, _ = self.sample_negatives(y, y.size(1))

             if self.codebook_negatives > 0:
+                logger.info("codebook_negatives > 0")
                 cb_negs = self.quantizer.sample_from_codebook(
                     y.size(0) * y.size(1), self.codebook_negatives
                 )
@@ -630,15 +656,22 @@ def forward(self, source, padding_mask=None, mask=True, features_only=False):
             negs, _ = self.sample_negatives(y, y.size(1))

         x = x[mask_indices].view(x.size(0), -1, x.size(-1))
+
+        #torch.set_printoptions(profile="full")
+        #logger.info("mask_indices:\n{}".format(mask_indices))
+        #torch.set_printoptions(profile="default")

         if self.target_glu:
             y = self.target_glu(y)
             negs = self.target_glu(negs)

         x = self.final_proj(x)
+        num_correct, num_all = self.miara_acc(x, y)
         x = self.compute_preds(x, y, negs)

         result = {"x": x, "padding_mask": padding_mask, "features_pen": features_pen}
+        result["num_correct"] = num_correct
+        result["num_all"] = num_all

         if prob_ppl is not None:
             result["prob_perplexity"] = prob_ppl
@@ -648,6 +681,21 @@ def forward(self, source, padding_mask=None, mask=True, features_only=False):

         return result

+    # x - encoder vectors
+    # y - quantizer vectors
+    def miara_acc(self, x, y):
+        x_size1 = x.size(1)
+        xx = x.repeat_interleave(x.size(1), 1)  # BxTxC -> BxT^2xC
+        yy = y.repeat(1, y.size(1), 1)  # BxTxC -> BxT^2xC
+        cos = torch.cosine_similarity(xx.float(), yy.float(), dim=-1).view(-1, x.size(1), x.size(1))  # BxT^2 -> BxTxT
+        maxi, _ = cos.max(dim=2, keepdim=True)  # BxTx1
+        maxi = maxi * torch.eye(x.size(1), device=x.device).expand(cos.size(0), -1, -1)  # BxTxT
+
+        num_correct = (cos == maxi).sum().item()
+        num_all = x.size(0) * x.size(1)
+        logger.info("{} / {}".format(num_correct, num_all))
+        return num_correct, num_all
+
     def quantize(self, x):
         assert self.quantizer is not None
         x = self.feature_extractor(x)
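The `miara_acc` helper added above is a retrieval-style diagnostic: for each timestep it asks whether the encoder output at position `t` is most cosine-similar to its own quantized target among all targets in the utterance, and the resulting `num_correct` / `num_all` counts feed the `accuracy_2` metric in the criterion. Below is a minimal re-reading of the same computation, assuming `x` and `y` are both `B x T x C`; it compares the diagonal of the `B x T x T` similarity matrix against its row maxima instead of reproducing the patch's `repeat_interleave`/`repeat` layout:

```python
import torch

def diag_is_max_accuracy(x, y):
    # x: B x T x C encoder outputs, y: B x T x C quantized targets.
    B, T, C = x.shape
    # B x T x T cosine similarities between every x position and every y position.
    cos = torch.cosine_similarity(
        x.float().unsqueeze(2).expand(B, T, T, C),
        y.float().unsqueeze(1).expand(B, T, T, C),
        dim=-1,
    )
    diag = cos.diagonal(dim1=1, dim2=2)  # B x T: similarity of each pair (x_t, y_t)
    # Position t counts as correct when the diagonal entry is the row maximum.
    num_correct = (diag >= cos.max(dim=2).values).sum().item()
    return num_correct, B * T

x = torch.randn(2, 5, 8)
print(diag_is_max_accuracy(x, x))  # identical inputs -> (10, 10)
```

One caveat about the patch itself: `(cos == maxi)` compares against a matrix whose off-diagonal entries were zeroed by the `torch.eye` mask, so any off-diagonal cosine that is exactly 0.0 (e.g. from all-zero padded target vectors) is counted as correct too; the direct diagonal comparison above avoids that.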
result["num_correct"] = num_correct + result["num_all"] = num_all if prob_ppl is not None: result["prob_perplexity"] = prob_ppl @@ -648,6 +681,21 @@ def forward(self, source, padding_mask=None, mask=True, features_only=False): return result + # x - encoder vectors + # y - quantizer vectors + def miara_acc(self, x, y): + x_size1 = x.size(1) + xx = x.repeat_interleave(x.size(1), 1) # BxTxC -> BxT^2xC + yy = y.repeat(1, y.size(1), 1) # BxTxC -> BxT^2xC + cos = torch.cosine_similarity(xx.float(), yy.float(), dim=-1).view(-1, x.size(1), x.size(1)) # BxT^2 -> BxTxT + maxi, _ = cos.max(dim=2, keepdim=True) # BxTx1 + maxi = maxi * torch.eye(x.size(1), device=x.device).expand(cos.size(0), -1, -1) # BxTxT + + num_correct = (cos == maxi).sum().item() + num_all = x.size(0) * x.size(1) + logger.info("{} / {}".format(num_correct, num_all)) + return num_correct, num_all + def quantize(self, x): assert self.quantizer is not None x = self.feature_extractor(x) diff --git a/fairseq/modules/gumbel_vector_quantizer.py b/fairseq/modules/gumbel_vector_quantizer.py index 01ddd2298b..477534356c 100644 --- a/fairseq/modules/gumbel_vector_quantizer.py +++ b/fairseq/modules/gumbel_vector_quantizer.py @@ -3,10 +3,12 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import logging import torch import torch.nn as nn import torch.nn.functional as F +logger = logging.getLogger(__name__) class GumbelVectorQuantizer(nn.Module): def __init__( @@ -153,6 +155,12 @@ def forward(self, x, produce_targets=False): .view(bsz * tsz, self.groups, -1) ) hard_probs = torch.mean(hard_x.float(), dim=0) + + torch.set_printoptions(profile="full") + logger.info("hard_probs:\n{}".format(hard_probs)) + #logger.info("hard_probs unique:\n{}".format(torch.unique(hard_probs, sorted=False).size())) + torch.set_printoptions(profile="default") + result["code_perplexity"] = torch.exp( -torch.sum(hard_probs * torch.log(hard_probs + 1e-7), dim=-1) ).sum() @@ -164,6 +172,10 @@ def forward(self, x, produce_targets=False): -torch.sum(avg_probs * torch.log(avg_probs + 1e-7), dim=-1) ).sum() + #torch.set_printoptions(profile="full") + #logger.info("avg_probs:\n{}".format(avg_probs)) + #torch.set_printoptions(profile="default") + result["temp"] = self.curr_temp if self.training: diff --git a/fairseq/trainer.py b/fairseq/trainer.py index d3724f22b1..b4a719a3dc 100644 --- a/fairseq/trainer.py +++ b/fairseq/trainer.py @@ -468,6 +468,7 @@ def maybe_no_sync(): try: with maybe_no_sync(): # forward and backward + logger.info("Batch {}/52".format(i)) loss, sample_size_i, logging_output = self.task.train_step( sample=sample, model=self.model, diff --git a/fairseq_cli/train.py b/fairseq_cli/train.py index cd3a93b13e..1c3da16de2 100644 --- a/fairseq_cli/train.py +++ b/fairseq_cli/train.py @@ -195,6 +195,16 @@ def train(args, trainer, task, epoch_itr): default_log_format=("tqdm" if not args.no_progress_bar else "simple"), ) + #torch.set_printoptions(profile="full") + for name, p in trainer.model.named_parameters(): + if "quantizer.vars" in name or "quantizer.weight_proj.weight" in name or "project_q.weight" in name: + torch.set_printoptions(profile="full") + logger.info("{}\n{}".format(name, p.data)) + torch.set_printoptions(profile="default") + else: + logger.info("{}\n{}".format(name, p.data)) + #torch.set_printoptions(profile="default") + trainer.begin_epoch(epoch_itr.epoch) valid_losses = [None] diff --git a/train.py b/train.py index fbf7e57320..8eb9b292d5 100644 --- a/train.py +++ 
diff --git a/train.py b/train.py
index fbf7e57320..8eb9b292d5 100644
--- a/train.py
+++ b/train.py
@@ -11,8 +11,8 @@

 if __name__ == '__main__':
-    # import ptvsd
-    # ptvsd.enable_attach(('0.0.0.0', 7321))
-    # print("Attach debugger now")
-    # ptvsd.wait_for_attach()
+    #import ptvsd
+    #ptvsd.enable_attach(('0.0.0.0', 7310))
+    #print("Attach debugger now")
+    #ptvsd.wait_for_attach()
     cli_main()
diff --git a/uwr_related/experiments/jch/scrib.sh b/uwr_related/experiments/jch/scrib.sh
old mode 100644
new mode 100755
index 0f998977a9..12ca02cf84
--- a/uwr_related/experiments/jch/scrib.sh
+++ b/uwr_related/experiments/jch/scrib.sh
@@ -1,8 +1,39 @@
+#python train.py --distributed-world-size 1 --update-freq 2 \
+#    /pio/scratch/2/jch/wav2vec/data/scribblelens \
+#    --save-dir /pio/lscratch/1/jch/fairseq/try_sl2 --num-workers 0 \
+#    --keep-last-epochs 3 \
+#    --tensorboard-logdir /pio/scratch/2/jch/wav2vec/runs/try_sl2 --log-format simple \
+#    --task scribblelens --criterion wav2vec --arch wav2vec2_scribblelens \
+#    --valid-subset test --pad-to-multiples-of 4 `#--max-sample-size 256` \
+#    --log-keys '["prob_perplexity","code_perplexity","temp"]' --quantize-targets --extractor-mode default \
+#    --conv-feature-layers '[(64, (3, 3), (1, 2), (1, 1)), (128, (5, 5), (2, 2), (2, 2)), (256, (3,3), (1, 1), (1, 1)), (256, (3,3), (1, 2), (1, 1)), (512, (3,3), (1, 1), (1, 1)), (512, (3,3), (1, 2), (1, 1)), (512, (3,2), (2, 1), (1, 0))]' \
+#    --final-dim 256 \
+#    --latent-vars 320 --latent-groups 2 --latent-temp '(2,0.5,0.999995)' --infonce \
+#    --optimizer adam --adam-betas '(0.9,0.98)' --adam-eps 1e-06 --lr-scheduler polynomial_decay \
+#    --total-num-update 400000 --lr 0.0005 --warmup-updates 32000 \
+#    --mask-length 10 --mask-prob 0.65 --mask-selection static --mask-other 0 \
+#    --encoder-layerdrop 0.05 --dropout-input 0.1 --dropout-features 0.1 --feature-grad-mult 0.1 \
+#    --loss-weights '[0.1, 10]' --conv-pos 128 --conv-pos-groups 16 \
+#    --num-negatives 100 --cross-sample-negatives 0 \
+#    `#--max-sample-size 250000 --min-sample-size 32000` \
+#    --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 --max-tokens 10000 --max-update 400000 \
+#    --skip-invalid-size-inputs-valid-test --ddp-backend no_c10d \
+#    --enable-padding # crashes without that, needs to make all lines same-size
+
+RUN=$1
+NUM=${RUN:5:3}
+NUMVER=${RUN#"debug"}
+echo $RUN $NUM $NUMVER
+
+mkdir -p /pio/scratch/1/i273233/runs/$RUN
+ln -s /pio/scratch/1/i273233/runs/try_sl3/checkpoint$NUM.pt /pio/scratch/1/i273233/runs/$RUN/before.pt
+
 python train.py --distributed-world-size 1 --update-freq 2 \
     /pio/scratch/2/jch/wav2vec/data/scribblelens \
-    --save-dir /pio/lscratch/1/jch/fairseq/try_sl2 --num-workers 0 \
+    --save-dir /pio/scratch/1/i273233/runs/$RUN --num-workers 0 \
     --keep-last-epochs 3 \
-    --tensorboard-logdir /pio/scratch/2/jch/wav2vec/runs/try_sl2 --log-format simple \
+    --restore-file /pio/scratch/1/i273233/runs/$RUN/before.pt \
+    --tensorboard-logdir /pio/scratch/1/i273233/runs/$RUN --log-format simple \
     --task scribblelens --criterion wav2vec --arch wav2vec2_scribblelens \
     --valid-subset test --pad-to-multiples-of 4 `#--max-sample-size 256` \
     --log-keys '["prob_perplexity","code_perplexity","temp"]' --quantize-targets --extractor-mode default \
@@ -18,4 +49,6 @@ python train.py --distributed-world-size 1 --update-freq 2 \
     `#--max-sample-size 250000 --min-sample-size 32000` \
     --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 --max-tokens 10000 --max-update 400000 \
     --skip-invalid-size-inputs-valid-test --ddp-backend no_c10d \
-    --enable-padding # crashes without that, needs to make all lines same-size
\ No newline at end of file
+    --enable-padding \
+    `# crashes without that, needs to make all lines same-size` \
+    > ../logfile$NUMVER.txt
\ No newline at end of file
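A note on the run bookkeeping added at the top of scrib.sh: with a hypothetical invocation `./scrib.sh debug042v1`, `${RUN:5:3}` takes the three characters after the five-character `debug` prefix, giving `NUM=042`, while `${RUN#"debug"}` strips that prefix entirely, giving `NUMVER=042v1`. The script then symlinks `checkpoint042.pt` from the `try_sl3` run as `before.pt`, resumes training from it via `--restore-file`, and redirects output to `../logfile042v1.txt`.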
diff --git a/uwr_related/shared.yml b/uwr_related/shared.yml
new file mode 100644
index 0000000000..7eccc4d480
--- /dev/null
+++ b/uwr_related/shared.yml
@@ -0,0 +1,399 @@
+name: 202010-fairseq
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=1_llvm
+  - abseil-cpp=20200225.2=he1b5a44_2
+  - aiohttp=3.7.2=py37h8f50634_0
+  - aliyun-python-sdk-core=2.13.26=py37hc8dfbb8_1
+  - aliyun-python-sdk-kms=2.12.0=pyh9f0ad1d_0
+  - appdirs=1.4.4=pyh9f0ad1d_0
+  - argcomplete=1.12.1=pyh9f0ad1d_0
+  - argon2-cffi=20.1.0=py37h8f50634_2
+  - arrow-cpp=2.0.0=py37hdd741e3_5_cpu
+  - async-timeout=3.0.1=py_1000
+  - async_generator=1.10=py_0
+  - atk=2.36.0=3
+  - atk-1.0=2.36.0=h63f31ab_3
+  - atpublic=1.0=py_0
+  - attrs=20.2.0=pyh9f0ad1d_0
+  - aws-c-common=0.4.59=he1b5a44_0
+  - aws-c-event-stream=0.1.6=h84e28f3_5
+  - aws-checksums=0.1.9=he252421_2
+  - aws-sdk-cpp=1.8.70=h9b98462_0
+  - azure-core=1.8.2=pyh9f0ad1d_0
+  - azure-storage-blob=12.5.0=pyh9f0ad1d_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - backports=1.0=py_2
+  - backports.functools_lru_cache=1.6.1=py_0
+  - bcrypt=3.2.0=py37h8f50634_1
+  - blas=2.20=mkl
+  - bleach=3.2.1=pyh9f0ad1d_0
+  - blinker=1.4=py_1
+  - blosc=1.20.1=he1b5a44_0
+  - boto3=1.16.9=pyhd8ed1ab_0
+  - botocore=1.19.9=pyhd3deb0d_0
+  - brotli=1.0.9=he1b5a44_3
+  - brotli-python=1.0.9=py37hb892b2f_3
+  - brotlipy=0.7.0=py37hb5d75c8_1001
+  - brunsli=0.1=he1b5a44_0
+  - bzip2=1.0.8=h516909a_3
+  - c-ares=1.16.1=h516909a_3
+  - ca-certificates=2020.6.20=hecda079_0
+  - cachetools=4.1.1=py_0
+  - cairo=1.16.0=h488836b_1006
+  - certifi=2020.6.20=py37he5f6b98_2
+  - cffi=1.14.3=py37h00ebd2e_1
+  - chardet=3.0.4=py37he5f6b98_1008
+  - charls=2.1.0=he1b5a44_2
+  - click=7.1.2=pyh9f0ad1d_0
+  - cloudpickle=1.6.0=py_0
+  - colorama=0.4.4=pyh9f0ad1d_0
+  - commonmark=0.9.1=py_0
+  - configobj=5.0.6=py_0
+  - crcmod=1.7=py37hc8dfbb8_1003
+  - cryptography=3.2.1=py37hc72a4ac_0
+  - cudatoolkit=10.1.243=h6bb024c_0
+  - cudatoolkit-dev=10.1.243=h516909a_3
+  - cycler=0.10.0=py_2
+  - cython=0.29.21=py37hb892b2f_1
+  - cytoolz=0.11.0=py37h8f50634_1
+  - dash=1.17.0=pyhd8ed1ab_0
+  - dash-bootstrap-components=0.10.7=pyh9f0ad1d_0
+  - dash-core-components=1.13.0=pyhd8ed1ab_1
+  - dash-daq=0.5.0=pyh9f0ad1d_1
+  - dash-html-components=1.1.1=pyh9f0ad1d_0
+  - dash-renderer=1.8.3=pyhd8ed1ab_0
+  - dash-table=4.11.0=pyhd8ed1ab_0
+  - dash_colorscales=0.0.4=pyh9f0ad1d_0
+  - dask-core=2.30.0=py_0
+  - dbus=1.13.6=hfdff14a_1
+  - decorator=4.4.2=py_0
+  - defusedxml=0.6.0=py_0
+  - descartes=1.1.0=py_4
+  - distro=1.5.0=pyh9f0ad1d_0
+  - dpath=1.4.2=py37hc8dfbb8_1002
+  - dtale=1.20.0=pyhd3deb0d_0
+  - dvc=1.9.1=py37hc8dfbb8_0
+  - dvc-azure=1.9.1=py37hc8dfbb8_0
+  - dvc-base=1.9.1=py37hc8dfbb8_0
+  - dvc-gdrive=1.9.1=py37hc8dfbb8_0
+  - dvc-gs=1.9.1=py37hc8dfbb8_0
+  - dvc-hdfs=1.9.1=py37hc8dfbb8_0
+  - dvc-oss=1.9.1=py37hc8dfbb8_0
+  - dvc-s3=1.9.1=py37hc8dfbb8_0
+  - dvc-ssh=1.9.1=py37hc8dfbb8_0
+  - editdistance=0.5.3=py37h3340039_2
+  - entrypoints=0.3=py37hc8dfbb8_1002
+  - expat=2.2.9=he1b5a44_2
+  - ffmpeg=4.3.1=h3215721_1
+  - flask=1.1.2=pyh9f0ad1d_0
+  - flask-compress=1.7.0=pyh9f0ad1d_0
+  - flatten-dict=0.3.0=pyh9f0ad1d_0
+  - flufl.lock=3.2=py_0
+  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+  - font-ttf-inconsolata=2.001=hab24e00_0
+  - font-ttf-source-code-pro=2.030=hab24e00_0
+  - font-ttf-ubuntu=0.83=hab24e00_0
+  - fontconfig=2.13.1=h1056068_1002
+  - fonts-conda-ecosystem=1=0
+  - fonts-conda-forge=1=0
+  - freetype=2.10.4=h7ca028e_0
+  - fribidi=1.0.10=h516909a_0
+  - funcy=1.15=pyh9f0ad1d_0
+  - future=0.18.2=py37hc8dfbb8_2
+  - gdk-pixbuf=2.38.2=h3f25603_4
+  - gettext=0.19.8.1=hf34092f_1004
+  - gflags=2.2.2=he1b5a44_1004
+  - giflib=5.2.1=h36c2ea0_2
+  - gitdb=4.0.5=py_0
+  - gitdb2=4.0.2=py_0
+  - gitpython=3.1.11=py_0
+  - glib=2.66.2=h58526e2_0
+  - glog=0.4.0=h49b9bf7_3
+  - gmp=6.2.0=h58526e2_4
+  - gnutls=3.6.13=h79a8f9a_0
+  - gobject-introspection=1.66.1=py37h98f159f_2
+  - google-api-core=1.22.4=pyh9f0ad1d_0
+  - google-api-python-client=1.12.5=pyh9f0ad1d_0
+  - google-auth=1.23.0=pyhd8ed1ab_0
+  - google-auth-httplib2=0.0.4=pyh9f0ad1d_0
+  - google-cloud-core=1.4.3=pyh9f0ad1d_0
+  - google-cloud-storage=1.19.0=py_0
+  - google-crc32c=1.0.0=py37h193935f_1
+  - google-resumable-media=1.1.0=pyh9f0ad1d_0
+  - googleapis-common-protos=1.52.0=py37hc8dfbb8_0
+  - grandalf=0.6=py_0
+  - graphite2=1.3.13=he1b5a44_1001
+  - graphviz=2.42.3=h6939c30_2
+  - grpc-cpp=1.33.2=h7997a97_0
+  - grpcio=1.33.2=py37haffed2e_0
+  - gst-plugins-base=1.14.5=h0935bb2_2
+  - gstreamer=1.14.5=h36ae1b5_2
+  - gtk2=2.24.32=h194ddfc_3
+  - gts=0.7.6=h17b2bb4_1
+  - harfbuzz=2.7.2=hb1ce69c_1
+  - hdf5=1.10.6=nompi_h54c07f9_1110
+  - httplib2=0.18.1=pyh9f0ad1d_0
+  - icu=67.1=he1b5a44_0
+  - idna=2.10=pyh9f0ad1d_0
+  - imagecodecs=2020.5.30=py37h5f9c477_4
+  - imageio=2.9.0=py_0
+  - importlib-metadata=2.0.0=py_1
+  - importlib_metadata=2.0.0=1
+  - invoke=1.4.1=py_0
+  - ipykernel=5.3.4=py37hc6149b9_1
+  - ipympl=0.5.8=pyh9f0ad1d_0
+  - ipython=7.19.0=py37h888b3d9_0
+  - ipython_genutils=0.2.0=py_1
+  - ipywidgets=7.5.1=pyh9f0ad1d_1
+  - isodate=0.6.0=py_1
+  - itsdangerous=1.1.0=py_0
+  - jasper=1.900.1=h07fcdf6_1006
+  - jedi=0.17.2=py37hc8dfbb8_1
+  - jinja2=2.11.2=pyh9f0ad1d_0
+  - jmespath=0.10.0=pyh9f0ad1d_0
+  - joblib=0.17.0=py_0
+  - jpeg=9d=h36c2ea0_0
+  - json5=0.9.5=pyh9f0ad1d_0
+  - jsonpath-ng=1.5.2=pyh9f0ad1d_0
+  - jsonschema=3.2.0=py_2
+  - jupyter=1.0.0=py_2
+  - jupyter_client=6.1.7=py_0
+  - jupyter_console=6.2.0=py_0
+  - jupyter_core=4.6.3=py37hc8dfbb8_2
+  - jupyterlab=2.2.9=py_0
+  - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0
+  - jupyterlab_server=1.2.0=py_0
+  - jupytext=1.6.0=pyh9f0ad1d_0
+  - jxrlib=1.1=h516909a_2
+  - kiwisolver=1.3.1=py37hc928c03_0
+  - knack=0.6.3=py_0
+  - krb5=1.17.1=hfafb76e_3
+  - lame=3.100=h14c3975_1001
+  - lcms2=2.11=hbd6801e_0
+  - ld_impl_linux-64=2.35=h769bd43_9
+  - lerc=2.2=he1b5a44_0
+  - libaec=1.0.4=he1b5a44_1
+  - libblas=3.8.0=20_mkl
+  - libcblas=3.8.0=20_mkl
+  - libclang=10.0.1=default_hde54327_1
+  - libcrc32c=1.1.1=he1b5a44_2
+  - libcurl=7.71.1=hcdd3856_8
+  - libedit=3.1.20191231=he28a2e2_2
+  - libev=4.33=h516909a_1
+  - libevent=2.1.10=hcdb4288_3
+  - libffi=3.2.1=he1b5a44_1007
+  - libgcc-ng=9.3.0=h5dbcf3e_17
+  - libgfortran-ng=7.5.0=hae1eefd_17
+  - libgfortran4=7.5.0=hae1eefd_17
+  - libglib=2.66.2=hbe7bbb4_0
+  - libiconv=1.16=h516909a_0
+  - liblapack=3.8.0=20_mkl
+  - liblapacke=3.8.0=20_mkl
+  - libllvm10=10.0.1=he513fc3_3
+  - libnghttp2=1.41.0=h8cfc5f6_2
+  - libopencv=4.5.0=py37_2
+  - libpng=1.6.37=h21135ba_2
+  - libpq=12.3=h5513abc_2
+  - libprotobuf=3.13.0.1=h8b12597_0
+  - libsodium=1.0.18=h516909a_1
+  - libssh2=1.9.0=hab1572f_5
+  - libstdcxx-ng=9.3.0=h2ae2ef3_17
+  - libthrift=0.13.0=h5aa387f_6
+  - libtiff=4.1.0=hc7e4089_6
+  - libtool=2.4.6=h58526e2_1006
+  - libutf8proc=2.5.0=h516909a_2
+  - libuuid=2.32.1=h14c3975_1000
+  - libuv=1.40.0=hd18ef5c_0
+  - libwebp=1.1.0=h56121f0_4
+  - libwebp-base=1.1.0=h516909a_3
+  - libxcb=1.13=h14c3975_1002
+  - libxkbcommon=0.10.0=he1b5a44_0
+  - libxml2=2.9.10=h68273f3_2
+  - libzopfli=1.0.3=he1b5a44_0
+  - llvm-openmp=11.0.0=hfc4b9b4_1
+  - llvmlite=0.34.0=py37h5202443_2
+  - lz4=3.1.0=py37h5a7ed16_1
+  - lz4-c=1.9.2=he1b5a44_3
+  - markdown-it-py=0.5.6=py_0
+  - markupsafe=1.1.1=py37hb5d75c8_2
+  - matplotlib-base=3.3.2=py37hc9afd2a_1
+  - mistune=0.8.4=py37h8f50634_1002
+  - mizani=0.7.2=pyhd8ed1ab_0
+  - mkl=2020.2=256
+  - msrest=0.6.19=pyh9f0ad1d_0
+  - multidict=4.7.5=py37h8f50634_2
+  - mysql-common=8.0.21=2
+  - mysql-libs=8.0.21=hf3661c5_2
+  - nanotime=0.5.2=py_0
+  - nbclient=0.5.1=py_0
+  - nbconvert=6.0.7=py37hc8dfbb8_2
+  - nbdime=2.1.0=py_0
+  - nbformat=5.0.8=py_0
+  - nccl=2.7.8.1=h51cf6c1_1
+  - ncurses=6.2=he1b5a44_2
+  - nest-asyncio=1.4.2=pyhd8ed1ab_0
+  - nettle=3.4.1=h1bed415_1002
+  - networkx=2.4=py_1
+  - ninja=1.10.1=hfc4b9b4_2
+  - notebook=6.1.4=py37hc8dfbb8_1
+  - nspr=4.29=he1b5a44_1
+  - nss=3.58=h27285de_1
+  - numba=0.51.2=py37h9fdb41a_0
+  - numpy=1.19.2=py37h7008fea_1
+  - oauth2client=4.1.3=py_0
+  - oauthlib=3.0.1=py_0
+  - olefile=0.46=pyh9f0ad1d_1
+  - openh264=2.1.1=h8b12597_0
+  - openjpeg=2.3.1=h981e76c_3
+  - openssl=1.1.1h=h516909a_0
+  - orc=1.6.5=hd3605a7_0
+  - oss2=2.13.0=py37hc8dfbb8_1
+  - packaging=20.4=pyh9f0ad1d_0
+  - palettable=3.3.0=py_0
+  - pandas=1.1.4=py37h10a2094_0
+  - pandoc=2.11.0.4=hd18ef5c_0
+  - pandocfilters=1.4.2=py_1
+  - pango=1.42.4=h80147aa_5
+  - paramiko=2.7.2=pyh9f0ad1d_0
+  - parquet-cpp=1.5.1=2
+  - parso=0.7.1=pyh9f0ad1d_0
+  - pathlib2=2.3.5=py37h89c1867_2
+  - pathspec=0.8.0=pyh9f0ad1d_0
+  - patsy=0.5.1=py_0
+  - pcre=8.44=he1b5a44_0
+  - pexpect=4.8.0=pyh9f0ad1d_2
+  - pickleshare=0.7.5=py_1003
+  - pillow=8.0.1=py37h718be6c_0
+  - pip=20.2.4=py_0
+  - pixman=0.38.0=h516909a_1003
+  - plotly=4.12.0=pyh9f0ad1d_0
+  - plotnine=0.7.1=py_0
+  - ply=3.11=py_1
+  - portalocker=1.7.0=py37hc8dfbb8_1
+  - prometheus_client=0.8.0=pyh9f0ad1d_0
+  - prompt-toolkit=3.0.8=py_0
+  - prompt_toolkit=3.0.8=0
+  - protobuf=3.13.0.1=py37h3340039_1
+  - pthread-stubs=0.4=h14c3975_1001
+  - ptvsd=4.3.2=py37h8f50634_3
+  - ptyprocess=0.6.0=py_1001
+  - py-opencv=4.5.0=py37hc6149b9_2
+  - pyarrow=2.0.0=py37h4935f41_5_cpu
+  - pyasn1=0.4.8=py_0
+  - pyasn1-modules=0.2.7=py_0
+  - pycparser=2.20=pyh9f0ad1d_2
+  - pycryptodome=3.9.8=py37hb05bdb2_1
+  - pydot=1.4.1=py37hc8dfbb8_1003
+  - pydrive2=1.6.3=pyh9f0ad1d_0
+  - pygments=2.7.2=py_0
+  - pygtrie=2.3.2=pyh8c360ce_0
+  - pyjwt=1.7.1=py_0
+  - pynacl=1.4.0=py37h8f50634_2
+  - pyopenssl=19.1.0=py_1
+  - pyparsing=2.4.7=pyh9f0ad1d_0
+  - pyqt=5.12.3=py37h8685d9f_4
+  - pyrsistent=0.17.3=py37h8f50634_1
+  - pysocks=1.7.1=py37he5f6b98_2
+  - python=3.7.8=h6f2ec95_1_cpython
+  - python-dateutil=2.8.1=py_0
+  - python_abi=3.7=1_cp37m
+  - pytorch=1.7.0=py3.7_cuda10.1.243_cudnn7.6.3_0
+  - pytz=2020.4=pyhd8ed1ab_0
+  - pywavelets=1.1.1=py37h161383b_3
+  - pyyaml=5.3.1=py37hb5d75c8_1
+  - pyzmq=19.0.2=py37hac76be4_2
+  - qgrid=1.3.1=py37hc8dfbb8_1
+  - qt=5.12.9=h1f2b2cb_0
+  - qtconsole=4.7.7=pyh9f0ad1d_0
+  - qtpy=1.9.0=py_0
+  - re2=2020.10.01=he1b5a44_0
+  - readline=8.0=he28a2e2_2
+  - regex=2020.10.28=py37h4abf009_0
+  - requests=2.24.0=pyh9f0ad1d_0
+  - requests-oauthlib=1.3.0=pyh9f0ad1d_0
+  - retrying=1.3.3=py_2
+  - rich=9.1.0=py37hc8dfbb8_0
+  - rsa=4.6=pyh9f0ad1d_0
+  - ruamel.yaml=0.16.12=py37h8f50634_1
+  - ruamel.yaml.clib=0.2.2=py37h8f50634_1
+  - s3transfer=0.3.3=py_3
+  - sacrebleu=1.4.14=pyh9f0ad1d_0
+  - scikit-fuzzy=0.4.2=py_1
+  - scikit-image=0.17.2=py37h10a2094_4
+  - scikit-learn=0.23.2=py37hddcf8d6_2
+  - scipy=1.5.3=py37h8911b10_0
+  - seaborn=0.11.0=ha770c72_1
+  - seaborn-base=0.11.0=pyhd8ed1ab_1
+  - send2trash=1.5.0=py_0
+  - setuptools=49.6.0=py37he5f6b98_2
+  - setuptools-scm=4.1.2=pyh9f0ad1d_0
+  - setuptools_scm=4.1.2=0
+  - shortuuid=1.0.1=py37hc8dfbb8_3
+  - shtab=1.3.2=pyh9f0ad1d_0
+  - simplejson=3.17.2=py37h8f50634_1
+  - six=1.15.0=pyh9f0ad1d_0
+  - smmap=3.0.4=pyh9f0ad1d_0
+  - snappy=1.1.8=he1b5a44_3
+  - sqlite=3.33.0=h4cf870e_1
+  - squarify=0.4.3=py_0
+  - statsmodels=0.12.1=py37ha21ca33_1
+  - strsimpy=0.1.9=pyh9f0ad1d_0
+  - tabulate=0.8.7=pyh9f0ad1d_0
+  - terminado=0.9.1=py37hc8dfbb8_1
+  - testpath=0.4.4=py_0
+  - threadpoolctl=2.1.0=pyh5ca1d4c_0
+  - tifffile=2020.10.1=py_0
+  - tk=8.6.10=hed695b0_1
+  - toml=0.10.2=pyhd8ed1ab_0
+  - toolz=0.11.1=py_0
+  - torchvision=0.8.1=py37_cu101
+  - tornado=6.1=py37h4abf009_0
+  - tqdm=4.51.0=pyh9f0ad1d_0
+  - traitlets=5.0.5=py_0
+  - typing=3.7.4.3=py37hc8dfbb8_1
+  - typing-extensions=3.7.4.3=0
+  - typing_extensions=3.7.4.3=py_0
+  - uritemplate=3.0.1=py_0
+  - urllib3=1.25.11=py_0
+  - voluptuous=0.11.7=py_0
+  - wcwidth=0.2.5=pyh9f0ad1d_2
+  - webencodings=0.5.1=py_1
+  - werkzeug=1.0.1=pyh9f0ad1d_0
+  - wheel=0.35.1=pyh9f0ad1d_0
+  - widgetsnbextension=3.5.1=py37hc8dfbb8_4
+  - x264=1!152.20180806=h14c3975_0
+  - xarray=0.16.1=py_0
+  - xorg-kbproto=1.0.7=h14c3975_1002
+  - xorg-libice=1.0.10=h516909a_0
+  - xorg-libsm=1.2.3=h84519dc_1000
+  - xorg-libx11=1.6.12=h516909a_0
+  - xorg-libxau=1.0.9=h14c3975_0
+  - xorg-libxdmcp=1.1.3=h516909a_0
+  - xorg-libxext=1.3.4=h516909a_0
+  - xorg-libxpm=3.5.13=h516909a_0
+  - xorg-libxrender=0.9.10=h516909a_1002
+  - xorg-libxt=1.1.5=h516909a_1003
+  - xorg-renderproto=0.11.1=h14c3975_1002
+  - xorg-xextproto=7.3.0=h14c3975_1002
+  - xorg-xproto=7.0.31=h14c3975_1007
+  - xz=5.2.5=h516909a_1
+  - yaml=0.2.5=h516909a_0
+  - yarl=1.6.2=py37h8f50634_0
+  - zc.lockfile=2.0=py_0
+  - zeromq=4.3.3=he1b5a44_2
+  - zfp=0.5.5=he1b5a44_4
+  - zipp=3.4.0=py_0
+  - zlib=1.2.11=h516909a_1010
+  - zstd=1.4.5=h6597ccf_2
+  - pip:
+    - --install-option="--prefix=/pio/shared/1/i273233/shared" soundfile==0.10.3.post1
+    - --install-option="--prefix=/pio/shared/1/i273233/shared" tensorboardx==2.1
+    - --install-option="--prefix=/pio/shared/1/i273233/shared" apex==0.1
+    - -e ./..
+prefix: /pio/scratch/2/i273233/miniconda3/envs/202010-fairseq
diff --git a/uwr_related/test_cmd_scribble.sh b/uwr_related/test_cmd_scribble.sh
index d797baa819..59065ce1b6 100644
--- a/uwr_related/test_cmd_scribble.sh
+++ b/uwr_related/test_cmd_scribble.sh
@@ -37,9 +37,30 @@
 #    --enable_padding # crashes without that, needs to make all lines same-size

+#python train.py --distributed-world-size 1 --update-freq 2 \
+#    /pio/scratch/1/i283340/MGR/NewSetup/DistSup/data \
+#    --save-dir ../try_sl1 --num-workers 0 \
+#    --task scribblelens --criterion wav2vec --arch wav2vec2_scribblelens \
+#    --valid-subset test --pad-to-multiples-of 4 `#--max-sample-size 256` \
+#    --log-keys '["prob_perplexity","code_perplexity","temp"]' --quantize-targets --extractor-mode default \
+#    --conv-feature-layers '[(64, (3, 3), (1, 2), (1, 1)), (128, (5, 5), (2, 2), (2, 2)), (256, (3,3), (1, 1), (1, 1)), (256, (3,3), (1, 2), (1, 1)), (512, (3,3), (1, 1), (1, 1)), (512, (3,3), (1, 2), (1, 1)), (512, (3,2), (2, 1), (1, 0))]' \
+#    --final-dim 256 \
+#    --latent-vars 320 --latent-groups 2 --latent-temp '(2,0.5,0.999995)' --infonce \
+#    --optimizer adam --adam-betas '(0.9,0.98)' --adam-eps 1e-06 --lr-scheduler polynomial_decay \
+#    --total-num-update 400000 --lr 0.0005 --warmup-updates 32000 \
+#    --mask-length 10 --mask-prob 0.65 --mask-selection static --mask-other 0 \
+#    --encoder-layerdrop 0.05 --dropout-input 0.1 --dropout-features 0.1 --feature-grad-mult 0.1 \
+#    --loss-weights '[0.1, 10]' --conv-pos 128 --conv-pos-groups 16 \
+#    --num-negatives 100 --cross-sample-negatives 0 \
+#    `#--max-sample-size 250000 --min-sample-size 32000` \
+#    --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 --max-tokens 10000 --max-update 400000 \
+#    --skip-invalid-size-inputs-valid-test --ddp-backend no_c10d \
+#    --enable-padding # crashes without that, needs to make all lines same-size
+
 python train.py --distributed-world-size 1 --update-freq 2 \
     /pio/scratch/1/i283340/MGR/NewSetup/DistSup/data \
-    --save-dir ../try_sl1 --num-workers 0 \
+    --save-dir /pio/scratch/1/i273233/runs/try_sl3 --num-workers 0 \
+    --tensorboard-logdir /pio/scratch/1/i273233/runs/try_sl3 --log-format simple \
     --task scribblelens --criterion wav2vec --arch wav2vec2_scribblelens \
     --valid-subset test --pad-to-multiples-of 4 `#--max-sample-size 256` \
     --log-keys '["prob_perplexity","code_perplexity","temp"]' --quantize-targets --extractor-mode default \