Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/dereneaton/ipyrad
Browse files Browse the repository at this point in the history
  • Loading branch information
eaton-lab committed Jan 3, 2020
2 parents d12e720 + 0aaec41 commit 0306f25
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 18 deletions.
2 changes: 1 addition & 1 deletion ipyrad/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import subprocess as _sps

# Dunders
__version__ = "0.9.24"
__version__ = "0.9.26"
__author__ = "Deren Eaton & Isaac Overcast"

# CLI __main__ changes to 0
Expand Down
40 changes: 24 additions & 16 deletions ipyrad/assemble/clustmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def run(self):
if self.data.params.assembly_method == "denovo":

# vsearch merge read pairs back together based on overlap.
# i: edits/concatedits.fq.gz or sample.files.edits
# i: tmpdir/concatedits.fq.gz or sample.files.edits
# o: tmpdir/{}_merged.fastq, tmpdir/{}_nonmerged_R[1,2].fastq
self.remote_run(
function=merge_pairs_with_vsearch,
Expand Down Expand Up @@ -718,7 +718,7 @@ def dereplicate(data, sample, nthreads):
data.dirs.edits,
"{}.trimmed_R1_.fastq.gz".format(sample.name)),
os.path.join(
data.dirs.edits,
data.tmpdir,
"{}_R1_concatedit.fq.gz".format(sample.name)),
os.path.join(
data.tmpdir,
Expand All @@ -732,7 +732,7 @@ def dereplicate(data, sample, nthreads):

# datatypes options
strand = "plus"
if data.params.datatype is ('gbs' or '2brad'):
if data.params.datatype in ['gbs', '2brad']:
strand = "both"

# do dereplication with vsearch
Expand All @@ -743,6 +743,7 @@ def dereplicate(data, sample, nthreads):
"--output", os.path.join(data.tmpdir, sample.name + "_derep.fa"),
# "--threads", str(nthreads),
"--fasta_width", str(0),
"--minseqlength", str(data.params.filter_min_trim_len),
"--sizeout",
"--relabel_md5",
"--quiet",
Expand All @@ -765,10 +766,10 @@ def concat_multiple_edits(data, sample):

# define output files
concat1 = os.path.join(
data.dirs.edits,
data.tmpdir,
"{}_R1_concatedit.fq.gz".format(sample.name))
concat2 = os.path.join(
data.dirs.edits,
data.tmpdir,
"{}_R2_concatedit.fq.gz".format(sample.name))

# check for files to concat
Expand Down Expand Up @@ -801,12 +802,12 @@ def merge_pairs_with_vsearch(data, sample, revcomp):
# input files (select only the top one)
in1 = [
os.path.join(data.tmpdir, "{}-tmp-umap1.fastq".format(sample.name)),
os.path.join(data.dirs.edits, "{}_R1_concatedit.fq.gz".format(sample.name)),
os.path.join(data.tmpdir, "{}_R1_concatedit.fq.gz".format(sample.name)),
sample.files.edits[0][0],
]
in2 = [
os.path.join(data.tmpdir, "{}-tmp-umap2.fastq".format(sample.name)),
os.path.join(data.dirs.edits, "{}_R2_concatedit.fq.gz".format(sample.name)),
os.path.join(data.tmpdir, "{}_R2_concatedit.fq.gz".format(sample.name)),
sample.files.edits[0][1],
]
index = min([i for i, j in enumerate(in1) if os.path.exists(j)])
Expand Down Expand Up @@ -901,16 +902,15 @@ def merge_end_to_end(data, sample, revcomp, append, identical=False):
concat2 = os.path.join(
data.tmpdir,
"{}_R2_concatedit.fq.gz".format(sample.name))
edits1 = os.path.join(
data.dirs.edits,
"{}.trimmed_R1_.fastq.gz".format(sample.name))
edits2 = os.path.join(
data.dirs.edits,
"{}.trimmed_R2_.fastq.gz".format(sample.name))
# data.dirs.edits doesn't exist if you merge after step 2, so
# here we access the edits files through the sample object.
# Sorry it makes the code less harmonious. iao 12/31/19.
edits1 = sample.files.edits[0][0]
edits2 = sample.files.edits[0][1]

# file precedence
order1 = (edits1, concat1, nonmerged1, altmapped1)
order2 = (edits2, concat2, nonmerged2, altmapped2)
order2 = (edits2, concat2, nonmerged2, altmapped2)
nonm1 = [i for i in order1 if os.path.exists(i)][-1]
nonm2 = [i for i in order2 if os.path.exists(i)][-1]

Expand Down Expand Up @@ -1066,6 +1066,7 @@ def cluster(data, sample, nthreads, force):
"-threads", str(nthreads),
"-notmatched", temphandle,
"-fasta_width", "0",
"--minseqlength", str(data.params.filter_min_trim_len),
# "-fastq_qmax", "100",
"-fulldp",
"-usersort"]
Expand Down Expand Up @@ -2100,8 +2101,15 @@ def build_clusters_from_cigars(data, sample):
"""
# get all regions with reads. Generator to yield (str, int, int)
fullregions = bedtools_merge(data, sample).strip().split("\n")
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
# If no reads map to the reference, fullregions will be [''], so
# we test for that and handle it properly. If you don't do this, the
# regions generator will be empty and this will raise a ValueError.
# Samples without any mapped reads simply pass through with a 0-length clustS file.
if len(fullregions[0]):
regions = (i.split("\t") for i in fullregions)
regions = ((i, int(j), int(k)) for (i, j, k) in regions)
else:
regions = []

# access reads from bam file using pysam
bamfile = pysam.AlignmentFile(
Expand Down
1 change: 1 addition & 0 deletions ipyrad/assemble/clustmap_across.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,7 @@ def cluster(data, jobid, nthreads, print_progress=False):
"-maxaccepts", "1",
"-maxrejects", "0",
"-fasta_width", "0",
"--minseqlength", str(data.params.filter_min_trim_len),
"-threads", str(nthreads), # "0",
"-fulldp",
"-usersort",
Expand Down
2 changes: 1 addition & 1 deletion ipyrad/assemble/consens_se.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def filter_maxhetero(self):

def filter_maxN_minLen(self):
"Return 1 if it PASSED the filter, else 0"
if self.consens.size >= 32:
if self.consens.size >= self.data.params.filter_min_trim_len:
nns = self.consens[self.consens == b"N"].size
if nns > (len(self.consens) * self.maxn):
self.filters['maxn'] += 1
Expand Down
8 changes: 8 additions & 0 deletions newdocs/releasenotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
Release Notes
=============

0.9.26
------
**January 01, 2020**

0.9.25
------
**December 31, 2019**

0.9.24
------
**December 24, 2019**
Expand Down

0 comments on commit 0306f25

Please sign in to comment.