bump version from 0.3.0 -> 0.3.1

* add `trim_first_g` option * minor document updates
ArcInstitute · May 12, 2024 · 067035d · 067035d
2 parents a2bad58 + 3135061
commit 067035d
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 26 deletions.
diff --git a/README.md b/README.md
@@ -156,7 +156,8 @@ adata = ad.AnnData(
 
 screen = PooledScreens(adata)
 ```
-<img width="600" alt="image" src="https://github.com/abearab/ScreenPro2/assets/53412130/d1c8c3ad-3668-4390-8b1d-bf72b591a927">
+
+<img width="600" alt="image" src="https://github.com/ArcInstitute/ScreenPro2/assets/53412130/bb38d119-8f24-44fa-98ab-7ef4457ef8d2">
 
 #### Perform Screen Processing Analysis
 Once the screen object is created, you can use several available workflows to calculate the phenotype scores and statistics by comparing each entry in reference sgRNA library between screen arms. Then, these scores and statistics are used to nominate hits.
@@ -212,12 +213,16 @@ Currently, ScreenPro2 has easy-to-use workflows for the following CRISPR screen
 ### dCas9 CRISPRa/i single-sgRNA screens
 [Horlbeck et al., _eLife_ (2016)](http://dx.doi.org/10.7554/eLife.19760)
 
-Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perfrom statistical comparisons in guide-level or gene-level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
+Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perform statistical comparisons in guide-level or gene-level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen.
+
+<!-- TODO: Add link to example / tutorial -->
 
 ### dCas9 CRISPRa/i dual-sgRNA screens
 [Replogle et al., _eLife_ (2022)](https://elifesciences.org/articles/81856)
 
-Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
+Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen.
+
+<!-- TODO: Add link to example / tutorial -->
 
 <!-- ### multiCas12a CRISPRi screens -->
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -12,7 +12,6 @@ Welcome to ScreenPro2's documentation!
    :maxdepth: 3
    :caption: Module Documentation
 
-   screenpro
    assays
    ngs
    phenotype

diff --git a/docs/source/screenpro.rst b/docs/source/screenpro.rst
diff --git a/screenpro/__init__.py b/screenpro/__init__.py
@@ -6,6 +6,6 @@
 from .ngs import Counter
 from .assays import PooledScreens, GImaps
 
-__version__ = "0.3.0"
+__version__ = "0.3.1"
 __author__ = "Abe Arab"
 __email__ = '[email protected]' # "[email protected]"
diff --git a/screenpro/ngs/counter.py b/screenpro/ngs/counter.py
@@ -49,7 +49,7 @@ def _get_sgRNA_table(self):
 
         return sgRNA_table
 
-    def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protospacer_length, verbose=False):
+    def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, trim_first_g, protospacer_length, write, verbose=False):
         if verbose: print(green(sample_id, ['bold']))
         get_counts = True
 
@@ -63,9 +63,13 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp
                 if verbose: print('skip loading count file, force write is set ...')
 
         if get_counts:
+            if trim_first_g:
+                trim5p_start = 2
+            else:
+                trim5p_start = 1
             df_count = cas9.fastq_to_count_single_guide(
                 fastq_file_path=f'{fastq_dir}/{sample_id}.fastq.gz',
-                trim5p_start=1,
+                trim5p_start=trim5p_start,
                 trim5p_length=protospacer_length,
                 verbose=verbose
             )
@@ -83,7 +87,7 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp
 
         return out
 
-    def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, write, protospacer_A_length, protospacer_B_length, verbose=False):
+    def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, trim_first_g, protospacer_A_length, protospacer_B_length, write, verbose=False):
         if verbose: print(green(sample_id, ['bold']))
         get_counts = True
 
@@ -97,12 +101,28 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,
                 if verbose: print('skip loading count file, force write is set ...')
 
         if get_counts:
+            if get_counts:
+                if trim_first_g == True or trim_first_g == {'A':True, 'B':True}:
+                    trim5p_pos1_start = 2
+                    trim5p_pos2_start = 2
+                elif trim_first_g == False or trim_first_g == {'A':False, 'B':False}:
+                    trim5p_pos1_start = 1
+                    trim5p_pos2_start = 1
+                elif trim_first_g == {'A':True, 'B':False}:
+                    trim5p_pos1_start = 2
+                    trim5p_pos2_start = 1
+                elif trim_first_g == {'A':False, 'B':True}:
+                    trim5p_pos1_start = 1
+                    trim5p_pos2_start = 2
+                else:
+                    raise ValueError("Invalid trim_first_g argument. Please provide a boolean or a dictionary with 'A' and 'B' keys.")
+
             df_count = cas9.fastq_to_count_dual_guide(
                 R1_fastq_file_path=f'{fastq_dir}/{sample_id}_R1.fastq.gz',
                 R2_fastq_file_path=f'{fastq_dir}/{sample_id}_R2.fastq.gz',
-                trim5p_pos1_start=1,
+                trim5p_pos1_start=trim5p_pos1_start,
                 trim5p_pos1_length=protospacer_A_length,
-                trim5p_pos2_start=1,
+                trim5p_pos2_start=trim5p_pos2_start,
                 trim5p_pos2_length=protospacer_B_length,
                 verbose=verbose
             )
@@ -121,7 +141,7 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,
 
         return out
 
-    def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', write=True, parallel=False, verbose=False):
+    def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', trim_first_g=False, write=True, parallel=False, verbose=False):
         '''Get count matrix for given samples
         '''
         if self.cas_type == 'cas9':
@@ -141,8 +161,9 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
                         cnt = self._process_cas9_single_guide_sample(
                             fastq_dir=fastq_dir, 
                             sample_id=sample_id, 
-                            write=write, 
+                            trim_first_g=trim_first_g,
                             protospacer_length=protospacer_length,
+                            write=write,
                             verbose=verbose
                         )
 
@@ -181,9 +202,10 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
                             fastq_dir=fastq_dir, 
                             sample_id=sample_id, 
                             get_recombinant=get_recombinant, 
-                            write=write, 
+                            trim_first_g=trim_first_g,
                             protospacer_A_length=protospacer_A_length,
                             protospacer_B_length=protospacer_B_length,
+                            write=write, 
                             verbose=verbose
                         )
                         counts[sample_id] = cnt['mapped']