diff --git a/README.md b/README.md
index 99c5dc8..0c26756 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,8 @@ adata = ad.AnnData(
screen = PooledScreens(adata)
```
-
+
+
#### Perform Screen Processing Analysis
Once the screen object is created, you can use several available workflows to calculate the phenotype scores and statisitics by comparing each entry in reference sgRNA library between screen arms. Then, these scores and statistics are used to nominate hits.
@@ -212,12 +213,16 @@ Currently, ScreenPro2 has easy-to-use workflows for the following CRISPR screen
### dCas9 CRISPRa/i single-sgRNA screens
[Horlbeck et al., _eLife_ (2016)](http://dx.doi.org/10.7554/eLife.19760)
-Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perfrom statistical comparisons in guide-level or gene-level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
+Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid, with up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perform statistical comparisons at the guide level or gene level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen.
+
+
### dCas9 CRISPRa/i dual-sgRNA screens
[Replogle et al., _eLife_ (2022)](https://elifesciences.org/articles/81856)
-Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
+Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen.
+
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 0956c85..4d9dd0f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,7 +12,6 @@ Welcome to ScreenPro2's documentation!
:maxdepth: 3
:caption: Module Documentation
- screenpro
assays
ngs
phenotype
diff --git a/docs/source/screenpro.rst b/docs/source/screenpro.rst
deleted file mode 100644
index dffd7b7..0000000
--- a/docs/source/screenpro.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-Utility functions
-======================
-
-.. automodule:: screenpro
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-.. automodule:: screenpro.utils
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/screenpro/__init__.py b/screenpro/__init__.py
index 59bc90b..07536f1 100644
--- a/screenpro/__init__.py
+++ b/screenpro/__init__.py
@@ -6,6 +6,6 @@
from .ngs import Counter
from .assays import PooledScreens, GImaps
-__version__ = "0.3.0"
+__version__ = "0.3.1"
__author__ = "Abe Arab"
__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"
\ No newline at end of file
diff --git a/screenpro/ngs/counter.py b/screenpro/ngs/counter.py
index ebf133f..a9fedc6 100644
--- a/screenpro/ngs/counter.py
+++ b/screenpro/ngs/counter.py
@@ -49,7 +49,7 @@ def _get_sgRNA_table(self):
return sgRNA_table
- def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protospacer_length, verbose=False):
+ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, trim_first_g, protospacer_length, write, verbose=False):
if verbose: print(green(sample_id, ['bold']))
get_counts = True
@@ -63,9 +63,13 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp
if verbose: print('skip loading count file, force write is set ...')
if get_counts:
+ if trim_first_g:
+ trim5p_start = 2
+ else:
+ trim5p_start = 1
df_count = cas9.fastq_to_count_single_guide(
fastq_file_path=f'{fastq_dir}/{sample_id}.fastq.gz',
- trim5p_start=1,
+ trim5p_start=trim5p_start,
trim5p_length=protospacer_length,
verbose=verbose
)
@@ -83,7 +87,7 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp
return out
- def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, write, protospacer_A_length, protospacer_B_length, verbose=False):
+ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, trim_first_g, protospacer_A_length, protospacer_B_length, write, verbose=False):
if verbose: print(green(sample_id, ['bold']))
get_counts = True
@@ -97,12 +101,28 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,
if verbose: print('skip loading count file, force write is set ...')
if get_counts:
+ if get_counts:
+ if trim_first_g == True or trim_first_g == {'A':True, 'B':True}:
+ trim5p_pos1_start = 2
+ trim5p_pos2_start = 2
+ elif trim_first_g == False or trim_first_g == {'A':False, 'B':False}:
+ trim5p_pos1_start = 1
+ trim5p_pos2_start = 1
+ elif trim_first_g == {'A':True, 'B':False}:
+ trim5p_pos1_start = 2
+ trim5p_pos2_start = 1
+ elif trim_first_g == {'A':False, 'B':True}:
+ trim5p_pos1_start = 1
+ trim5p_pos2_start = 2
+ else:
+ raise ValueError("Invalid trim_first_g argument. Please provide a boolean or a dictionary with 'A' and 'B' keys.")
+
df_count = cas9.fastq_to_count_dual_guide(
R1_fastq_file_path=f'{fastq_dir}/{sample_id}_R1.fastq.gz',
R2_fastq_file_path=f'{fastq_dir}/{sample_id}_R2.fastq.gz',
- trim5p_pos1_start=1,
+ trim5p_pos1_start=trim5p_pos1_start,
trim5p_pos1_length=protospacer_A_length,
- trim5p_pos2_start=1,
+ trim5p_pos2_start=trim5p_pos2_start,
trim5p_pos2_length=protospacer_B_length,
verbose=verbose
)
@@ -121,7 +141,7 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,
return out
- def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', write=True, parallel=False, verbose=False):
+ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', trim_first_g=False, write=True, parallel=False, verbose=False):
'''Get count matrix for given samples
'''
if self.cas_type == 'cas9':
@@ -141,8 +161,9 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
cnt = self._process_cas9_single_guide_sample(
fastq_dir=fastq_dir,
sample_id=sample_id,
- write=write,
+ trim_first_g=trim_first_g,
protospacer_length=protospacer_length,
+ write=write,
verbose=verbose
)
@@ -181,9 +202,10 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
fastq_dir=fastq_dir,
sample_id=sample_id,
get_recombinant=get_recombinant,
- write=write,
+ trim_first_g=trim_first_g,
protospacer_A_length=protospacer_A_length,
protospacer_B_length=protospacer_B_length,
+ write=write,
verbose=verbose
)
counts[sample_id] = cnt['mapped']