Skip to content

Commit

Permalink
bump version from 0.3.0 -> 0.3.1
Browse files Browse the repository at this point in the history
* add `trim_first_g` option
* minor document updates
  • Loading branch information
abearab authored May 12, 2024
2 parents a2bad58 + 3135061 commit 067035d
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 26 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ adata = ad.AnnData(

screen = PooledScreens(adata)
```
<img width="600" alt="image" src="https://github.com/abearab/ScreenPro2/assets/53412130/d1c8c3ad-3668-4390-8b1d-bf72b591a927">

<img width="600" alt="image" src="https://github.com/ArcInstitute/ScreenPro2/assets/53412130/bb38d119-8f24-44fa-98ab-7ef4457ef8d2">

#### Perform Screen Processing Analysis
Once the screen object is created, you can use several available workflows to calculate the phenotype scores and statistics by comparing each entry in the reference sgRNA library between screen arms. Then, these scores and statistics are used to nominate hits.
Expand Down Expand Up @@ -212,12 +213,16 @@ Currently, ScreenPro2 has easy-to-use workflows for the following CRISPR screen
### dCas9 CRISPRa/i single-sgRNA screens
[Horlbeck et al., _eLife_ (2016)](http://dx.doi.org/10.7554/eLife.19760)

Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perform statistical comparisons at the guide level or gene level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perform statistical comparisons at the guide level or gene level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen.

<!-- TODO: Add link to example / tutorial -->

### dCas9 CRISPRa/i dual-sgRNA screens
[Replogle et al., _eLife_ (2022)](https://elifesciences.org/articles/81856)

Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon.
Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen.

<!-- TODO: Add link to example / tutorial -->

<!-- ### multiCas12a CRISPRi screens -->

Expand Down
1 change: 0 additions & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ Welcome to ScreenPro2's documentation!
:maxdepth: 3
:caption: Module Documentation

screenpro
assays
ngs
phenotype
Expand Down
13 changes: 0 additions & 13 deletions docs/source/screenpro.rst

This file was deleted.

2 changes: 1 addition & 1 deletion screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
from .ngs import Counter
from .assays import PooledScreens, GImaps

__version__ = "0.3.0"
__version__ = "0.3.1"
__author__ = "Abe Arab"
__email__ = '[email protected]' # "[email protected]"
38 changes: 30 additions & 8 deletions screenpro/ngs/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _get_sgRNA_table(self):

return sgRNA_table

def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protospacer_length, verbose=False):
def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, trim_first_g, protospacer_length, write, verbose=False):
if verbose: print(green(sample_id, ['bold']))
get_counts = True

Expand All @@ -63,9 +63,13 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp
if verbose: print('skip loading count file, force write is set ...')

if get_counts:
if trim_first_g:
trim5p_start = 2
else:
trim5p_start = 1
df_count = cas9.fastq_to_count_single_guide(
fastq_file_path=f'{fastq_dir}/{sample_id}.fastq.gz',
trim5p_start=1,
trim5p_start=trim5p_start,
trim5p_length=protospacer_length,
verbose=verbose
)
Expand All @@ -83,7 +87,7 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp

return out

def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, write, protospacer_A_length, protospacer_B_length, verbose=False):
def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, trim_first_g, protospacer_A_length, protospacer_B_length, write, verbose=False):
if verbose: print(green(sample_id, ['bold']))
get_counts = True

Expand All @@ -97,12 +101,28 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,
if verbose: print('skip loading count file, force write is set ...')

if get_counts:
if get_counts:
if trim_first_g == True or trim_first_g == {'A':True, 'B':True}:
trim5p_pos1_start = 2
trim5p_pos2_start = 2
elif trim_first_g == False or trim_first_g == {'A':False, 'B':False}:
trim5p_pos1_start = 1
trim5p_pos2_start = 1
elif trim_first_g == {'A':True, 'B':False}:
trim5p_pos1_start = 2
trim5p_pos2_start = 1
elif trim_first_g == {'A':False, 'B':True}:
trim5p_pos1_start = 1
trim5p_pos2_start = 2
else:
raise ValueError("Invalid trim_first_g argument. Please provide a boolean or a dictionary with 'A' and 'B' keys.")

df_count = cas9.fastq_to_count_dual_guide(
R1_fastq_file_path=f'{fastq_dir}/{sample_id}_R1.fastq.gz',
R2_fastq_file_path=f'{fastq_dir}/{sample_id}_R2.fastq.gz',
trim5p_pos1_start=1,
trim5p_pos1_start=trim5p_pos1_start,
trim5p_pos1_length=protospacer_A_length,
trim5p_pos2_start=1,
trim5p_pos2_start=trim5p_pos2_start,
trim5p_pos2_length=protospacer_B_length,
verbose=verbose
)
Expand All @@ -121,7 +141,7 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant,

return out

def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', write=True, parallel=False, verbose=False):
def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', trim_first_g=False, write=True, parallel=False, verbose=False):
'''Get count matrix for given samples
'''
if self.cas_type == 'cas9':
Expand All @@ -141,8 +161,9 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
cnt = self._process_cas9_single_guide_sample(
fastq_dir=fastq_dir,
sample_id=sample_id,
write=write,
trim_first_g=trim_first_g,
protospacer_length=protospacer_length,
write=write,
verbose=verbose
)

Expand Down Expand Up @@ -181,9 +202,10 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type=
fastq_dir=fastq_dir,
sample_id=sample_id,
get_recombinant=get_recombinant,
write=write,
trim_first_g=trim_first_g,
protospacer_A_length=protospacer_A_length,
protospacer_B_length=protospacer_B_length,
write=write,
verbose=verbose
)
counts[sample_id] = cnt['mapped']
Expand Down

0 comments on commit 067035d

Please sign in to comment.