From d282e8ae82cae9136d2130bc087809c0461a8889 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Sat, 11 May 2024 17:51:28 -0700 Subject: [PATCH 1/5] add `trim_first_g` argument --- screenpro/ngs/counter.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/screenpro/ngs/counter.py b/screenpro/ngs/counter.py index ebf133f..a9fedc6 100644 --- a/screenpro/ngs/counter.py +++ b/screenpro/ngs/counter.py @@ -49,7 +49,7 @@ def _get_sgRNA_table(self): return sgRNA_table - def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protospacer_length, verbose=False): + def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, trim_first_g, protospacer_length, write, verbose=False): if verbose: print(green(sample_id, ['bold'])) get_counts = True @@ -63,9 +63,13 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp if verbose: print('skip loading count file, force write is set ...') if get_counts: + if trim_first_g: + trim5p_start = 2 + else: + trim5p_start = 1 df_count = cas9.fastq_to_count_single_guide( fastq_file_path=f'{fastq_dir}/{sample_id}.fastq.gz', - trim5p_start=1, + trim5p_start=trim5p_start, trim5p_length=protospacer_length, verbose=verbose ) @@ -83,7 +87,7 @@ def _process_cas9_single_guide_sample(self, fastq_dir, sample_id, write, protosp return out - def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, write, protospacer_A_length, protospacer_B_length, verbose=False): + def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, trim_first_g, protospacer_A_length, protospacer_B_length, write, verbose=False): if verbose: print(green(sample_id, ['bold'])) get_counts = True @@ -97,12 +101,28 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, if verbose: print('skip loading count file, force write is set ...') if get_counts: + if get_counts: + if trim_first_g == True or 
trim_first_g == {'A':True, 'B':True}: + trim5p_pos1_start = 2 + trim5p_pos2_start = 2 + elif trim_first_g == False or trim_first_g == {'A':False, 'B':False}: + trim5p_pos1_start = 1 + trim5p_pos2_start = 1 + elif trim_first_g == {'A':True, 'B':False}: + trim5p_pos1_start = 2 + trim5p_pos2_start = 1 + elif trim_first_g == {'A':False, 'B':True}: + trim5p_pos1_start = 1 + trim5p_pos2_start = 2 + else: + raise ValueError("Invalid trim_first_g argument. Please provide a boolean or a dictionary with 'A' and 'B' keys.") + df_count = cas9.fastq_to_count_dual_guide( R1_fastq_file_path=f'{fastq_dir}/{sample_id}_R1.fastq.gz', R2_fastq_file_path=f'{fastq_dir}/{sample_id}_R2.fastq.gz', - trim5p_pos1_start=1, + trim5p_pos1_start=trim5p_pos1_start, trim5p_pos1_length=protospacer_A_length, - trim5p_pos2_start=1, + trim5p_pos2_start=trim5p_pos2_start, trim5p_pos2_length=protospacer_B_length, verbose=verbose ) @@ -121,7 +141,7 @@ def _process_cas9_dual_guide_sample(self, fastq_dir, sample_id, get_recombinant, return out - def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', write=True, parallel=False, verbose=False): + def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type='cas9', protospacer_length='auto', trim_first_g=False, write=True, parallel=False, verbose=False): '''Get count matrix for given samples ''' if self.cas_type == 'cas9': @@ -141,8 +161,9 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type= cnt = self._process_cas9_single_guide_sample( fastq_dir=fastq_dir, sample_id=sample_id, - write=write, + trim_first_g=trim_first_g, protospacer_length=protospacer_length, + write=write, verbose=verbose ) @@ -181,9 +202,10 @@ def get_counts_matrix(self, fastq_dir, samples, get_recombinant=False, cas_type= fastq_dir=fastq_dir, sample_id=sample_id, get_recombinant=get_recombinant, - write=write, + trim_first_g=trim_first_g, protospacer_A_length=protospacer_A_length, 
protospacer_B_length=protospacer_B_length, + write=write, verbose=verbose ) counts[sample_id] = cnt['mapped'] From 69d1ce8b52f9755291862a17cfee493aba631099 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Sat, 11 May 2024 19:37:30 -0700 Subject: [PATCH 2/5] improve docs --- docs/source/index.rst | 1 - docs/source/screenpro.rst | 13 ------------- 2 files changed, 14 deletions(-) delete mode 100644 docs/source/screenpro.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 0956c85..4d9dd0f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,6 @@ Welcome to ScreenPro2's documentation! :maxdepth: 3 :caption: Module Documentation - screenpro assays ngs phenotype diff --git a/docs/source/screenpro.rst b/docs/source/screenpro.rst deleted file mode 100644 index dffd7b7..0000000 --- a/docs/source/screenpro.rst +++ /dev/null @@ -1,13 +0,0 @@ -Utility functions -====================== - -.. automodule:: screenpro - :members: - :undoc-members: - :show-inheritance: - - -.. automodule:: screenpro.utils - :members: - :undoc-members: - :show-inheritance: From 5fc80a28df479bd9ad2b56315b2846a0bfbb3fe6 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Sat, 11 May 2024 19:39:00 -0700 Subject: [PATCH 3/5] improve docs --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 99c5dc8..15b78b3 100644 --- a/README.md +++ b/README.md @@ -212,12 +212,16 @@ Currently, ScreenPro2 has easy-to-use workflows for the following CRISPR screen ### dCas9 CRISPRa/i single-sgRNA screens [Horlbeck et al., _eLife_ (2016)](http://dx.doi.org/10.7554/eLife.19760) -Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perfrom statistical comparisons in guide-level or gene-level between screen arms. 
[ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon. +Horlbeck et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses a single sgRNA within a single plasmid and then there are up to 10 sgRNAs per gene. The multiple sgRNAs per gene can be used to perform statistical comparisons in guide-level or gene-level between screen arms. [ScreenProcessing](https://github.com/mhorlbeck/ScreenProcessing) has been developed to process data from this type of screen. We reimplemented the same workflow in ScreenPro2 and it has all the necessary tools to process data from this type of screen. + + ### dCas9 CRISPRa/i dual-sgRNA screens [Replogle et al., _eLife_ (2022)](https://elifesciences.org/articles/81856) -Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen. An automated workflow / pipeline will be available soon. +Replogle et al. developed a CRISPR interference (CRISPRi) and CRISPR activation (CRISPRa) screening platform that uses two sgRNAs per gene within a single plasmid, and it has been used to perform genome-scale CRISPRi screens. ScreenPro2 has all the necessary tools to process data from this type of screen.
+ + From 24142c395bb81a226d891f94088f2796a50ef113 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Sat, 11 May 2024 20:00:42 -0700 Subject: [PATCH 4/5] update pic --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 15b78b3..0c26756 100644 --- a/README.md +++ b/README.md @@ -156,7 +156,8 @@ adata = ad.AnnData( screen = PooledScreens(adata) ``` -image + +image #### Perform Screen Processing Analysis Once the screen object is created, you can use several available workflows to calculate the phenotype scores and statisitics by comparing each entry in reference sgRNA library between screen arms. Then, these scores and statistics are used to nominate hits. From 313506194a434467e9af377b86e77de33feef2fa Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Sat, 11 May 2024 20:03:40 -0700 Subject: [PATCH 5/5] update version --- screenpro/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 59bc90b..07536f1 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -6,6 +6,6 @@ from .ngs import Counter from .assays import PooledScreens, GImaps -__version__ = "0.3.0" +__version__ = "0.3.1" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" \ No newline at end of file