Skip to content

Commit

Permalink
Merge pull request #100 from sanjaynagi/local-snpeffdb-support-18-06-24
Browse files Browse the repository at this point in the history
add local db support for snpeff and update config names
  • Loading branch information
sanjaynagi authored Jun 27, 2024
2 parents 715ca49 + 89d648c commit 69fe338
Show file tree
Hide file tree
Showing 18 changed files with 387 additions and 378 deletions.
17 changes: 9 additions & 8 deletions .test/config/config_paired_end.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
metadata: config/samples.tsv
pipeline: cpu

# Dataset name
dataset: 'Test-GithubActionsCI'

jupyter-book:
results-jupyterbook:
activate: True

fastq:
Expand All @@ -18,8 +19,9 @@ reference:
"resources/reference/Anopheles-gambiae-PEST_TRANSCRIPTS_AgamP4.12-X.fa.gz"
gff:
"resources/reference/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.12-X.gff3"
snpeffdb:
"Anopheles_gambiae"
snpeff:
customdb: True
dbname: Anopheles_gambiae
genes2transcripts:
"resources/Gene2TranscriptMap.tsv"

Expand All @@ -35,7 +37,7 @@ contrasts:
QualityControl:
fastp-trim:
activate: True
mosdepth:
coverage:
activate: True
multiqc:
activate: True
Expand All @@ -45,7 +47,7 @@ DifferentialExpression: # Activate differential expression
gene-level:
activate: True
isoform-level:
activate: True
activate: False

progressiveGenes:
activate: True
Expand All @@ -60,7 +62,6 @@ DifferentialExpression: # Activate differential expression

VariantAnalysis:
activate: True
caller: freebayes #haplotypecaller # Options: haplotypecaller, freebayes
ploidy: 10
chunks: 9 # Number of chunks to split the genome into when parallelising freebayes
# Number of chunks to split the genome into when parallelising freebayes
Expand All @@ -69,15 +70,15 @@ VariantAnalysis:
activate: True
missingness: 0.4

summaryStatistics:
geneticDiversity:
activate: True
missingness: 0.4

selection:
activate: True
missingness: 0.5
# Whether to run the Population Branch Statistic (PBS) analysis. Needs three conditions: two closely related and one outgroup.
pbs:
population-branch-statistic:
activate: False
contrasts: ['PiriTia_ContTia_Kisumu']

Expand Down
15 changes: 8 additions & 7 deletions .test/config/config_single_end.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
metadata: config/samples.tsv
pipeline: cpu

# Dataset name
dataset: 'Test-GithubActionsCI'

jupyter-book:
results-jupyterbook:
activate: True

fastq:
Expand All @@ -20,8 +21,9 @@ reference:
"resources/reference/Anopheles-gambiae-PEST_TRANSCRIPTS_AgamP4.12-X.fa.gz"
gff:
"resources/reference/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.12-X.gff3"
snpeffdb:
"Anopheles_gambiae"
snpeff:
customdb: False
dbname: Anopheles_gambiae
genes2transcripts:
"resources/Gene2TranscriptMap.tsv"

Expand All @@ -38,7 +40,7 @@ contrasts:
QualityControl:
fastp-trim:
activate: True
mosdepth:
coverage:
activate: True
multiqc:
activate: True
Expand All @@ -65,7 +67,6 @@ DifferentialExpression: # Activate differential expression

VariantAnalysis:
activate: True
caller: freebayes
ploidy: 10
chunks: 9 # Number of chunks to split the genome into when parallelising freebayes
# Number of chunks to split the genome into when parallelising freebayes
Expand All @@ -74,15 +75,15 @@ VariantAnalysis:
activate: True
missingness: 0.4

summaryStatistics:
geneticDiversity:
activate: True
missingness: 0.4

selection:
activate: True
missingness: 0.5
# Whether to run the Population Branch Statistic (PBS) analysis. Needs three conditions: two closely related and one outgroup.
pbs:
population-branch-statistic:
activate: False
contrasts:
- 'PiriTia_ContTia_Kisumu'
Expand Down
17 changes: 8 additions & 9 deletions config/exampleconfig.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RNA-Seq-Pop

pipeline: parabricks #parabricks or cpu
metadata: config/samples.tsv # samplesheet metadata file

dataset: 'Ag_Bouake' # Dataset name: Can be anything, will be used to name some main output files
Expand Down Expand Up @@ -27,10 +27,11 @@ reference:
"resources/reference/Anopheles-gambiae-PEST_TRANSCRIPTS_AgamP4.12.fa" # Path to transcriptome reference FASTA file
gff:
"resources/reference/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.12.gff3" # Path to GFF annotation file
snpeffdb:
"Anopheles_gambiae" # SNPeff database name
snpeff:
customdb: False
dbname: Anopheles_gambiae # SNPeff database name
genes2transcripts:
"resources/exampleGene2TranscriptMap.tsv" # gene names file with gene and transcript names
resources/exampleGene2TranscriptMap.tsv # gene names file with gene and transcript names

# Chromosome names for the appropriate species.
# Please ensure that these correspond exactly to the reference fasta/gff files. Extra unwanted chromosomes (unplaced contigs) can be ignored.
Expand All @@ -48,7 +49,7 @@ contrasts:
QualityControl:
fastp-trim:
activate: False
mosdepth:
coverage:
activate: True
multiqc:
activate: True
Expand All @@ -75,23 +76,22 @@ DifferentialExpression:

VariantAnalysis:
activate: True
caller: freebayes
ploidy: 10 # Ploidy level for freebayes to call at (generally we are using pooled samples). For diploid organisms, this should be 2 * number of individuals in each pool
chunks: 9 # Number of chunks to split each chromosome into when parallelising freebayes. 9 or fewer is recommended.

pca: # Run PCA on the genotype data
activate: True
missingness: 1

summaryStatistics: # Estimate Population Genetic Summary Statistics such as Dxy, Pi
geneticDiversity: # Estimate Population Genetic Summary Statistics such as Dxy, Pi
activate: True
missingness: 1

selection: # Calculate Fst and PBS per gene and in windows
activate: True
missingness: 1

pbs:
population-branch-statistic:
activate: True # Activate Population Branch Statistic analysis (needs three conditions: two closely related and an outgroup). For resistance studies, use survivors_unexposed_susceptible
contrasts:
- 'gambiaePM_gambiaeCont_Kisumu'
Expand All @@ -114,7 +114,6 @@ VariantAnalysis:
# - "2Ru"



miscellaneous:
VariantsOfInterest:
activate: True
Expand Down
10 changes: 5 additions & 5 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ welcome(version="v2.0.2")

include: "rules/qc.smk"
include: "rules/diffexp.smk"
include: "rules/alignment.smk"
include: "rules/variantCalling.smk"
include: "rules/filterAnnotate.smk"
include: "rules/utilities.smk"
include: "rules/hisat2-freebayes.smk"
include: "rules/snpEff.smk"
include: "rules/variantAnalysis.smk"
include: "rules/jupyter-book.smk"

if config['VariantAnalysis']['caller'] == 'haplotypecaller':
include: "rules/parabricks-gpu.smk"
if config['pipeline'] == 'parabricks':
include: "rules/star-haplotypecaller.smk"

rule all:
input:
Expand Down
2 changes: 1 addition & 1 deletion workflow/notebooks/windowed-selection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"comparisons = comparisons.contrast.str.split(\"_\", expand=True)\n",
"comparisons.columns = ['sus', 'res']\n",
"comparisons = [list(row) for i,row in comparisons.iterrows()]\n",
"pbscomps = config_params[\"VariantAnalysis\"]['selection']['pbs']['contrasts']\n",
"pbscomps = config_params[\"VariantAnalysis\"]['selection']['population-branch-statistic']['contrasts']\n",
"\n",
"for i, contig in enumerate(contigs):\n",
"\n",
Expand Down
127 changes: 0 additions & 127 deletions workflow/rules/alignment.smk

This file was deleted.

Loading

0 comments on commit 69fe338

Please sign in to comment.