# omic_config.yaml

##############################################################################
# Genome and project-specific files that will change from project-to-project
##############################################################################

# GTF annotation file for the genome build you want to use
gtf_file: /home/exacloud/lustre1/CEDAR/anurpa/genomes/gencode.v27.annotation.gtf
# BED file passed to the RSeQC read_distribution function
bed_file: /home/exacloud/lustre1/CEDAR/roskamsh/projects/Omics-QC-pipeline/data/gencode_v27.bed
# Location of the pre-built STAR indexes for hg38
star_index: /home/exacloud/lustre1/CEDAR/anurpa/genomes/
# Gene annotation .Rdata file (the file name suggests biomaRt, hg38, Ensembl 90)
# NOTE(review): presumably used to annotate/filter genes -- confirm against the pipeline rules
filter_anno: /home/groups/CEDAR/anno/biomaRt/hg38.Ens_90.biomaRt.geneAnno.Rdata

####################################################################
# Tool-specific files that will not change from project-to-project
####################################################################

# FASTA file of the Illumina adapter sequences that Trimmomatic trims out
adapter: /home/exacloud/lustre1/CEDAR/roskamsh/projects/Omics-QC-pipeline/data/TruSeq2-SE.fa
# Configuration pointing at pre-built genome indexes for various organisms,
# which reads are screened against to look for contamination
conf: /home/groups/CEDAR/tools/FastQ_Screen_Genomes/fastq_screen.conf
# Path to the STAR tool used in this pipeline
star_tool: /home/exacloud/lustre1/CEDAR/tools/STAR/tags/2.5.3a/bin/Linux_x86_64/STAR

##########################################
# Important project-specific paths/files
##########################################

# Sample metadata table required for the differential expression analysis
omic_meta_data: /path/to/metadata.txt

#############################################################################################
# Project-specific specifications you will need to make prior to submission of the pipeline
#############################################################################################

# Comma-separated biotypes to include in your gene counts table
# Example: protein_coding,lincRNA,sRNA,rRNA,snRNA,snoRNA
biotypes: protein_coding
# Unique project identifier; it is concatenated onto the counts table file name
project_id: your_project
# Genome assembly used for GO analysis.
# Required format: <genome assembly>.<Ensembl version>, i.e. the assembly first
# and the Ensembl version second, separated by a period.
assembly: hg38.90
# Remove mito genes (1/0)
mito: 1
# Print the GO term tree (1/0)
printTree: 1
# Fold change cutoff (not log2 transformed) for GO analysis and volcano plot
FC: 2
# FDR-adjusted p-value cutoff for GO analysis and volcano plot
adjp: 0.01
# Column name of the characteristic to run DE on (e.g. Diagnosis, genotype).
# It must be a column in your omic_meta_data.
linear_model: Condition
# Column name in omic_meta_data that holds your sample IDs.
# These should match the sample IDs in your fastq files.
sample_id: SampleID
# Columns of omic_meta_data to annotate your heatmaps by.
# This is a QC step to look for batch effects, so enter the columns that could
# potentially introduce a batch effect in your data.
meta_columns_to_plot:
  Condition: Condition
# Contrasts to run differential expression on.
# Example: to compare mutant (mut) against wild type (wt), specify mut-vs-wt.
# Important: within each contrast, always list the baseline SECOND.
diffexp:
  # Contrasts for the DESeq2 results method
  contrasts:
    Treatment1-vs-Control: [Treatment1, Control]
    Treatment2-vs-Control: [Treatment2, Control]
    Treatment3-vs-Control: [Treatment3, Control]
  # If you would like to run DE on a subsetted group of your conditions,
  # list that group under the LRT option.
  # Otherwise, leave the value after LRT empty.
  LRT:
    - Treatment1
    - Treatment2
    - Treatment3

# Column name(s) in your omic_meta_data file to colour your PCA plot by
pca:
  # Columns of the sample sheet to use for PCA
  labels: [Condition]

# Colouring of the QC plots.
# - To use an RColorBrewer palette, list that palette under rcolorbrewer;
#   otherwise, write "NA".
# - To use specific colours, list them under discrete, one colour per list entry.
#   The discrete colours are assigned to your Conditions (i.e. linear_model) in
#   the order in which they show up in your metadata table when it is ordered
#   alphabetically by sample ID.
#   There must be the same number of colours listed as there are Conditions.
# If no options are specified, the default ggplot2 colours are used.
colors:
  rcolorbrewer: [NA]
  discrete: [red, blue, green, yellow]