forked from ohsu-cedar-comp-hub/Bulk-RNA-seq-pipeline-SE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathomic_config.yaml
115 lines (105 loc) · 4.64 KB
/
omic_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
##############################################################################
# Genome and project-specific files that will change from project-to-project
##############################################################################
# GTF file for the genome build you would like to use
gtf_file: /home/exacloud/lustre1/CEDAR/anurpa/genomes/gencode.v27.annotation.gtf
# Bed file for rseqc function read_distribution
bed_file: /home/exacloud/lustre1/CEDAR/roskamsh/projects/Omics-QC-pipeline/data/gencode_v27.bed
# Directory holding the pre-built STAR indexes for hg38
star_index: /home/exacloud/lustre1/CEDAR/anurpa/genomes/
# Gene annotation .Rdata file (biomaRt, hg38 / Ensembl 90 according to the file name)
# NOTE(review): presumably used to annotate/filter the counts table - confirm against the rule that reads filter_anno
filter_anno: /home/groups/CEDAR/anno/biomaRt/hg38.Ens_90.biomaRt.geneAnno.Rdata
####################################################################
# Tool-specific files that will not change from project-to-project
####################################################################
# Fasta file including the Illumina adapters to be trimmed out via Trimmomatic
adapter: /home/exacloud/lustre1/CEDAR/roskamsh/projects/Omics-QC-pipeline/data/TruSeq2-SE.fa
# FastQ Screen configuration listing the pre-built genome indexes for various organisms
# which we will screen against to look for contamination
conf: /home/groups/CEDAR/tools/FastQ_Screen_Genomes/fastq_screen.conf
# Paths to various tools which are used in this pipeline
star_tool: /home/exacloud/lustre1/CEDAR/tools/STAR/tags/2.5.3a/bin/Linux_x86_64/STAR
##########################################
# Important project-specific paths/files
##########################################
# Metadata needed for differential expression analysis
# (placeholder path - replace with the location of your own metadata table)
omic_meta_data: /path/to/metadata.txt
#############################################################################################
# Project-specific specifications you will need to make prior to submission of the pipeline
#############################################################################################
# Biotypes you would like to include in your gene counts table
# Example: protein_coding,lincRNA,sRNA,rRNA,snRNA,snoRNA
biotypes: protein_coding
# Unique project identifier which will be concatenated onto your counts table file name.
project_id: your_project
# Genome assembly used for GO analysis. The format must be as below: the genome assembly first
# and the version of Ensembl second, separated by a period.
assembly: hg38.90
# Remove mito genes (1/0)
mito: 1
# Option to print GO term tree (0/1)
printTree: 1
# Fold change cutoff (not log2 transformed) for GO analysis and volcano plot
FC: 2
# FDR adjusted p-value cutoff for GO analysis and volcano plot
adjp: 0.01
# The column name of the characteristic you would like to do DE on. Example: Diagnosis, genotype, etc.
# This must be a column in your omic_meta_data.
linear_model: Condition
# The column name in which your sampleIDs are defined in omic_meta_data.
# These should match the sampleIDs in your fastq files.
sample_id: SampleID
# The column names in your omic_meta_data file which you would like to annotate your heatmaps by.
# This is used as a QC to look for batch effects.
# Enter columns which could potentially introduce a batch effect in your data.
meta_columns_to_plot:
  Condition: Condition
# Contrasts which you would like to run differential expression on.
# For example, if you want to look at wild type (wt) vs. mutant (mut), you would specify mut-vs-wt
# It is important that when listing your types here, you write your baseline SECOND
diffexp:
  # contrasts for the deseq2 results method
  # (each contrast name maps to a two-item list: the group of interest first, the baseline second)
  contrasts:
    Treatment1-vs-Control:
      - Treatment1
      - Control
    Treatment2-vs-Control:
      - Treatment2
      - Control
    Treatment3-vs-Control:
      - Treatment3
      - Control
  # If you would like to run DE on a subsetted group of your conditions, list that group under the LRT option
  # Otherwise, leave empty space after LRT
  LRT:
    - Treatment1
    - Treatment2
    - Treatment3
# The column name in your omic_meta_data file to colour your PCA plot by
pca:
  labels:
    # columns of sample sheet to use for PCA
    - Condition
# If you would like to colour your QC plots by an RColorBrewer palette, list that palette under the
# rcolorbrewer parameter; otherwise, write "NA"
# If you would like to colour your QC plots by specific colours, please list them under the discrete
# category, with one colour for each bullet point
## The discrete colours will be assigned to your Conditions (ie. linear_model) in the order that they
## show up in your metadata table, when it is ordered alphabetically by sample ID
## There must be the same number of colours listed as there are Conditions
# If no options are specified, the default ggplot2 colours are used
colors:
  rcolorbrewer:
    - NA
  discrete:
    - red
    - blue
    - green
    - yellow