-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpipeline_iCLIP.ini
248 lines (191 loc) · 6.21 KB
/
pipeline_iCLIP.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#################################################################
#
#
# Pipeline pipeline_iCLIP.py configuration file for sphinxreport
#
# This pipeline.ini file lists the configuration options that a user is
# expected to review and adjust before running the pipeline.
#
# Add pipeline-specific options in separate sections.
#
################################################################
## general options
# Project-wide settings: report metadata, database, input location,
# choice of mapper and reference genome.
[general]
# Project name
projectname=iCLIP_Pipeline
# Copyright statement
copyright=CGAT (2010-2014)
# The short X.Y version.
version=0.1
# The full version, including alpha/beta/rc tags.
release=0.1
# name of the pipeline database
# NOTE(review): presumably an SQLite file relative to the working
# directory, given --backend=sqlite below - confirm
database=csvdb
# options for loading tables into the database; the --map entries
# request the str type for the gene_id, contig and transcript_id columns
csv2db_options=--backend=sqlite --retry --map=gene_id:str --map=contig:str --map=transcript_id:str
# location of the input files (relative path)
# NOTE(review): presumably demultiplexed fastq files, going by the
# directory name - confirm
input=../demux_fq
# mapper to use; per-mapper settings live in the [star], [tophat2]
# and [bowtie] sections of this file
mappers=star
# NOTE(review): presumably 0 keeps read sequences in the mapped
# output and 1 strips them - confirm against the pipeline code
strip_sequence=0
########################################################
# the genome to use (UCSC convention)
genome=hg19
# directory in which the genome named above is looked up
genome_dir=/ifs/mirror/genomes/index
# Properties of the raw sequencing reads.
[reads]
# layout of the barcode at the start of each read
# NOTE(review): presumably N marks random (UMI) positions and X marks
# sample-barcode positions - confirm against the demultiplexing step
bc_pattern=NNNXXXXNN
# adapter sequences for the 5' and 3' ends
# NOTE(review): presumably used for adapter trimming - confirm
5prime_adapt=AGATCGGAAGAGCGACGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
3prime_adapt=AGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG
# deliberately left empty
# NOTE(review): presumably empty means single-end data - confirm
paired=
# read length
# NOTE(review): presumably in bases - confirm
length=100
# Settings for mapping reads against the phix genome with bowtie.
# NOTE(review): presumably used to measure spike-in/contamination - confirm
[phix]
# name of the genome to map against
genome=phix
# bowtie executable
bowtie_exe=bowtie
# options passed to bowtie; same string as the commented-out
# bowtie options line in the [bowtie] section
bowtie_options=-v 2 --best --strata -a
# threads to use
bowtie_threads=12
# memory required for the bowtie job
# NOTE(review): presumably per thread, as stated for memory in [bowtie] - confirm
bowtie_memory=1.9G
# Experiment design settings.
[experiment]
# NOTE(review): presumably the label that identifies input/control
# samples in the sample names - confirm
input=FlipIn
# comma-separated list of integers
# NOTE(review): presumably boundaries for binning by length - confirm
# units and which lengths are binned
length_bins=45,85,120
# Settings for the deduplication step.
[dedup]
# command-line options for the deduplication tool
# NOTE(review): --method=directional-adjacency looks like a UMI-tools
# dedup method name - confirm it is valid for the installed version
# NOTE: this is the only key in the file written with spaces around '='
options = --method=directional-adjacency
################################################################
#
# Location of annotation database
#
################################################################
[annotations]
# path to the annotation database
# NOTE(review): 'database' and 'db' are set to the same path; presumably
# different parts of the code read different key names - confirm before
# removing either, and keep them in sync when changing the path
database=/ifs/data/annotations/hg19_ensembl72/csvdb
db=/ifs/data/annotations/hg19_ensembl72/csvdb
# directory with annotation information
dir=/ifs/data/annotations/hg19_ensembl72
###############################################################
#
# Options for the mapping pipeline
#
###############################################################
[pipeline_mapping]
# NOTE(review): presumably the maximum number of jobs the mapping
# pipeline may run concurrently - confirm
jobs=100
# Location of genome sequence files.
[fasta]
# same path as index_dir in the [bowtie] section, and different from
# genome_dir in [general]
# NOTE(review): confirm which step reads this key
genome_dir=/ifs/mirror/genomes/bowtie
# Filters applied when building the gene set.
[geneset]
# set if ribosomal and other repetitive RNA genes should be removed
# (note: lincRNA are kept in)
# (the key name keeps its original spelling; do not rename it)
remove_repetetive_rna=1
# pattern to match for contigs to remove; alternatives are separated by '|'
remove_contigs=chrM|chrMT|_random|chrUn|_hap|chrGL|chrHSCHR
# UCSC mappability track to use for estimating mappability
# of a transcript
mappability=/ifs/mirror/ucsc/hg19/encodeDCC/wgEncodeMapability/wgEncodeCrgMapabilityAlign36mer.bigWig
# minimum flanking error for isoform annotation
# NOTE(review): presumably in bases - confirm
flank=5000
###############################################################
# Settings for mapping with STAR.
[star]
# star executable.
executable=STAR
# directory with star indices
index_dir=/ifs/mirror/genomes/star
# Name of the index to pass as --genomeDir when using a splice junction
# database (sjdb). See the guidelines for generating genomes with a
# splice junction database (/ifs/mirror/genomes/star/CGAT-README).
# Leave blank to use the genome as specified above without junctions.
genome=hg19_junc75_99
# options for star. Please see the star manual for a list
# of options. There are many.
options=--outFilterMultimapNmax 1 --outFilterType BySJout --outFilterMismatchNoverLmax 0.2 --outFilterScoreMin 0.8 --alignSJDBoverhangMin 1
# number of threads to use
threads=12
# memory required for STAR. The memory requirement depends on
# the size of the suffix array. Note that the total memory requirement
# is threads * memory. Our nodes/blades have ~23.5GB.
# After testing, 1.9 (*12) seems to be the magic number
# NOTE(review): the value below is 3G, not the 1.9 quoted above; with
# threads=12 that is 36G in total by the rule stated here, which
# exceeds the ~23.5GB node size mentioned - confirm which is intended
memory=3G
###############################################################
# Settings for mapping with tophat2.
# NOTE(review): [general] mappers is set to star, so this section is
# presumably unused in the current configuration - confirm
[tophat2]
# tophat executable. An alternative is cgat-tophat, which attempts
# to parallelize the segment_junction step
executable=tophat2
# default values, but enabling extra search options. Added the
# directory containing the bowtie indexed reference transcriptome
# (currently empty: no extra options are passed)
options=
# threads to use
threads=4
# memory required for tophat jobs - note that this is multiplied by the
# number of threads. Thus, 4 threads * 2G = 8G
memory=2G
# --mate-inner-dist parameter for tophat, required for paired-ended runs.
# inner distance (in TopHat) = insert length - 2 * read length
# also: tophat2
mate_inner_dist=60
# map to reference transcriptome
# also: tophat2
include_reference_transcriptome=1
# library type (see tophat/cufflinks manual) - also used by cufflinks and cuffdiff
# also: tophat2
library_type=fr-unstranded
################################################################
# Settings for mapping with bowtie.
[bowtie]
# bowtie executable
# NOTE(review): this is 'bowtie' while the active options line below is
# labelled as being for bowtie2 - confirm the two are meant to match
executable=bowtie
# directory with bowtie indices
index_dir=/ifs/mirror/genomes/bowtie
# options for mapping with bowtie
# note that bowtie2 does not accept any of these defaults
# !! For paired end mapping note that the default max insert length is 250 !!
# !! This can be modified with -X !!
# bowtie (version 1) options, kept for reference:
#options=-v 2 --best --strata -a
#for bowtie2:
options=--all
# options for mapping with bowtie against transcriptome
# (reporting options are set automatically)
transcriptome_options=-v 2
# threads to use
threads=12
# memory required for bowtie jobs - per thread
memory=1.9G
################################################################
# Patterns that map individual input file names to merged output names.
[merge]
# pattern with three capture groups:
#   1: everything before a '/',
#   2: a name made of three '-'-separated parts, the last of which
#      contains no '_',
#   3: whatever follows the next '_'
# NOTE(review): presumably applied as a regular expression to input
# paths - confirm
pattern_input=(.+)/(.+\-.+\-[^_]+)_(.+)
# output name built from groups 1 and 2 with 'merged_' prefixed to the
# name; group 3 is not used, so inputs differing only in group 3 share
# one output name
pattern_output=\1/merged_\2
# Settings for calling clusters of cross-linked sites.
# NOTE(review): the meanings below are inferred from key names only -
# confirm against the pipeline code
[clusters]
# NOTE(review): presumably an FDR setting; confirm whether 0 is a
# threshold or disables FDR-based filtering
fdr=0
# NOTE(review): presumably the window size in bases
window_size=15
# NOTE(review): presumably the minimum number of replicates in which a
# cluster must be found
min_reproducible=2
# NOTE(review): presumably a p-value threshold
pthresh=0.1
#######################################################
#######################################################
#######################################################
## options related to motif search
#######################################################
[motifs]
# maximum number of characters for motif discovery
max_size=1000000
####
# masker to use for meme, valid options are 'dustmasker' and 'repeatmasker'
# (the latter requires an appropriately annotated genome.fasta file)
masker=dustmasker
# minimum number of sequences to use for motif search
min_sequences=100
# Settings for motif discovery with meme.
[meme]
# meme model to use
model=zoops
# maximum number of sequences for motif discovery
# NOTE(review): the previous comment said "characters", the same text as
# for max_size in [motifs]; going by the key name this limits the
# number of sequences - confirm
max_sequences=2500
# number of motifs to find with meme
nmotifs=5
# additional command-line options for meme
# NOTE(review): -minw/-maxw presumably set the minimum and maximum motif
# width - confirm against the meme documentation
options=-minw 4 -maxw 30
# order for background model
background_order=1
# Settings for dreme.
[dreme]
# additional command-line options for dreme (currently empty)
options=
################################################################
#
# sphinxreport build options
#
################################################################
[report]
# prefix to use for publishing the report from this pipeline
prefix=iCLIP_fullrun5/
# engine used to build the report
engine=cgatreport
# NOTE(review): presumably the CSS class applied to tables in the
# report - confirm
table_class=sortable
# Locations of this pipeline's own outputs, relative to the working directory.
# NOTE(review): presumably read by the report or by downstream pipelines -
# confirm
[iclip]
# pipeline working directory
dir=.
# path to the pipeline database
# NOTE(review): [general] sets database=csvdb; './csvdb' here presumably
# refers to the same file - confirm
database=./csvdb
# directory for exported files
exportdir=export