-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmothur_tutorial_complete.txt
73 lines (56 loc) · 3.18 KB
/
mothur_tutorial_complete.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#1 Request an interactive session in abel
qlogin --account=ln0004k --ntasks=1 --mem-per-cpu=4G --time=05:00:00 --reservation=BIO9905
#2 make a folder in your home directory for the mothur analyses and move into it
mkdir mothur_analysis
cd mothur_analysis
#3 load modules for mothur and set paths
module purge
module load mothur
#4 Fetch the data and database we're going to work with.
#a. This is a subset of the merged, BayesHammer corrected data we've already worked with
#it also includes a mothur group file
wget http://folk.uio.no/marieda/mothur.data.tar.gz
#b. write your own command to fetch the database from this location: http://folk.uio.no/marieda/mothur.silva.tar.gz
#5 expand the files and remove the original files
tar -xzvf mothur.data.tar.gz
tar -xzvf mothur.silva.tar.gz
rm mothur.data.tar.gz
rm mothur.silva.tar.gz
#6 open an interactive session with mothur
mothur
#7 use fastq.info to parse the fastq file into a fasta file and a qual file
#a. write your own command specifying the fastq file (if you're having trouble, see the bottom of the file)
#8 view the summary statistics for your sequences
#you'll notice there are some sequences with n's and long homopolymers
summary.seqs(fasta=merged.seqs.fasta)
#9 filter the sequences using screen.seqs
#use the following options:
#fasta=merged.seqs.fasta
#maxambig=0
#maxhomop=8
#group=mothur.group.txt
#10 view the summary statistics for your filtered sequences
summary.seqs(fasta=merged.seqs.good.fasta)
#11 speed things up by only working with unique sequences
unique.seqs(fasta=merged.seqs.good.fasta)
#12 align the sequences using align.seqs
align.seqs(fasta=merged.seqs.good.unique.fasta,reference=SILVA_128.align.fasta,processors=4)
#aligning takes a long time! kill that process
#instead, fetch the results from http://folk.uio.no/marieda/aligned.tar.gz
#13 remove gap-only positions from our alignment
filter.seqs(fasta=merged.seqs.good.unique.good.align, vertical=T, trump=.)
#14 do some additional error correction by merging sequences that have only a single bp difference from one another
pre.cluster(fasta=merged.seqs.good.unique.good.filter.unique.fasta, count=merged.seqs.good.unique.good.filter.count_table, diffs=1)
#15 check for chimeras using mothur's version of vsearch
chimera.vsearch(fasta=merged.seqs.good.unique.good.filter.unique.precluster.fasta,count=merged.seqs.good.unique.good.filter.count_table,template=self,mindiv=1,dereplicate=t)
#15b remove the chimeric sequences flagged by chimera.vsearch using remove.seqs
remove.seqs(fasta=merged.seqs.good.unique.good.filter.unique.precluster.fasta,accnos=merged.seqs.good.unique.good.filter.unique.precluster.denovo.vsearch.accnos)
#16 view the summary statistics for your chimera free data
#here we are using mothur's shorthand 'current' for referring to the most recently generated file of that type
summary.seqs(fasta=current,count=current)
#17 create a distance matrix for clustering
dist.seqs(fasta=current,cutoff=0.1)
#18 use the distance matrix as the input for the single linkage clustering command 'cluster'
#19 compare the sequences to the silva database using the classify.seqs command, then summarize the taxonomy of each OTU using the classify.otu command
#answers to the "write your own command" exercises in steps 7 and 9
fastq.info(fastq=merged.seqs.fastq)
screen.seqs(fasta=merged.seqs.fasta,maxambig=0,maxhomop=8,group=mothur.group.txt)