forked from armartin/ancestry_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplot_painted_karyograms.sh
36 lines (30 loc) · 1.58 KB
/
plot_painted_karyograms.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/sh
### Elizabeth G. Atkinson
### 2/22/18
##
## Post-process RFmix msp.tsv output for cohort local ancestry inference so
## that every individual can be plotted as a painted karyogram. Each
## individual is collapsed into bed files (one per chromosome strand) and
## then plotted.
##
## Usage: plot_painted_karyograms.sh MSP INDIVS [POPS] [CENTROMERES]
##   MSP          base name of the RFmix file, i.e. its path WITHOUT the
##                ".msp.tsv" suffix
##   INDIVS       text file with one individual ID per line
##   POPS         optional comma-separated ancestry labels (default "AFR,EUR").
##                Pay attention to the order of ancestries in the RFmix file.
##   CENTROMERES  optional centromere bed file (default centromeres_hg19.bed)
##
## Files written: MSP.msp1.tsv, MSP.snp_loc, MSP.Viterbi.tsv and, for every
## individual ID, ID.png (collapse_ancestry1.py is expected to write the
## ID.A.bed / ID.B.bed files that the plotting step reads).
##
## Exit status: 0 on success, 1 if any individual failed to collapse or plot,
## 2 on a usage or input error.

set -eu

if [ "$#" -lt 2 ]; then
  printf 'Usage: %s MSP INDIVS [POPS] [CENTROMERES]\n' "${0##*/}" >&2
  exit 2
fi

MSP=$1
INDIVS=$2
# Defaults keep the original hard-coded EUR/AFR behaviour.
POPS=${3:-AFR,EUR}
CENTROMERES=${4:-centromeres_hg19.bed}

for required in "$MSP.msp.tsv" "$INDIVS"; do
  if [ ! -r "$required" ]; then
    printf 'error: cannot read input file: %s\n' "$required" >&2
    exit 2
  fi
done

# Format the RFmix v2 msp output so it can be read like the v1 format:
# drop every line whose first field contains '#' (the header lines).
awk '$1 !~ /#/' "$MSP.msp.tsv" > "$MSP.msp1.tsv"

# SNP locations in the format the collapse script expects: columns 2 and 4
# of the filtered table (presumably window start position and genetic
# position in RFmix v2 output -- confirm against your RFmix version).
cut -f2,4 "$MSP.msp1.tsv" > "$MSP.snp_loc"

# Per-haplotype ancestry calls: everything from column 7 onwards.
cut -f7- "$MSP.msp1.tsv" > "$MSP.Viterbi.tsv"

status=0

# Cycle through all the individuals and make their collapsed bed files with a
# slightly modified script from Alicia's ancestry pipeline.
# The ID list is read on file descriptor 3 so the Python scripts cannot
# consume it through stdin; `|| [ -n "$ind" ]` keeps a last line that has no
# trailing newline. Leading/trailing whitespace around an ID is trimmed.
while read -r ind <&3 || [ -n "$ind" ]; do
  [ -n "$ind" ] || continue   # ignore blank lines
  if ! python collapse_ancestry1.py \
      --rfmix "$MSP.Viterbi.tsv" \
      --snp_locations "$MSP.snp_loc" \
      --ind "$ind" \
      --ind_info "$INDIVS" \
      --pop_labels "$POPS" \
      --out "$ind"; then
    # Keep going so one bad sample does not block the rest of the cohort.
    printf 'warning: collapse_ancestry1.py failed for %s\n' "$ind" >&2
    status=1
  fi
done 3< "$INDIVS"

# Plot a painted karyogram for each individual; one png file is written per
# individual. Pass a shorter ID list as INDIVS to plot only specific people.
while read -r ind <&3 || [ -n "$ind" ]; do
  [ -n "$ind" ] || continue   # ignore blank lines
  if ! python plot_karyogram1.py \
      --bed_a "$ind.A.bed" \
      --bed_b "$ind.B.bed" \
      --ind "$ind" \
      --centromeres "$CENTROMERES" \
      --pop_order "$POPS" \
      --out "$ind.png"; then
    printf 'warning: plot_karyogram1.py failed for %s\n' "$ind" >&2
    status=1
  fi
done 3< "$INDIVS"

exit "$status"