Skip to content

Commit

Permalink
Refactoring old stuff, general cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
groverj3 committed Aug 1, 2019
1 parent 8a7b482 commit 19e2b55
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 138 deletions.
2 changes: 1 addition & 1 deletion gff_to_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ def main(args):


if __name__ == '__main__':
gff_to_bed(get_args())
main(get_args())
37 changes: 37 additions & 0 deletions gtf_append_chr_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
# Author: Jeffrey Grover
# Purpose: Append a string to all chromosome IDs
# Created: 2019-03-14

from argparse import ArgumentParser


def append_chromosome(input_file, chrom_string):
    """Print a GTF file with a string appended to every chromosome ID.

    Each tab-delimited record is written to standard output with
    ``chrom_string`` added to the end of its first column.  Comment lines
    (starting with ``#``) and blank lines are echoed unchanged because they
    carry no chromosome ID.

    Args:
        input_file: Path of the GTF file to read.
        chrom_string: Text appended to the first column of each record.
    """
    with open(input_file, 'r') as input_handle:
        for line in input_handle:
            # Drop only the line terminator so empty trailing fields survive
            record = line.rstrip('\r\n')
            if not record or record.startswith('#'):
                print(record)
                continue
            # Only the first column changes; the rest is passed through as-is
            chrom, tab, remainder = record.partition('\t')
            print(chrom + chrom_string + tab + remainder)


# Parse command line options

def get_args():
    """Parse the command line for the chromosome-ID appending script.

    Returns:
        The argparse namespace with ``gtf`` (path of the file to process)
        and ``string`` (text to add to each chromosome ID).
    """
    parser = ArgumentParser(
        description='Appends a string to all chromosomes in a GTF file')
    parser.add_argument('gtf', help='File to process', metavar='FILE.gtf')
    # Required: without it the value is None and the string concatenation
    # on every record fails with a TypeError
    parser.add_argument('--string',
                        '-s',
                        required=True,
                        help='String to add to each chromosome ID')
    return parser.parse_args()


# Process file


def main(args):
    """Run the chromosome-ID append using the parsed command-line options.

    Args:
        args: Namespace providing ``gtf`` and ``string`` attributes.
    """
    gtf_path = args.gtf
    suffix = args.string
    append_chromosome(gtf_path, suffix)


# Only parse the command line and run when executed as a script
if __name__ == '__main__':
    cli_args = get_args()
    main(cli_args)
44 changes: 0 additions & 44 deletions gtf_append_ids.py

This file was deleted.

55 changes: 31 additions & 24 deletions gtf_clean_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,45 @@
from argparse import ArgumentParser


def clean_gtf(input_file, cleaner_sep, chromosome, feature):
    """Print a GTF file with trailing text removed from selected IDs.

    Records are written to standard output.  Comment lines (starting with
    ``#``) and blank lines are echoed unchanged.

    Args:
        input_file: Path of the GTF file to read.
        cleaner_sep: Separator; the text from its first occurrence onward
            is removed from each cleaned ID.
        chromosome: When true, clean the chromosome ID (first column).
        feature: When true, clean the first three ``'; '``-separated
            entries of the attribute column (ninth column) and emit only
            those; when false the attribute column is printed untouched.
    """
    with open(input_file, 'r') as input_handle:
        for line in input_handle:
            # Header/comment and blank lines have no ninth column to clean
            if line.startswith('#') or not line.strip():
                print(line, end='')
                continue
            entry = line.strip().split('\t')
            if chromosome:
                entry[0] = entry[0].split(cleaner_sep)[0]
            attributes = entry[8]
            if feature:
                group = attributes.split('; ')
                cleaned = [item.split(cleaner_sep)[0] for item in group[:3]]
                # Cutting at the separator also removes each value's closing
                # quote, so the join string and the suffix put it back
                attributes = '"; '.join(cleaned) + '"'
            print('\t'.join(entry[0:8]), attributes, sep='\t')


# Parse command line options

parser = ArgumentParser(
description='Removes garbage from chromosome and gene IDs in a '
'gtf file. Works on at least the one file I needed it to')
parser.add_argument('input_path', help='File to process', metavar='File')
parser.add_argument('--sep', '-s', help='Separator to remove text after')

input_path = parser.parse_args().input_path
cleaner_sep = parser.parse_args().sep
def get_args():
    """Build the command-line interface and return the parsed options.

    Returns:
        The argparse namespace with ``gtf``, ``sep``, ``chromosome`` and
        ``feature`` attributes.
    """
    cli = ArgumentParser(
        description='Removes garbage from chromosome and gene IDs in a gtf.')

    # Input file and the separator that marks where the garbage begins
    cli.add_argument('gtf', metavar='FILE.gtf', help='File to process')
    cli.add_argument('--sep', '-s', help='Separator to remove text after')

    # Switches selecting which IDs get cleaned
    cli.add_argument('--chromosome', '-c',
                     action='store_true',
                     help='Clean chromosome IDs')
    cli.add_argument('--feature', '-f',
                     action='store_true',
                     help='Clean feature IDs')

    parsed = cli.parse_args()
    return parsed


# Process file

clean_gtf(input_path, cleaner_sep)

def main(args):
    """Clean the GTF file according to the parsed command-line options.

    Args:
        args: Namespace providing ``gtf``, ``sep``, ``chromosome`` and
            ``feature`` attributes.
    """
    clean_gtf(input_file=args.gtf,
              cleaner_sep=args.sep,
              chromosome=args.chromosome,
              feature=args.feature)


# Only parse the command line and run when executed as a script
if __name__ == '__main__':
    cli_args = get_args()
    main(cli_args)
54 changes: 33 additions & 21 deletions methyldackel_conversion_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# Purpose: Determine bisulfite conversion rate from MethylDackel bedGraph files
# Created: 2/2019

import csv
from argparse import ArgumentParser

# Function block
Expand All @@ -14,20 +13,20 @@ def parse_bedgraph(input_context_bedgraph):
met_count = 0
unmet_count = 0
with open(input_context_bedgraph, 'r') as input_handle:
bedgraph_reader = csv.reader(input_handle, delimiter='\t')
next(bedgraph_reader) # Skip header
for line in bedgraph_reader:
met_count += int(line[4])
unmet_count += int(line[5])
next(input_handle) # Skip header
for line in input_handle:
entry = line.strip().split()
met_count += int(entry[4])
unmet_count += int(entry[5])
return [met_count, unmet_count]


def conversion_calc(cg_counts, chg_counts, chh_counts):
total_cg = cg_counts[0] + cg_counts[1]
total_chg = chg_counts[0] + chg_counts[1]
total_chh = chh_counts[0] + chh_counts[1]
conversion_rate = ( sum((cg_counts[1], chg_counts[1], chh_counts[1])) /
sum((total_cg, total_chg, total_chh)) ) * 100
conversion_rate = (sum((cg_counts[1], chg_counts[1], chh_counts[1])) / sum(
(total_cg, total_chg, total_chh))) * 100
print('CG Methylated/Total:\t', cg_counts[0], '/', total_cg)
print('CHG Methylated/Total:\t', chg_counts[0], '/', total_chg)
print('CHH Methylated/Total:\t', chh_counts[0], '/', total_chh)
Expand All @@ -36,21 +35,34 @@ def conversion_calc(cg_counts, chg_counts, chh_counts):

# Command line parser

parser = ArgumentParser(
description='Load CG, CHG, and CHH context bedGraph files from MethylDackel'
'and calculate bisulfite conversion rate.')
parser.add_argument('--CG', help='CG Context bedGraph file.', metavar='File')
parser.add_argument('--CHG', help='CHG context bedGraph file.', metavar='File')
parser.add_argument('--CHH', help='CHH context bedGraph file.', metavar='File')

cg_bedgraph = parser.parse_args().CG
chg_bedgraph = parser.parse_args().CHG
chh_bedgraph = parser.parse_args().CHH
def get_args():
    """Parse the command line for the conversion-rate calculator.

    All three context files are required because every one of them is
    opened unconditionally when the files are processed.

    Returns:
        The argparse namespace with ``CG``, ``CHG`` and ``CHH`` attributes,
        each holding the path given for that context's bedGraph file.
    """
    parser = ArgumentParser(
        # The trailing space keeps the two adjacent literals from fusing
        # into "MethylDackeland" in the help text
        description=
        'Load CG, CHG, and CHH context bedGraph files from MethylDackel '
        'and calculate bisulfite conversion rate.')
    parser.add_argument('--CG',
                        help='CG Context bedGraph file.',
                        required=True,
                        metavar='FILE.bedGraph')
    parser.add_argument('--CHG',
                        help='CHG context bedGraph file.',
                        required=True,
                        metavar='FILE.bedGraph')
    parser.add_argument('--CHH',
                        help='CHH context bedGraph file.',
                        required=True,
                        metavar='FILE.bedGraph')
    return parser.parse_args()


# Process the files

cg_counts = parse_bedgraph(cg_bedgraph)
chg_counts = parse_bedgraph(chg_bedgraph)
chh_counts = parse_bedgraph(chh_bedgraph)

conversion_calc(cg_counts, chg_counts, chh_counts)
def main(args):
    """Parse the three context bedGraphs and hand the counts on.

    The files are parsed in CG, CHG, CHH order and the three results are
    passed to ``conversion_calc`` in that same order.

    Args:
        args: Namespace providing ``CG``, ``CHG`` and ``CHH`` file paths.
    """
    per_context_counts = [
        parse_bedgraph(getattr(args, context))
        for context in ('CG', 'CHG', 'CHH')
    ]
    conversion_calc(*per_context_counts)


# Only parse the command line and run when executed as a script
if __name__ == '__main__':
    cli_args = get_args()
    main(cli_args)
74 changes: 38 additions & 36 deletions methyldackel_percent_methylation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,56 @@
# Created: 2019-04-29

from argparse import ArgumentParser

# Function block
from sys import exit


def methyl_calc(input_bedgraph):
    """Return the percent methylation summed over one bedGraph file.

    The fifth and sixth whitespace-delimited columns of every data line
    are accumulated as the methylated and unmethylated counts.

    Args:
        input_bedgraph: Path of the bedGraph file to read.

    Returns:
        ``methylated / (methylated + unmethylated) * 100`` as a float.

    Raises:
        ZeroDivisionError: If the file contributes no counts at all.
    """
    met_c = 0
    unmet_c = 0
    with open(input_bedgraph, 'r') as input_handle:
        for line in input_handle:
            # Skip the header by content: calling next() inside the loop
            # would discard every other line and parse the header as data
            if line.startswith('track') or not line.strip():
                continue
            fields = line.split()
            met_c += int(fields[4])
            unmet_c += int(fields[5])
    return (met_c / (met_c + unmet_c)) * 100


# Command line parser

parser = ArgumentParser(
description='Calculate percent methylation from a set of MethylDackel '
'bedGraphs files.')
parser.add_argument('--CG',
help='CG context bedGraph',
default=None,
metavar='File')
parser.add_argument('--CHG',
help='CHG context bedGraph',
default=None,
metavar='File')
parser.add_argument('--CHH',
help='CHH context bedGraph',
default=None,
metavar='File')

cg_bedgraph = parser.parse_args().CG
chg_bedgraph = parser.parse_args().CHG
chh_bedgraph = parser.parse_args().CHH
def get_args():
    """Build the command-line interface and return the parsed options.

    Returns:
        The argparse namespace with ``CG``, ``CHG`` and ``CHH`` attributes;
        each is the given bedGraph path, or ``None`` when omitted.
    """
    cli = ArgumentParser(
        description='Calculate percent methylation from a set of MethylDackel '
        'bedGraphs files.')
    # The three context options differ only in the context name
    for context in ('CG', 'CHG', 'CHH'):
        cli.add_argument('--' + context,
                         help=context + ' context bedGraph',
                         default=None,
                         metavar='FILE.bedGraph')
    parsed = cli.parse_args()
    return parsed


# Process the files

if not cg_bedgraph and not chg_bedgraph and not chh_bedgraph:
print('Without data how do you expect to do anything!')
exit
if cg_bedgraph:
cg_methylation = methyl_calc(cg_bedgraph)
print('CG', cg_methylation, sep='\t')
if chg_bedgraph:
chg_methylation = methyl_calc(chg_bedgraph)
print('CHG', chg_methylation, sep='\t')
if chh_bedgraph:
chh_methylation = methyl_calc(chh_bedgraph)
print('CHH', chh_methylation, sep='\t')
def main(args):
    """Print the percent methylation for each context that was supplied.

    Exits with a message when no bedGraph was given at all.  Otherwise
    one tab-separated ``label<TAB>value`` line is printed per supplied
    file, in CG, CHG, CHH order.

    Args:
        args: Namespace providing ``CG``, ``CHG`` and ``CHH`` file paths.
    """
    contexts = (('CG', args.CG), ('CHG', args.CHG), ('CHH', args.CHH))
    if not any(path for _, path in contexts):
        exit('Without data how do you expect to do anything!')
    for label, path in contexts:
        if path:
            print(label, methyl_calc(path), sep='\t')


# Only parse the command line and run when executed as a script
if __name__ == '__main__':
    cli_args = get_args()
    main(cli_args)
10 changes: 2 additions & 8 deletions methylkit_analyze.r
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,10 @@ get_args <- function () {

# Main definition

main <- function () {

# Get args

args <- get_args()
main <- function (args) {
control_files <- unlist(strsplit(args$control, ','))
experimental_files <- unlist(strsplit(args$experimental, ','))

# Create out_dir

output_dir <- 'methylkit_analyze'
dir.create(output_dir)

Expand All @@ -179,4 +173,4 @@ main <- function () {

# Call main

main()
main(get_args())
7 changes: 3 additions & 4 deletions mosdepth_to_x_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ def get_args():
parser.add_argument(
'-f', '--fasta',
help='.fasta file for genome',
metavar='File')
metavar='FILE.fasta')
parser.add_argument(
'-m', '--mosdepth',
help='.bed.gz mosdepth output',
metavar='File')
metavar='FILE.bed.gz')
return parser.parse_args()


Expand All @@ -65,5 +65,4 @@ def main(args):


if __name__ == '__main__':
args = get_args()
main(args)
main(get_args())

0 comments on commit 19e2b55

Please sign in to comment.