Skip to content

Commit

Permalink
Go back to old data format names for metadata
Browse files: browse the repository at this point in the history
Go back to the old values for the data format where applicable; otherwise,
the differing names could cause confusion when comparing old and new processed data.
  • Loading branch information
Madeleine Price Ball committed Aug 31, 2012
1 parent 1aee93a commit 5728317
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
4 changes: 2 additions & 2 deletions server/conversion/convert_to_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ def convert(input_file, options=None):
input_type = detect_format.detect_format(input_file)
if input_type == 'GFF':
input_data = autozip.file_open(input_file)
elif input_type == 'CGIvar':
elif input_type == 'CGIVAR':
input_data = cgivar_to_gff.convert(input_file, options)
elif input_type == '23andme':
elif input_type == '23ANDME':
input_data = gff_from_23andme.convert(input_file)
elif input_type == 'VCF':
input_data = vcf_to_gff.convert(input_file, options)
Expand Down
19 changes: 10 additions & 9 deletions server/conversion/detect_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ def detect_format(file_input):
Takes a path to a file, or a string generator (e.g. a filehandle).
Tries to match one of the following:
23andme: 23andme (microarray genotyping)
CGIvar: Complete Genomics var file
23ANDME: 23andme (microarray genotyping)
CGIVAR: Complete Genomics var file
deCODEme: deCODEme (microarray genotyping)
GFF: General Feature Format
VCF: Variant Call Format (only tested for 23andme exome data)
"""
Expand All @@ -48,14 +49,14 @@ def detect_format(file_input):
# Check comment lines, if they exist, for information on file type.
if re.match('#', line):
if re.match(r'#TYPE.*VAR-ANNOTATION', line):
print "Complete Genomics var file format (CGIvar) detected"
looks_like['CGIvar'] = True
print "Complete Genomics var file format (CGIVAR) detected"
looks_like['CGIVAR'] = True
if re.match(r'##gff-version', line):
print "General Feature Format (GFF) detected"
looks_like['GFF'] = True
if re.match(r'# This data file generated by 23andMe', line):
print "23andme microarray genotyping data (23andme) detected"
looks_like['23andme'] = True
print "23andme microarray genotyping data (23ANDME) detected"
looks_like['23ANDME'] = True
if re.match(r'##fileformat=VCFv4', line):
print "Variant Call Format (VCF) detected"
looks_like['VCF'] = True
Expand All @@ -76,15 +77,15 @@ def detect_format(file_input):
re.match(r'rs', tsv_data[0]) and
re.match(r'[0-9]', tsv_data[2]) and
re.match(r'[ACGT][ACGT]', tsv_data[3]) ):
print "23andme microarray genotyping data (23andme) guessed"
looks_like['23andme'] = True
print "23andme microarray genotyping data (23ANDME) guessed"
looks_like['23ANDME'] = True
if ( len(tsv_data) > 6 and
re.match(r'chr', tsv_data[3]) and
re.match(r'[0-9]', tsv_data[4]) and
re.match(r'[0-9]', tsv_data[5]) and
(tsv_data[6] == "no-call" or tsv_data[6] == "ref") ):
print "Complete Genomics var file format (CGIvar) guessed"
looks_like['CGIvar'] = True
looks_like['CGIVAR'] = True
if ( len(tsv_data) > 6 and
re.match(r'[0-9]', tsv_data[3]) and
re.match(r'[0-9]', tsv_data[4]) and
Expand Down

0 comments on commit 5728317

Please sign in to comment.