From 5c56cd4fb089331109f43b049c9550f0fac87bd6 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 01:37:24 +0000 Subject: [PATCH 01/14] adding duplicate and contig checking --- lib/GenomeFileUtil/core/GenomeInterface.py | 61 +++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index def9a499..93e894a4 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -228,7 +228,66 @@ def _update_genome(self, genome): else: GenomeUtils.set_default_taxon_data(genome) - if any([x not in genome for x in ('dna_size', 'md5', 'gc_content', 'num_contigs')]): + # Double check for duplicate feature ids across all 4 feature lists + # Note this is more than anything a check to make sure the coder does not introduce + # code that causes the code to handle duplicates to not work properly + # The following few lines need to be uncommented to test if the check is working properly + # only way to really test this checker + + temp_duplicate_cds = genome["cdss"][0] + genome["cdss"].append(temp_duplicate_cds) + + + + ids_present = set() + duplicates_ids_found = set() + if "cdss" in genome: + for cds in genome["cdss"]: + if cds["id"] in ids_present: + duplicate_ids_found.add(cds["id"]) + else: + ids_present.add(cds["id"]) + if "features" in genome: + for feature in genome["features"]: + if feature["id"] in ids_present: + duplicate_ids_found.add(cds["id"]) + else: + ids_present.add(feature["id"]) + if "mrnas" in genome: + for mrna in genome["mrnas"]: + if mrna["id"] in ids_present: + duplicate_ids_found.add(cds["id"]) + else: + ids_present.add(mrna["id"]) + if "non_coding_featues" in genome: + for non_coding_feature in genome["non_coding_features"]: + if non_coding_feature["id"] in ids_present: + duplicate_ids_found.add(cds["id"]) + else: + ids_present.add(non_coding_feature["id"]) + if len(duplicates_ids_found) > 0: + duplicate_id_string = ', '.join(str(s) for s in duplicates_ids_found) + raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " + "Please enter a help desk ticket. Duplicate IDs: " + duplicate_id_string) + raise ValueError(raised_error_message) + + # fixes issue of user have contig_ids key but an empty list + if 'contig_ids' in genome and len(genome['contig_ids']) == 0 and 'assembly_ref' in genome: + assembly_data = self.dfu.get_objects( + {'object_refs': [genome['assembly_ref']], + 'ignore_errors': 0})['data'][0]['data'] + contig_ids = assembly_data["contigs"].keys() + genome["contig_ids"] = contig_ids + contig_lengths = list() + for contig_id in contig_ids: + if "length" in assembly_data["contigs"][contig_id]: + contig_lengths.append(assembly_data["contigs"][contig_id]["length"]) + else: + contig_lengths.append(0) + genome["contig_lengths"] = contig_lengths + + if any([x not in genome for x in ('dna_size', 'md5', 'gc_content', 'num_contigs', 'contig_ids', "contig_lengths")]): if 'assembly_ref' in genome: assembly_data = self.dfu.get_objects( {'object_refs': [genome['assembly_ref']], From a417ac639968f6e5e18c983f6952f9e1bd14279f Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 02:08:40 +0000 Subject: [PATCH 02/14] adding duplicate and contig checking --- lib/GenomeFileUtil/core/GenomeInterface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 93e894a4..aacc01be 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -235,8 +235,9 @@ def _update_genome(self, genome): # only way to really test this checker temp_duplicate_cds = genome["cdss"][0] + print("first CDS : " + str(temp_duplicate_cds)) genome["cdss"].append(temp_duplicate_cds) - + print("Last CDS : " + str( genome["cdss"][-1])) ids_present = set() From e1a17d296d23d27c03e460794ea64858bce107f1 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 02:33:52 +0000 Subject: [PATCH 03/14] adding duplicate and contig checking --- lib/GenomeFileUtil/core/GenomeInterface.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index aacc01be..62a1fec8 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -210,6 +210,9 @@ def _update_metagenome(self, genome): def _update_genome(self, genome): """Checks for missing required fields and fixes breaking changes""" + + print("IN UPDATE GENOME") + print(str(genome)) # do top level updates ontologies_present = defaultdict(dict) # type: dict ontologies_present.update(genome.get('ontologies_present', {})) @@ -233,6 +236,7 @@ def _update_genome(self, genome): # code that causes the code to handle duplicates to not work properly # The following few lines need to be uncommented to test if the check is working properly # only way to really test this checker + temp_duplicate_cds = genome["cdss"][0] print("first CDS : " + str(temp_duplicate_cds)) From 5da043e1bd0f89b00ee8d31743199aa36f96c45f Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 02:49:54 +0000 Subject: [PATCH 04/14] adding duplicate and contig checking --- lib/GenomeFileUtil/core/GenomeInterface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 62a1fec8..2061d26c 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -272,8 +272,8 @@ def _update_genome(self, genome): ids_present.add(non_coding_feature["id"]) if len(duplicates_ids_found) > 0: duplicate_id_string = ', '.join(str(s) for s in duplicates_ids_found) - raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " - "Please enter a help desk ticket. Duplicate IDs: " + raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " + + "Please enter a help desk ticket. Duplicate IDs: " + duplicate_id_string) raise ValueError(raised_error_message) From 969daa951809dde0c79e9dc535631e3c23df1fba Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 03:52:28 +0000 Subject: [PATCH 05/14] adding duplicate and contig checking --- lib/GenomeFileUtil/core/GenomeInterface.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 2061d26c..3af66d81 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -211,8 +211,8 @@ def _update_metagenome(self, genome): def _update_genome(self, genome): """Checks for missing required fields and fixes breaking changes""" - print("IN UPDATE GENOME") - print(str(genome)) + print(f"IN UPDATE GENOME {str(genome)}") + # do top level updates ontologies_present = defaultdict(dict) # type: dict ontologies_present.update(genome.get('ontologies_present', {})) @@ -270,12 +270,14 @@ def _update_genome(self, genome): duplicate_ids_found.add(cds["id"]) else: ids_present.add(non_coding_feature["id"]) + print(f"dup ids count {str(len(duplicate_ids_found))}") if len(duplicates_ids_found) > 0: duplicate_id_string = ', '.join(str(s) for s in duplicates_ids_found) - raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " + - "Please enter a help desk ticket. Duplicate IDs: " + - duplicate_id_string) - raise ValueError(raised_error_message) + raised_error_message = "Duplicate keys HERE" + #("Duplicate ids were found and not properly handled by the uploader. " + + # "Please enter a help desk ticket. Duplicate IDs: " + + # duplicate_id_string) + raise ValueError(f"DUPLICATE IDS ERROR") # fixes issue of user have contig_ids key but an empty list if 'contig_ids' in genome and len(genome['contig_ids']) == 0 and 'assembly_ref' in genome: From 4e357765290a7ab2479d6e66cece60a3a17e43b3 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Sep 2021 23:41:52 +0000 Subject: [PATCH 06/14] Trying dup fix for all regular genomes --- lib/GenomeFileUtil/core/GenomeInterface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 3af66d81..27dd1b30 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -142,8 +142,8 @@ def save_one_genome(self, params): if params.get('upgrade') or 'feature_counts' not in data: data = self._update_metagenome(data) else: - if params.get('upgrade') or 'feature_counts' not in data: - data = self._update_genome(data) +# if params.get('upgrade') or 'feature_counts' not in data: + data = self._update_genome(data) # check all handles point to shock nodes owned by calling user self._own_handle(data, 'genbank_handle_ref') From 9b318bb6c4b68725b6be669f26b0024e5b601c23 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 00:33:12 +0000 Subject: [PATCH 07/14] Trying dup fix for all regular genomes --- lib/GenomeFileUtil/core/GenomeInterface.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 27dd1b30..31536feb 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -245,7 +245,7 @@ def _update_genome(self, genome): ids_present = set() - duplicates_ids_found = set() + duplicate_ids_found = set() if "cdss" in genome: for cds in genome["cdss"]: if cds["id"] in ids_present: @@ -255,19 +255,19 @@ def _update_genome(self, genome): if "features" in genome: for feature in genome["features"]: if feature["id"] in ids_present: - duplicate_ids_found.add(cds["id"]) + duplicate_ids_found.add(feature["id"]) else: ids_present.add(feature["id"]) if "mrnas" in genome: for mrna in genome["mrnas"]: if mrna["id"] in ids_present: - duplicate_ids_found.add(cds["id"]) + duplicate_ids_found.add(mrna["id"]) else: ids_present.add(mrna["id"]) if "non_coding_featues" in genome: for non_coding_feature in genome["non_coding_features"]: if non_coding_feature["id"] in ids_present: - duplicate_ids_found.add(cds["id"]) + duplicate_ids_found.add(non_coding_feature["id"]) else: ids_present.add(non_coding_feature["id"]) print(f"dup ids count {str(len(duplicate_ids_found))}") From e7de01884983d37b40f8a823196574f5e38304b5 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 00:46:25 +0000 Subject: [PATCH 08/14] Trying dup fix for all regular genomes --- lib/GenomeFileUtil/core/GenomeInterface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 31536feb..1f7defee 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -271,12 +271,12 @@ def _update_genome(self, genome): else: ids_present.add(non_coding_feature["id"]) print(f"dup ids count {str(len(duplicate_ids_found))}") - if len(duplicates_ids_found) > 0: - duplicate_id_string = ', '.join(str(s) for s in duplicates_ids_found) + if len(duplicate_ids_found) > 0: + duplicate_ids_string = ', '.join(str(s) for s in duplicate_ids_found) raised_error_message = "Duplicate keys HERE" #("Duplicate ids were found and not properly handled by the uploader. " + # "Please enter a help desk ticket. Duplicate IDs: " + - # duplicate_id_string) + # duplicate_ids_string) raise ValueError(f"DUPLICATE IDS ERROR") # fixes issue of user have contig_ids key but an empty list From 53085858a9f7df212a44f044e20ee120ee1031b3 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 03:02:03 +0000 Subject: [PATCH 09/14] Trying dup fix for all regular genomes --- lib/GenomeFileUtil/core/GenomeInterface.py | 131 +++++++++++---------- 1 file changed, 67 insertions(+), 64 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 1f7defee..7a446fc6 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -145,6 +145,8 @@ def save_one_genome(self, params): # if params.get('upgrade') or 'feature_counts' not in data: data = self._update_genome(data) + self._check_for_duplicate_ids(data): + # check all handles point to shock nodes owned by calling user self._own_handle(data, 'genbank_handle_ref') self._own_handle(data, 'gff_handle_ref') @@ -211,8 +213,6 @@ def _update_metagenome(self, genome): def _update_genome(self, genome): """Checks for missing required fields and fixes breaking changes""" - print(f"IN UPDATE GENOME {str(genome)}") - # do top level updates ontologies_present = defaultdict(dict) # type: dict ontologies_present.update(genome.get('ontologies_present', {})) @@ -231,74 +231,28 @@ def _update_genome(self, genome): else: GenomeUtils.set_default_taxon_data(genome) - # Double check for duplicate feature ids across all 4 feature lists - # Note this is more than anything a check to make sure the coder does not introduce - # code that causes the code to handle duplicates to not work properly - # The following few lines need to be uncommented to test if the check is working properly - # only way to really test this checker - - - temp_duplicate_cds = genome["cdss"][0] - print("first CDS : " + str(temp_duplicate_cds)) - genome["cdss"].append(temp_duplicate_cds) - print("Last CDS : " + str( genome["cdss"][-1])) - - - ids_present = set() - duplicate_ids_found = set() - if "cdss" in genome: - for cds in genome["cdss"]: - if cds["id"] in ids_present: - duplicate_ids_found.add(cds["id"]) - else: - ids_present.add(cds["id"]) - if "features" in genome: - for feature in genome["features"]: - if feature["id"] in ids_present: - duplicate_ids_found.add(feature["id"]) - else: - ids_present.add(feature["id"]) - if "mrnas" in genome: - for mrna in genome["mrnas"]: - if mrna["id"] in ids_present: - duplicate_ids_found.add(mrna["id"]) - else: - ids_present.add(mrna["id"]) - if "non_coding_featues" in genome: - for non_coding_feature in genome["non_coding_features"]: - if non_coding_feature["id"] in ids_present: - duplicate_ids_found.add(non_coding_feature["id"]) - else: - ids_present.add(non_coding_feature["id"]) - print(f"dup ids count {str(len(duplicate_ids_found))}") - if len(duplicate_ids_found) > 0: - duplicate_ids_string = ', '.join(str(s) for s in duplicate_ids_found) - raised_error_message = "Duplicate keys HERE" - #("Duplicate ids were found and not properly handled by the uploader. " + - # "Please enter a help desk ticket. Duplicate IDs: " + - # duplicate_ids_string) - raise ValueError(f"DUPLICATE IDS ERROR") - + need_to_populate_assembly_related_metadata = False # fixes issue of user have contig_ids key but an empty list - if 'contig_ids' in genome and len(genome['contig_ids']) == 0 and 'assembly_ref' in genome: - assembly_data = self.dfu.get_objects( - {'object_refs': [genome['assembly_ref']], - 'ignore_errors': 0})['data'][0]['data'] - contig_ids = assembly_data["contigs"].keys() - genome["contig_ids"] = contig_ids - contig_lengths = list() - for contig_id in contig_ids: - if "length" in assembly_data["contigs"][contig_id]: - contig_lengths.append(assembly_data["contigs"][contig_id]["length"]) - else: - contig_lengths.append(0) - genome["contig_lengths"] = contig_lengths - + if 'contig_ids' in genome and len(genome['contig_ids']) == 0 : + need_to_populate_assembly_related_metadata = True + if any([x not in genome for x in ('dna_size', 'md5', 'gc_content', 'num_contigs', 'contig_ids', "contig_lengths")]): + need_to_populate_assembly_related_metadata = True + + if need_to_populate_assembly_related_metadata: if 'assembly_ref' in genome: assembly_data = self.dfu.get_objects( {'object_refs': [genome['assembly_ref']], 'ignore_errors': 0})['data'][0]['data'] + contig_ids = assembly_data["contigs"].keys() + genome["contig_ids"] = contig_ids + contig_lengths = list() + for contig_id in contig_ids: + if "length" in assembly_data["contigs"][contig_id]: + contig_lengths.append(assembly_data["contigs"][contig_id]["length"]) + else: + contig_lengths.append(0) + genome["contig_lengths"] = contig_lengths genome["gc_content"] = assembly_data['gc_content'] genome["dna_size"] = assembly_data['dna_size'] genome["md5"] = assembly_data['md5'] @@ -404,6 +358,55 @@ def _update_genome(self, genome): genome['feature_counts'] = type_counts return genome + def _check_for_duplicate_ids(self, genome): + """Check for dupicate ids. More of a sanity check as the code should not allow for this""" + + # Double check for duplicate feature ids across all 4 feature lists + # Note this is more than anything a check to make sure the coder does not introduce + # code that causes the code to handle duplicates to not work properly + # The following few lines need to be uncommented to test if the check is working properly + # only way to really test this checker + temp_duplicate_cds = genome["cdss"][0] + print("first CDS : " + str(temp_duplicate_cds)) + genome["cdss"].append(temp_duplicate_cds) + print("Last CDS : " + str( genome["cdss"][-1])) + + ids_present = set() + duplicate_ids_found = set() + if "cdss" in genome: + for cds in genome["cdss"]: + if cds["id"] in ids_present: + duplicate_ids_found.add(cds["id"]) + else: + ids_present.add(cds["id"]) + if "features" in genome: + for feature in genome["features"]: + if feature["id"] in ids_present: + duplicate_ids_found.add(feature["id"]) + else: + ids_present.add(feature["id"]) + if "mrnas" in genome: + for mrna in genome["mrnas"]: + if mrna["id"] in ids_present: + duplicate_ids_found.add(mrna["id"]) + else: + ids_present.add(mrna["id"]) + if "non_coding_featues" in genome: + for non_coding_feature in genome["non_coding_features"]: + if non_coding_feature["id"] in ids_present: + duplicate_ids_found.add(non_coding_feature["id"]) + else: + ids_present.add(non_coding_feature["id"]) + print(f"dup ids count {str(len(duplicate_ids_found))}") + if len(duplicate_ids_found) > 0: + duplicate_ids_string = ', '.join(str(s) for s in duplicate_ids_found) + #"Duplicate keys HERE" + raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " + + "Please enter a help desk ticket. Duplicate IDs: " + + duplicate_ids_string) + raise ValueError(raised_error_message) + return 1 + @staticmethod def validate_genome(g): """ From ad82508ff286049186a73161d1819ad70563a111 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 03:27:19 +0000 Subject: [PATCH 10/14] Trying dup fix for all regular genomes --- lib/GenomeFileUtil/core/GenomeInterface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 7a446fc6..3d6769f6 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -404,7 +404,8 @@ def _check_for_duplicate_ids(self, genome): raised_error_message = ("Duplicate ids were found and not properly handled by the uploader. " + "Please enter a help desk ticket. Duplicate IDs: " + duplicate_ids_string) - raise ValueError(raised_error_message) + print("Duplicate IDs exist") + raise ValueError("DUPLICATES EXIST: " + raised_error_message) return 1 @staticmethod From 84b9a951380d86816fb819a3ca969389402e2294 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 23:29:07 +0000 Subject: [PATCH 11/14] commenting out the dup check for debugging --- lib/GenomeFileUtil/core/GenomeInterface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 3d6769f6..389a0295 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -145,7 +145,7 @@ def save_one_genome(self, params): # if params.get('upgrade') or 'feature_counts' not in data: data = self._update_genome(data) - self._check_for_duplicate_ids(data): +# self._check_for_duplicate_ids(data): # check all handles point to shock nodes owned by calling user self._own_handle(data, 'genbank_handle_ref') @@ -231,8 +231,8 @@ def _update_genome(self, genome): else: GenomeUtils.set_default_taxon_data(genome) - need_to_populate_assembly_related_metadata = False # fixes issue of user have contig_ids key but an empty list + need_to_populate_assembly_related_metadata = False if 'contig_ids' in genome and len(genome['contig_ids']) == 0 : need_to_populate_assembly_related_metadata = True From dea25ec198842d689ec4476cb7d702b5982523db Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Fri, 17 Sep 2021 23:42:54 +0000 Subject: [PATCH 12/14] added back the dup check for debugging --- lib/GenomeFileUtil/core/GenomeInterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 389a0295..d81cfa2f 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -145,7 +145,7 @@ def save_one_genome(self, params): # if params.get('upgrade') or 'feature_counts' not in data: data = self._update_genome(data) -# self._check_for_duplicate_ids(data): + self._check_for_duplicate_ids(data): # check all handles point to shock nodes owned by calling user self._own_handle(data, 'genbank_handle_ref') From 9b07501487d36dbc534eee24f6fb739782088283 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Sat, 18 Sep 2021 00:07:38 +0000 Subject: [PATCH 13/14] added back the dup check for debugging --- lib/GenomeFileUtil/core/GenomeInterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index d81cfa2f..50bfd48b 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -145,7 +145,7 @@ def save_one_genome(self, params): # if params.get('upgrade') or 'feature_counts' not in data: data = self._update_genome(data) - self._check_for_duplicate_ids(data): + self._check_for_duplicate_ids(data) # check all handles point to shock nodes owned by calling user self._own_handle(data, 'genbank_handle_ref') From 64dd5034d0a58cad08e37e48fb8bb43371635cf6 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Sat, 18 Sep 2021 00:39:44 +0000 Subject: [PATCH 14/14] Final working version --- lib/GenomeFileUtil/core/GenomeInterface.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index 50bfd48b..e9a1c6cc 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -364,12 +364,11 @@ def _check_for_duplicate_ids(self, genome): # Double check for duplicate feature ids across all 4 feature lists # Note this is more than anything a check to make sure the coder does not introduce # code that causes the code to handle duplicates to not work properly - # The following few lines need to be uncommented to test if the check is working properly - # only way to really test this checker - temp_duplicate_cds = genome["cdss"][0] - print("first CDS : " + str(temp_duplicate_cds)) - genome["cdss"].append(temp_duplicate_cds) - print("Last CDS : " + str( genome["cdss"][-1])) + # The following two lines need to be uncommented to test if the check is working properly + # only way to really test this checker. + # Uncomment the next two lines to test if dup check is working. + #temp_duplicate_cds = genome["cdss"][0] + #genome["cdss"].append(temp_duplicate_cds) ids_present = set() duplicate_ids_found = set()