Skip to content

Commit

Permalink
Merge pull request #378 from BU-ISCIII/develop
Browse files Browse the repository at this point in the history
Release to 1.4.0
  • Loading branch information
OPSergio authored Jan 28, 2025
2 parents 2d1f7a0 + 367873e commit 607e792
Show file tree
Hide file tree
Showing 13 changed files with 726 additions and 134 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,31 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.X.0] - 202X-XX-XX : https://github.com/BU-ISCIII/relecov-tools/releases/tag/
## [1.4.0] - 2025-01-27 : https://github.com/BU-ISCIII/relecov-tools/releases/tag/v1.4.0

### Credits

Code contributions to the release:

- [Sarai Varona](https://github.com/svarona)
- [Alejandro Bernabeu](https://github.com/aberdur)
- [Victor Lopez](https://github.com/victor5lm)

### Modules

#### Added enhancements

- Added an IonTorrent flow cell for validation [#363](https://github.com/BU-ISCIII/relecov-tools/pull/363)
- Added solution to timeout in upload-to-ena module [#368](https://github.com/BU-ISCIII/relecov-tools/pull/368)
- Added log functionality to build-schema module [#340](https://github.com/BU-ISCIII/relecov-tools/pull/340)
- Updated the metadata_processing field in configuration.json and added the other_preparation_kit, quality_control_metrics and consensus_criteria fields in the json schema [#372](https://github.com/BU-ISCIII/relecov-tools/pull/372)
- Added quality control functionality to read-bioinfo-metadata [#373](https://github.com/BU-ISCIII/relecov-tools/pull/373)
- Added dropdown functionality to build-schema enums [#374](https://github.com/BU-ISCIII/relecov-tools/pull/374)

#### Fixes

- Fixed read-bioinfo-metadata module [#367](https://github.com/BU-ISCIII/relecov-tools/pull/367)

#### Changed

#### Removed
Expand Down
62 changes: 62 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "relecov-tools"
version = "1.4.0"
description = "Tools for managing and processing relecov network data."
readme = "README.md"
requires-python = ">=3.7"
authors = [
{name = "Sara Monzon", email = "[email protected]"},
{name = "Luis Chapado", email = "[email protected]"},
{name = "Isabel Cuesta", email = "[email protected]"},
{name = "Sarai Varona", email = "[email protected]"},
{name = "Daniel Valle", email = "[email protected]"},
{name = "Pablo Mata", email = "[email protected]"},
{name = "Victor Lopez", email = "[email protected]"},
{name = "Emi Arjona", email = "[email protected]"},
{name = "Jaime Ozaez", email = "[email protected]"},
{name = "Juan Ledesma", email = "[email protected]"},
{name = "Sergio Olmos", email = "[email protected]"},
{name = "Alejandro Bernabeu", email = "[email protected]"},
{name = "Alba Talavera", email = "[email protected]"}
]
maintainers = [
{name = "Sara Monzon", email = "[email protected]"},
{name = "Luis Chapado", email = "[email protected]"},
{name = "Isabel Cuesta", email = "[email protected]"},
{name = "Sarai Varona", email = "[email protected]"},
{name = "Daniel Valle", email = "[email protected]"},
{name = "Pablo Mata", email = "[email protected]"},
{name = "Victor Lopez", email = "[email protected]"},
{name = "Emi Arjona", email = "[email protected]"},
{name = "Jaime Ozaez", email = "[email protected]"},
{name = "Juan Ledesma", email = "[email protected]"},
{name = "Sergio Olmos", email = "[email protected]"},
{name = "Alejandro Bernabeu", email = "[email protected]"},
{name = "Alba Talavera", email = "[email protected]"}
]
keywords = [
"relecov",
"bioinformatics",
"pipeline",
"sequencing",
"NGS",
"next generation sequencing"
]
license = {text = "GNU GENERAL PUBLIC LICENSE v.3"}
dynamic = ["dependencies"]

[project.urls]
Homepage = "https://github.com/BU-ISCIII/relecov-tools"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[tool.setuptools.packages.find]
exclude = ["docs"]

[project.scripts]
relecov-tools = "relecov_tools.__main__:run_relecov_tools"
2 changes: 1 addition & 1 deletion relecov_tools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
stderr=True, force_terminal=relecov_tools.utils.rich_force_colors()
)

__version__ = "1.3.0"
__version__ = "1.4.0"


def run_relecov_tools():
Expand Down
95 changes: 79 additions & 16 deletions relecov_tools/assets/pipeline_utils/viralrecon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os.path

from pathlib import Path
from datetime import datetime

import relecov_tools.utils
from relecov_tools.config_json import ConfigJson
Expand Down Expand Up @@ -135,7 +134,7 @@ def convert_to_json(self, samp_dict):
j_list = []
# Grab date from filename
result_regex = re.search(
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
"variants_long_table(?:_\d{14})?\.csv", os.path.basename(self.file_path)
)
if result_regex is None:
stderr.print(
Expand All @@ -153,18 +152,53 @@ def convert_to_json(self, samp_dict):
j_list.append(j_dict)
return j_list

def save_to_file(self, j_list, batch_date):
    """Write the parsed long-table data to a batch-dated JSON file.

    The output name is ``long_table_<batch_date>.json`` inside
    ``self.output_directory``. If that file already exists, its records are
    merged with the new ones: a sample already present must carry identical
    data, otherwise the merge is aborted and ``None`` is returned; samples
    not present yet are appended.

    Args:
        j_list (list[dict]): parsed long-table records, each holding a
            ``sample_name`` key.
        batch_date (str): date tag used to build the output file name.

    Returns:
        None: always (``None`` is also returned early on a merge conflict).
    """
    file_name = "long_table_" + batch_date + ".json"
    file_path = os.path.join(self.output_directory, file_name)
    if os.path.exists(file_path):
        stderr.print(
            f"[blue]Long table {file_path} file already exists. Merging new data if possible."
        )
        log.info(
            "Long table %s file already exists. Merging new data if possible."
            % file_path
        )
        original_table = relecov_tools.utils.read_json_file(file_path)
        # Index existing records by sample name for O(1) conflict checks.
        samples_indict = {item["sample_name"]: item for item in original_table}
        for item in j_list:
            sample_name = item["sample_name"]
            if sample_name in samples_indict:
                # Same sample must have identical data; otherwise abort.
                if samples_indict[sample_name] != item:
                    stderr.print(
                        f"[red]Same sample {sample_name} has different data in both long tables."
                    )
                    log.error(
                        "Sample %s has different data in %s and new long table. Can't merge."
                        % (sample_name, file_path)
                    )
                    return None
            else:
                original_table.append(item)
        try:
            with open(file_path, "w") as fh:
                fh.write(json.dumps(original_table, indent=4))
            stderr.print(
                "[green]\tParsed data successfully saved to file:", file_path
            )
        except Exception as e:
            stderr.print("[red]\tError saving parsed data to file:", str(e))
            log.error("Error saving parsed data to file: %s", e)
    else:
        try:
            with open(file_path, "w") as fh:
                fh.write(json.dumps(j_list, indent=4))
            stderr.print(
                "[green]\tParsed data successfully saved to file:", file_path
            )
        except Exception as e:
            stderr.print("[red]\tError saving parsed data to file:", str(e))
            log.error("Error saving parsed data to file: %s", e)

def parsing_csv(self):
"""
Expand All @@ -180,7 +214,7 @@ def parsing_csv(self):


# START util functions
def handle_pangolin_data(files_list, output_folder=None):
def handle_pangolin_data(files_list, batch_date, output_folder=None):
"""File handler to parse pangolin data (csv) into JSON structured format.
Args:
Expand Down Expand Up @@ -320,7 +354,7 @@ def get_pango_data_version(files_list):
return pango_data_processed


def parse_long_table(files_list, output_folder=None):
def parse_long_table(files_list, batch_date, output_folder=None):
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
This function utilizes the LongTableParse class to parse the long table data.
Since this utility handles and maps data using a custom way, it returns None to be avoid being transferred to method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
Expand Down Expand Up @@ -349,7 +383,7 @@ def parse_long_table(files_list, output_folder=None):
# Parsing long table data and saving it
long_table_data = long_table.parsing_csv()
# Saving long table data into a file
long_table.save_to_file(long_table_data)
long_table.save_to_file(long_table_data, batch_date)
stderr.print("[green]\tProcess completed")
elif len(files_list) > 1:
method_log_report.update_log_report(
Expand All @@ -361,7 +395,7 @@ def parse_long_table(files_list, output_folder=None):
return None


def handle_consensus_fasta(files_list, output_folder=None):
def handle_consensus_fasta(files_list, batch_date, output_folder=None):
"""File handler to parse consensus data (fasta) into JSON structured format.
Args:
Expand Down Expand Up @@ -406,3 +440,32 @@ def handle_consensus_fasta(files_list, output_folder=None):
)
method_log_report.print_log_report(method_name, ["valid", "warning"])
return consensus_data_processed


def quality_control_evaluation(data):
    """Evaluate sample quality metrics and annotate each entry with 'qc_test'.

    Each sample dict is checked against a fixed set of threshold conditions.
    A sample passes only if every metric is present and satisfies its
    condition; a missing metric, an out-of-range value, or a value that
    cannot be converted to a number marks the sample as failed.

    Args:
        data (list[dict]): sample dictionaries holding QC metric values
            (numbers or numeric strings).

    Returns:
        list[dict]: the same list, mutated in place, with each entry
            annotated as ``qc_test`` = ``"pass"`` or ``"fail"``.
    """
    conditions = {
        "per_sgene_ambiguous": lambda x: float(x) < 10,
        "per_sgene_coverage": lambda x: float(x) > 98,
        "per_ldmutations": lambda x: float(x) > 60,
        "number_of_sgene_frameshifts": lambda x: int(x) == 0,
        "number_of_unambiguous_bases": lambda x: int(x) > 24000,
        "number_of_Ns": lambda x: int(x) < 5000,
        "qc_filtered": lambda x: int(x) > 50000,
        "per_reads_host": lambda x: float(x) < 20,
    }
    for sample in data:
        try:
            qc_status = "pass"
            for param, condition in conditions.items():
                value = sample.get(param)
                if value is None or not condition(value):
                    qc_status = "fail"
                    break
            sample["qc_test"] = qc_status
        # TypeError too: float()/int() raise it for non-numeric, non-string
        # values (e.g. lists); such samples fail instead of crashing the run.
        except (ValueError, TypeError) as e:
            sample["qc_test"] = "fail"
            print(
                f"Error processing sample {sample.get('sequencing_sample_id', 'unknown')}: {e}"
            )
    return data
Loading

0 comments on commit 607e792

Please sign in to comment.