Skip to content

Commit

Permalink
Merge pull request #378 from BU-ISCIII/develop
Browse files Browse the repository at this point in the history
Release to 1.4.0
  • Loading branch information
OPSergio authored Jan 28, 2025
2 parents 2d1f7a0 + 367873e commit 607e792
Show file tree
Hide file tree
Showing 13 changed files with 726 additions and 134 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,31 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.X.0] - 202X-XX-XX : https://github.com/BU-ISCIII/relecov-tools/releases/tag/
## [1.4.0] - 2025-01-27 : https://github.com/BU-ISCIII/relecov-tools/releases/tag/v1.4.0

### Credits

Code contributions to the release:

- [Sarai Varona](https://github.com/svarona)
- [Alejandro Bernabeu](https://github.com/aberdur)
- [Victor Lopez](https://github.com/victor5lm)

### Modules

#### Added enhancements

- Added an IonTorrent flow cell for validation [#363](https://github.com/BU-ISCIII/relecov-tools/pull/363)
- Added solution to timeout in upload-to-ena module [#368](https://github.com/BU-ISCIII/relecov-tools/pull/368)
- Added log functionality to build-schema module [#340](https://github.com/BU-ISCIII/relecov-tools/pull/340)
- Updated the metadata_processing field in configuration.json and added the other_preparation_kit, quality_control_metrics and consensus_criteria fields in the json schema [#372](https://github.com/BU-ISCIII/relecov-tools/pull/372)
- Added quality control functionality to read-bioinfo-metadata [#373](https://github.com/BU-ISCIII/relecov-tools/pull/373)
- Added dropdown functionality to build-schema enums [#374](https://github.com/BU-ISCIII/relecov-tools/pull/374)

#### Fixes

- Fixed read-bioinfo-metadata module [#367](https://github.com/BU-ISCIII/relecov-tools/pull/367)

#### Changed

#### Removed
Expand Down
62 changes: 62 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "relecov-tools"
version = "1.4.0"
description = "Tools for managing and processing relecov network data."
readme = "README.md"
requires-python = ">=3.7"
authors = [
{name = "Sara Monzon", email = "[email protected]"},
{name = "Luis Chapado", email = "[email protected]"},
{name = "Isabel Cuesta", email = "[email protected]"},
{name = "Sarai Varona", email = "[email protected]"},
{name = "Daniel Valle", email = "[email protected]"},
{name = "Pablo Mata", email = "[email protected]"},
{name = "Victor Lopez", email = "[email protected]"},
{name = "Emi Arjona", email = "[email protected]"},
{name = "Jaime Ozaez", email = "[email protected]"},
{name = "Juan Ledesma", email = "[email protected]"},
{name = "Sergio Olmos", email = "[email protected]"},
{name = "Alejandro Bernabeu", email = "[email protected]"},
{name = "Alba Talavera", email = "[email protected]"}
]
maintainers = [
{name = "Sara Monzon", email = "[email protected]"},
{name = "Luis Chapado", email = "[email protected]"},
{name = "Isabel Cuesta", email = "[email protected]"},
{name = "Sarai Varona", email = "[email protected]"},
{name = "Daniel Valle", email = "[email protected]"},
{name = "Pablo Mata", email = "[email protected]"},
{name = "Victor Lopez", email = "[email protected]"},
{name = "Emi Arjona", email = "[email protected]"},
{name = "Jaime Ozaez", email = "[email protected]"},
{name = "Juan Ledesma", email = "[email protected]"},
{name = "Sergio Olmos", email = "[email protected]"},
{name = "Alejandro Bernabeu", email = "[email protected]"},
{name = "Alba Talavera", email = "[email protected]"}
]
keywords = [
"relecov",
"bioinformatics",
"pipeline",
"sequencing",
"NGS",
"next generation sequencing"
]
license = {text = "GNU GENERAL PUBLIC LICENSE v.3"}
dynamic = ["dependencies"]

[project.urls]
Homepage = "https://github.com/BU-ISCIII/relecov-tools"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[tool.setuptools.packages.find]
exclude = ["docs"]

[project.scripts]
relecov-tools = "relecov_tools.__main__:run_relecov_tools"
2 changes: 1 addition & 1 deletion relecov_tools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
stderr=True, force_terminal=relecov_tools.utils.rich_force_colors()
)

__version__ = "1.3.0"
__version__ = "1.4.0"


def run_relecov_tools():
Expand Down
95 changes: 79 additions & 16 deletions relecov_tools/assets/pipeline_utils/viralrecon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os.path

from pathlib import Path
from datetime import datetime

import relecov_tools.utils
from relecov_tools.config_json import ConfigJson
Expand Down Expand Up @@ -135,7 +134,7 @@ def convert_to_json(self, samp_dict):
j_list = []
# Grab date from filename
result_regex = re.search(
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
"variants_long_table(?:_\d{14})?\.csv", os.path.basename(self.file_path)
)
if result_regex is None:
stderr.print(
Expand All @@ -153,18 +152,53 @@ def convert_to_json(self, samp_dict):
j_list.append(j_dict)
return j_list

def save_to_file(self, j_list, batch_date):
    """Write the parsed long-table data to a batch-dated JSON file.

    The output name is ``long_table_<batch_date>.json`` inside
    ``self.output_directory``. If that file already exists, its records are
    merged with the new ones: a sample already present must carry identical
    data, otherwise the merge is aborted and ``None`` is returned; samples
    not present yet are appended.

    Args:
        j_list (list[dict]): parsed long-table records, each holding a
            ``sample_name`` key.
        batch_date (str): date tag used to build the output file name.

    Returns:
        None: always (``None`` is also returned early on a merge conflict).
    """
    file_name = "long_table_" + batch_date + ".json"
    file_path = os.path.join(self.output_directory, file_name)
    if os.path.exists(file_path):
        stderr.print(
            f"[blue]Long table {file_path} file already exists. Merging new data if possible."
        )
        log.info(
            "Long table %s file already exists. Merging new data if possible."
            % file_path
        )
        original_table = relecov_tools.utils.read_json_file(file_path)
        # Index existing records by sample name for O(1) conflict checks.
        samples_indict = {item["sample_name"]: item for item in original_table}
        for item in j_list:
            sample_name = item["sample_name"]
            if sample_name in samples_indict:
                # Same sample must have identical data; otherwise abort.
                if samples_indict[sample_name] != item:
                    stderr.print(
                        f"[red]Same sample {sample_name} has different data in both long tables."
                    )
                    log.error(
                        "Sample %s has different data in %s and new long table. Can't merge."
                        % (sample_name, file_path)
                    )
                    return None
            else:
                original_table.append(item)
        try:
            with open(file_path, "w") as fh:
                fh.write(json.dumps(original_table, indent=4))
            stderr.print(
                "[green]\tParsed data successfully saved to file:", file_path
            )
        except Exception as e:
            stderr.print("[red]\tError saving parsed data to file:", str(e))
            log.error("Error saving parsed data to file: %s", e)
    else:
        try:
            with open(file_path, "w") as fh:
                fh.write(json.dumps(j_list, indent=4))
            stderr.print(
                "[green]\tParsed data successfully saved to file:", file_path
            )
        except Exception as e:
            stderr.print("[red]\tError saving parsed data to file:", str(e))
            log.error("Error saving parsed data to file: %s", e)

def parsing_csv(self):
"""
Expand All @@ -180,7 +214,7 @@ def parsing_csv(self):


# START util functions
def handle_pangolin_data(files_list, output_folder=None):
def handle_pangolin_data(files_list, batch_date, output_folder=None):
"""File handler to parse pangolin data (csv) into JSON structured format.
Args:
Expand Down Expand Up @@ -320,7 +354,7 @@ def get_pango_data_version(files_list):
return pango_data_processed


def parse_long_table(files_list, output_folder=None):
def parse_long_table(files_list, batch_date, output_folder=None):
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
This function utilizes the LongTableParse class to parse the long table data.
Since this utility handles and maps data using a custom way, it returns None to be avoid being transferred to method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
Expand Down Expand Up @@ -349,7 +383,7 @@ def parse_long_table(files_list, output_folder=None):
# Parsing long table data and saving it
long_table_data = long_table.parsing_csv()
# Saving long table data into a file
long_table.save_to_file(long_table_data)
long_table.save_to_file(long_table_data, batch_date)
stderr.print("[green]\tProcess completed")
elif len(files_list) > 1:
method_log_report.update_log_report(
Expand All @@ -361,7 +395,7 @@ def parse_long_table(files_list, output_folder=None):
return None


def handle_consensus_fasta(files_list, output_folder=None):
def handle_consensus_fasta(files_list, batch_date, output_folder=None):
"""File handler to parse consensus data (fasta) into JSON structured format.
Args:
Expand Down Expand Up @@ -406,3 +440,32 @@ def handle_consensus_fasta(files_list, output_folder=None):
)
method_log_report.print_log_report(method_name, ["valid", "warning"])
return consensus_data_processed


def quality_control_evaluation(data):
    """Evaluate sample quality metrics and annotate each entry with 'qc_test'.

    Each sample dict is checked against a fixed set of threshold conditions.
    A sample passes only if every metric is present and satisfies its
    condition; a missing metric, an out-of-range value, or a value that
    cannot be converted to a number marks the sample as failed.

    Args:
        data (list[dict]): sample dictionaries holding QC metric values
            (numbers or numeric strings).

    Returns:
        list[dict]: the same list, mutated in place, with each entry
            annotated as ``qc_test`` = ``"pass"`` or ``"fail"``.
    """
    conditions = {
        "per_sgene_ambiguous": lambda x: float(x) < 10,
        "per_sgene_coverage": lambda x: float(x) > 98,
        "per_ldmutations": lambda x: float(x) > 60,
        "number_of_sgene_frameshifts": lambda x: int(x) == 0,
        "number_of_unambiguous_bases": lambda x: int(x) > 24000,
        "number_of_Ns": lambda x: int(x) < 5000,
        "qc_filtered": lambda x: int(x) > 50000,
        "per_reads_host": lambda x: float(x) < 20,
    }
    for sample in data:
        try:
            qc_status = "pass"
            for param, condition in conditions.items():
                value = sample.get(param)
                if value is None or not condition(value):
                    qc_status = "fail"
                    break
            sample["qc_test"] = qc_status
        # TypeError too: float()/int() raise it for non-numeric, non-string
        # values (e.g. lists); such samples fail instead of crashing the run.
        except (ValueError, TypeError) as e:
            sample["qc_test"] = "fail"
            print(
                f"Error processing sample {sample.get('sequencing_sample_id', 'unknown')}: {e}"
            )
    return data
Loading

0 comments on commit 607e792

Please sign in to comment.