Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix ECs in bacdive, cell_shape in bacdive and madin_etal #269

Merged
6 commits merged on Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion kg_microbe/transform_utils/bacdive/bacdive.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,6 +67,7 @@
DOMAIN,
DSM_NUMBER,
DSM_NUMBER_COLUMN,
EC_CATEGORY,
EC_KEY,
EC_PREFIX,
ENZYME_TO_ASSAY_EDGE,
Expand Down Expand Up @@ -484,7 +485,7 @@ def run(self, data_file: Union[Optional[Path], Optional[str]] = None, show_statu
assay_nodes_to_write.append(
[
ec_id,
PHENOTYPIC_CATEGORY,
EC_CATEGORY,
assay[BACDIVE_MAPPING_ENZYME_LABEL],
]
+ [None] * (len(self.node_header) - 3)
Expand Down
11 changes: 11 additions & 0 deletions kg_microbe/transform_utils/bactotraits/bactotraits.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -245,6 +245,15 @@ def run(
combo_curie_map = {
key: value for key, value in custom_curie_map.items() if COMBO_KEY in value
}
unique_combo_node_data = [
(
inner_curie_map[CURIE_COLUMN],
inner_curie_map[CATEGORY_COLUMN],
inner_curie_map[NAME_COLUMN],
)
for _, v in combo_curie_map.items()
for inner_curie_map in v[COMBO_KEY]
]
unique_combo_edge_data = [
(
v[CURIE_COLUMN],
Expand All @@ -257,6 +266,7 @@ def run(
for inner_curie_map in v[COMBO_KEY]
]
combo_edge_data = [list(edge) for edge in unique_combo_edge_data]
combo_node_data = [list(edge) for edge in unique_combo_node_data]

progress_class = tqdm if show_status else DummyTqdm
with progress_class() as progress:
Expand Down Expand Up @@ -333,6 +343,7 @@ def run(
progress.set_description(f"Processing line #{i}")
# After each iteration, call the update method to advance the progress bar.
progress.update(1)
node_writer.writerows(combo_node_data)
edge_writer.writerows(combo_edge_data)
drop_duplicates(self.output_node_file)
drop_duplicates(self.output_edge_file)
1 change: 0 additions & 1 deletion kg_microbe/transform_utils/constants.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -210,7 +210,6 @@
MEDIUM_TYPE_CATEGORY = "biolink:ChemicalMixture"
SOLUTION_CATEGORY = "biolink:ChemicalEntity"
INGREDIENT_CATEGORY = "biolink:ChemicalEntity"
SHAPE_CATEGORY = "biolink:AbstractEntity"
METABOLISM_CATEGORY = "biolink:ActivityAndBehavior"
PATHWAY_CATEGORY = "biolink:BiologicalProcess"
CARBON_SUBSTRATE_CATEGORY = "biolink:ChemicalEntity"
Expand Down
4 changes: 2 additions & 2 deletions kg_microbe/transform_utils/custom_curies.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -150,7 +150,7 @@ trophic_type:
curie: "trophic_type:heterotrophy"
name: "heterotroph"
<<: *biological_process_block
tt_heterotrph: *heterotroph_block
tt_heterotroph: *heterotroph_block
chemoautolithotroph:
curie: "trophic_type:chemoautolithotrophy"
name: "chemoautolithotroph"
Expand Down Expand Up @@ -363,7 +363,7 @@ cell_shape:
name: "ellipsoidal shaped cell"
<<: *phenotypic_quality_block

curved_spiral_shaped:
s_curved_spiral:
curie: "cell_shape:curved_spiral"
name: "curved spiral shaped cell"
<<: *phenotypic_quality_block
Expand Down
8 changes: 6 additions & 2 deletions kg_microbe/transform_utils/madin_etal/madin_etal.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,9 +49,9 @@
PATHWAY_CATEGORY,
PATHWAY_PREFIX,
PATHWAYS_COLUMN,
PHENOTYPIC_CATEGORY,
PREFERRED_TERM_KEY,
ROLE_CATEGORY,
SHAPE_CATEGORY,
SHAPE_PREFIX,
SUBJECT_LABEL_COLUMN,
TAX_ID_COLUMN,
Expand Down Expand Up @@ -363,7 +363,11 @@ def run(self, data_file: Union[Optional[Path], Optional[str]] = None, show_statu
else filtered_row[CELL_SHAPE_COLUMN]
)
if cell_shape:
cell_shape_node = [SHAPE_PREFIX + cell_shape, SHAPE_CATEGORY, cell_shape]
cell_shape_node = [
SHAPE_PREFIX + cell_shape,
PHENOTYPIC_CATEGORY,
cell_shape,
]
tax_to_cell_shape_edge = [
tax_id,
NCBI_TO_SHAPE_EDGE,
Expand Down
Loading