diff --git a/src/etl/helpers/neo4j_helper.py b/src/etl/helpers/neo4j_helper.py
index cfd78be1..98c23ff3 100644
--- a/src/etl/helpers/neo4j_helper.py
+++ b/src/etl/helpers/neo4j_helper.py
@@ -190,3 +190,7 @@ def create_indices():
 
             for index in two_composite_indices:
                 session.run("CREATE INDEX FOR ({}) ON ({}, {})".format(index[0], index[1], index[2]))
+
+            # Create our relationship indices.
+            session.run("CREATE INDEX rel_orthology_idx FOR ()-[r:ORTHOLOGOUS]-() on (r.isBestScore, r.isBestRevScore, r.strictFilter, r.moderateFilter)")
+            session.run("CREATE INDEX rel_paralogy_idx FOR ()-[r:PARALOGOUS]-() on (r.rank, r.length, r.similarity, r.identity)")
diff --git a/src/etl/paralogy_etl.py b/src/etl/paralogy_etl.py
index a792f30e..8209044c 100644
--- a/src/etl/paralogy_etl.py
+++ b/src/etl/paralogy_etl.py
@@ -217,13 +217,22 @@ def get_generators(self, datafile, sub_type, batch_size):  # noqa
                         counter = counter - 1
                         continue
 
+                # Obtain similarity from dictionary and round to two decimal places.
+                similarity = para_record['similarity']
+                if isinstance(similarity, (float, int)):
+                    similarity = round(similarity, 2)
+
+                identity = para_record['identity']
+                if isinstance(identity, (float, int)):
+                    identity = round(identity, 2)
+
                 if gene_1_agr_primary_id is not None and gene_2_agr_primary_id is not None:
 
                     para_dataset = {
                         'rank': para_record['rank'],
                         'length': para_record['length'],
-                        'similarity': para_record['similarity'],
-                        'identity': para_record['identity'],
+                        'similarity': similarity,
+                        'identity': identity,
                         'gene1AgrPrimaryId': gene_1_agr_primary_id,
                         'gene2AgrPrimaryId': gene_2_agr_primary_id,
 
diff --git a/src/test/specific_tests.py b/src/test/specific_tests.py
index 41276fab..9793d020 100644
--- a/src/test/specific_tests.py
+++ b/src/test/specific_tests.py
@@ -141,6 +141,75 @@ def test_spell_cross_ref_type():
             assert record["counter"] < 1
 
 
+def test_paralogous_properties():
+    """Test PARALOGOUS Relationship Properties"""
+
+    # Construct the query.
+    query = """
+            MATCH ()-[r:PARALOGOUS]->()
+            WHERE
+                r.identity IS NOT NULL
+                AND r.similarity IS NOT NULL
+                AND r.length IS NOT NULL
+                AND r.rank IS NOT NULL
+            RETURN
+                r.identity AS identity,
+                r.similarity AS similarity,
+                r.length AS length,
+                r.rank AS rank
+            LIMIT 25
+            """
+
+    # Execute the query and check the properties.
+    with Neo4jHelper.run_single_query(query) as result:
+        for record in result:
+
+            # Convert properties to appropriate types.
+            identity = float(record["identity"])
+            similarity = float(record["similarity"])
+            length = float(record["length"])
+            rank = int(record["rank"])
+
+            # Assert conditions based on requirements.
+            assert 0.0 <= identity <= 1.0, f"Invalid identity value: {identity}"
+            assert 0.0 <= similarity <= 1.0, f"Invalid similarity value: {similarity}"
+            assert length > 0, f"Invalid length value: {length}"
+            assert rank > 0, f"Invalid rank value: {rank}"
+
+
+def test_orthologous_properties():
+    """Test ORTHOLOGOUS Relationship Properties"""
+
+    # Construct the query.
+    query = """
+            MATCH ()-[r:ORTHOLOGOUS]->()
+            RETURN
+                r.isBestRevScore AS isBestRevScore,
+                r.isBestScore AS isBestScore,
+                r.moderateFilter AS moderateFilter,
+                r.strictFilter AS strictFilter
+            LIMIT 25
+            """
+
+    # A flag is valid as a real boolean or as a "Yes"/"No" / "true"/"false" string.
+    valid_strings = {"yes", "no", "true", "false"}
+    flag_names = ("isBestRevScore", "isBestScore", "moderateFilter", "strictFilter")
+
+    def is_boolean_like(value):
+        """Return True if value is a bool or a recognised boolean string."""
+        if isinstance(value, bool):
+            return True
+        # Any other non-string value (e.g. None for a missing property) is invalid.
+        return isinstance(value, str) and value.lower() in valid_strings
+
+    # Execute the query and check the properties.
+    with Neo4jHelper.run_single_query(query) as result:
+        for record in result:
+            for name in flag_names:
+                value = record[name]
+                assert is_boolean_like(value), f"Invalid {name} value: {value}"
+
+
 def test_genes_have_automated_description():
     """Test Genes Have Automated Description"""
 
diff --git a/src/test/test_object.py b/src/test/test_object.py
index 725e8112..62c3f03b 100644
--- a/src/test/test_object.py
+++ b/src/test/test_object.py
@@ -149,6 +149,8 @@ def __init__(self, use_test_object):
             # allele
             'SGD:S000283439', 'SGD:S000000316', 'SGD:S000005770', 'SGD:S000277574', 'SGD:S000000383',
             'SGD:S000297409', 'SGD:S000006064', 'SGD:S000297411',
+            # paralogy
+            'SGD:S000028562', 'SGD:S000028564', 'SGD:S000004146', 'SGD:S000004149',
         }
 
         self.zfin_id_set = {