Skip to content

Commit

Permalink
Merge pull request #656 from alliance-genome/KANBAN-309
Browse files Browse the repository at this point in the history
KANBAN-309 Fixed rounding. Added tests and indices.
  • Loading branch information
christabone authored Aug 29, 2023
2 parents d659a23 + 4194d76 commit 2c85311
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/etl/helpers/neo4j_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,7 @@ def create_indices():

for index in two_composite_indices:
session.run("CREATE INDEX FOR ({}) ON ({}, {})".format(index[0], index[1], index[2]))

# Run our relationship indices.
session.run("CREATE INDEX rel_orthology_idx FOR ()-[r:ORTHOLOGOUS]-() on (r.isBestScore, r.isBestRevScore, r.strictFilter, r.moderateFilter)")
session.run("CREATE INDEX rel_paralogy_idx FOR ()-[r:PARALOGOUS]-() on (r.rank, r.length, r.similarity, r.identity)")
13 changes: 11 additions & 2 deletions src/etl/paralogy_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,22 @@ def get_generators(self, datafile, sub_type, batch_size): # noqa
counter = counter - 1
continue

# Obtain similarity from dictionary and round to two decimal places.
similarity = para_record['similarity']
if isinstance(similarity, (float, int)):
similarity = round(similarity, 2)

identity = para_record['identity']
if isinstance(identity, (float, int)):
identity = round(identity, 2)

if gene_1_agr_primary_id is not None and gene_2_agr_primary_id is not None:

para_dataset = {
'rank': para_record['rank'],
'length': para_record['length'],
'similarity': para_record['similarity'],
'identity': para_record['identity'],
'similarity': similarity,
'identity': identity,

'gene1AgrPrimaryId': gene_1_agr_primary_id,
'gene2AgrPrimaryId': gene_2_agr_primary_id,
Expand Down
78 changes: 78 additions & 0 deletions src/test/specific_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,84 @@ def test_spell_cross_ref_type():
assert record["counter"] < 1


def test_paralogous_properties():
    """Test PARALOGOUS Relationship Properties"""

    # Fetch at most 25 PARALOGOUS relationships on which all four
    # properties (identity, similarity, length, rank) are present.
    query = """
MATCH ()-[r:PARALOGOUS]->()
WHERE
r.identity IS NOT NULL
AND r.similarity IS NOT NULL
AND r.length IS NOT NULL
AND r.rank IS NOT NULL
RETURN
r.identity AS identity,
r.similarity AS similarity,
r.length AS length,
r.rank AS rank
LIMIT 25
"""

    with Neo4jHelper.run_single_query(query) as result:
        for row in result:

            # Coerce every property up front, so a conversion failure
            # surfaces before any of the range checks below run.
            identity, similarity, length = (
                float(row[key]) for key in ("identity", "similarity", "length")
            )
            rank = int(row["rank"])

            # identity and similarity must lie within [0, 1];
            # length and rank must be strictly positive.
            assert 0.0 <= identity and identity <= 1.0, f"Invalid identity value: {identity}"
            assert 0.0 <= similarity and similarity <= 1.0, f"Invalid similarity value: {similarity}"
            assert 0 < length, f"Invalid length value: {length}"
            assert 0 < rank, f"Invalid rank value: {rank}"


def test_orthologous_properties():
    """Test ORTHOLOGOUS Relationship Properties

    Fetches at most 25 ORTHOLOGOUS relationships and checks that each of
    isBestRevScore, isBestScore, moderateFilter and strictFilter is either
    a boolean or one of the strings "yes"/"no"/"true"/"false" (any case).
    """

    # Construct the query. Note that it does not filter out NULL properties,
    # so the conversion helper below must cope with non-string values.
    query = """
MATCH ()-[r:ORTHOLOGOUS]->()
RETURN
r.isBestRevScore AS isBestRevScore,
r.isBestScore AS isBestScore,
r.moderateFilter AS moderateFilter,
r.strictFilter AS strictFilter
LIMIT 25
"""

    # Helper function to convert "Yes"/"No" and "true"/"false" to boolean.
    def to_bool(value):
        """Return ``value`` as a bool.

        Accepts an actual bool, or a string equal to "yes"/"true"/"no"/"false"
        ignoring case. Anything else -- including None and other non-string
        values -- raises ValueError, rather than failing with an
        AttributeError on ``.lower()``.
        """
        if isinstance(value, bool):  # Check if the value is already a boolean
            return value
        if isinstance(value, str):
            lowered = value.lower()
            if lowered in ("yes", "true"):
                return True
            if lowered in ("no", "false"):
                return False
        raise ValueError(f"Invalid boolean string: {value}")

    # Execute the query and check the properties.
    with Neo4jHelper.run_single_query(query) as result:
        for record in result:

            # Convert properties to appropriate types; an unrecognised value
            # makes to_bool raise, which fails the test.
            isBestRevScore = to_bool(record["isBestRevScore"])
            isBestScore = to_bool(record["isBestScore"])
            moderateFilter = to_bool(record["moderateFilter"])
            strictFilter = to_bool(record["strictFilter"])

            # Assert conditions based on requirements.
            assert isinstance(isBestRevScore, bool), f"Invalid isBestRevScore value: {isBestRevScore}"
            assert isinstance(isBestScore, bool), f"Invalid isBestScore value: {isBestScore}"
            assert isinstance(moderateFilter, bool), f"Invalid moderateFilter value: {moderateFilter}"
            assert isinstance(strictFilter, bool), f"Invalid strictFilter value: {strictFilter}"


def test_genes_have_automated_description():
"""Test Genes Have Automated Description"""

Expand Down
2 changes: 2 additions & 0 deletions src/test/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def __init__(self, use_test_object):
# allele
'SGD:S000283439', 'SGD:S000000316', 'SGD:S000005770', 'SGD:S000277574',
'SGD:S000000383', 'SGD:S000297409', 'SGD:S000006064', 'SGD:S000297411',
# paralogy
'SGD:S000028562', 'SGD:S000028564', 'SGD:S000004146', 'SGD:S000004149',
}

self.zfin_id_set = {
Expand Down

0 comments on commit 2c85311

Please sign in to comment.