Skip to content

Commit

Permalink
Add flag for germline in structural variant file
Browse files Browse the repository at this point in the history
  • Loading branch information
danlu1 committed Oct 28, 2024
1 parent 6ec4fe1 commit 835961e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
9 changes: 4 additions & 5 deletions genie_registry/structural_variant.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from io import StringIO
import logging
import os
from io import StringIO

from pandas import DataFrame

from genie.example_filetype_format import FileTypeFormat
from genie import load, process_functions, validate
from genie.example_filetype_format import FileTypeFormat
from pandas import DataFrame

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,7 +90,7 @@ def _validate(self, sv_df):
warn, error = process_functions.check_col_and_values(
df=sv_df,
col="SV_STATUS",
possible_values=["SOMATIC", "GERMLINE"],
possible_values=["SOMATIC"],
filename="Structural Variant",
required=True,
)
Expand Down
26 changes: 21 additions & 5 deletions tests/test_sv.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from unittest.mock import patch

import pandas as pd

from genie_registry.structural_variant import StructuralVariant
from genie import validate
from genie_registry.structural_variant import StructuralVariant


class TestSv:
Expand Down Expand Up @@ -52,7 +51,7 @@ def test_validation_sample_error(self):
sv_df = pd.DataFrame(
{
"sample_id": ["GENIE-SAGE-ID1-1", "GENIE-SAGE-ID1-1", "ID3-1"],
"SV_STATUS": ["SOMATIC", "SOMATIC", "GERMLINE"],
"SV_STATUS": ["SOMATIC", "SOMATIC", "SOMATIC"],
}
)
error, warning = self.sv_cls._validate(sv_df)
Expand Down Expand Up @@ -80,7 +79,7 @@ def test_validation_integer_check(self):
sv_df = pd.DataFrame(
{
"sample_id": ["GENIE-SAGE-ID1-1", "GENIE-SAGE-ID2-1"],
"SV_STATUS": ["SOMATIC", "GERMLINE"],
"SV_STATUS": ["SOMATIC", "SOMATIC"],
"SITE1_ENTREZ_GENE_ID": [1, "foo"],
"SITE2_ENTREZ_GENE_ID": [1, "foo"],
"SITE1_REGION_NUMBER": [1, "foo"],
Expand Down Expand Up @@ -118,7 +117,7 @@ def test_validation_no_errors(self):
"GENIE-SAGE-ID2-1",
"GENIE-SAGE-ID3-1",
],
"SV_STATUS": ["SOMATIC", "GERMLINE", "GERMLINE"],
"SV_STATUS": ["SOMATIC", "SOMATIC", "SOMATIC"],
"SITE1_ENTREZ_GENE_ID": [1, 2, 2],
"SITE2_ENTREZ_GENE_ID": [1, 3, 3],
"SITE1_REGION_NUMBER": [1, 2, 2],
Expand Down Expand Up @@ -154,3 +153,20 @@ def test_validation__validate_chromosome_is_called(self):
"_validate_chromosome should be called twice for sv file"
"since it has two potential chromosome columns to check"
)

def test_validation_flag_GERMLINE_in_SV_STATUS(self):
    """Check that a GERMLINE entry in SV_STATUS is reported as an error.

    Builds a three-row frame whose last SV_STATUS value is GERMLINE and
    expects ``_validate`` to return the SV_STATUS allowed-values error
    message and an empty warning string.
    """
    sample_ids = [
        "GENIE-SAGE-ID1-1",
        "GENIE-SAGE-ID2-1",
        "GENIE-SAGE-ID3-1",
    ]
    statuses = ["SOMATIC", "SOMATIC", "GERMLINE"]
    sv_df = pd.DataFrame({"sample_id": sample_ids, "SV_STATUS": statuses})

    expected_error = (
        "Structural Variant: Please double check your SV_STATUS column. This column must only be these values: SOMATIC\n"
    )

    error, warning = self.sv_cls._validate(sv_df)

    assert error == expected_error
    assert warning == ""

0 comments on commit 835961e

Please sign in to comment.