Skip to content

Commit

Permalink
Add mypy pre-commit (#521)
Browse files Browse the repository at this point in the history
* Add in pre-commit and run on all files

* Add in bandit code scanning

* Lint

* Run linter

* Uncomment out mypy

* Start fixing mypy errors

* Fix mypy issues

* Fix mypy issues

* Fix test
  • Loading branch information
thomasyu888 authored Jun 7, 2023
1 parent 99fdc33 commit 39e1539
Show file tree
Hide file tree
Showing 14 changed files with 175 additions and 152 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ repos:
# - id: blacken-docs
# additional_dependencies: [black]

# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: 'v1.0.1'
# hooks:
# - id: mypy
# additional_dependencies: [pydantic~=1.10]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.3.0'
hooks:
- id: mypy
# additional_dependencies: [pydantic~=1.10]

# Checks for missing docstrings
# - repo: https://github.com/econchick/interrogate
Expand Down
2 changes: 1 addition & 1 deletion bin/database_to_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def main(
parser.add_argument(
"--consortiumReleaseCutOff",
type=int,
metavar=184,
metavar="184",
default=184,
help="Consortium release cut off time in days",
)
Expand Down
10 changes: 5 additions & 5 deletions genie/example_filetype_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import List, Optional

import pandas as pd

import synapseclient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -54,14 +54,14 @@ class FileTypeFormat(metaclass=ABCMeta):

_fileType = "fileType"

_validation_kwargs = []
_validation_kwargs: List[str] = []

def __init__(
self,
syn: object,
syn: synapseclient.Synapse,
center: str,
genie_config: dict = None,
ancillary_files: List[List[object]] = None,
genie_config: Optional[dict] = None,
ancillary_files: Optional[List[List[synapseclient.Entity]]] = None,
):
"""A validator helper class for a center's files.
Expand Down
4 changes: 2 additions & 2 deletions genie/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""

import logging
from typing import List
from typing import List, Optional

import synapseclient
import synapseutils
Expand Down Expand Up @@ -285,7 +285,7 @@ def get_genie_config(

# TODO: Remove oncotree_link parameter from this function
def _get_oncotreelink(
syn: synapseclient.Synapse, genie_config: dict, oncotree_link: str = None
syn: synapseclient.Synapse, genie_config: dict, oncotree_link: Optional[str] = None
) -> str:
"""
Gets oncotree link unless a link is specified by the user
Expand Down
245 changes: 128 additions & 117 deletions genie/input_to_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
import logging
import os
import time
from typing import List, Dict
from typing import List, Optional

import synapseclient # lgtm [py/import-and-import-from]
import synapseclient
from synapseclient import Synapse
from synapseclient.core.utils import to_unix_epoch_time
import synapseutils
import pandas as pd

from genie import (
Expand Down Expand Up @@ -166,117 +165,117 @@ def _get_status_and_error_list(valid, message, entities):
return input_status_list, invalid_errors_list


def get_ancillary_files(
    syn: synapseclient.Synapse,
    synid: str,
    project_id: str,
    center: str,
    process: str = "main",
    downloadFile: bool = True,
    genie_config: Optional[dict] = None,
    format_registry: Optional[dict] = None,
) -> Dict[str, Dict[str, object]]:
    """Walk a center's input directory and pair each file with its validator.

    Every file found under ``synid`` is fetched with ``syn.get`` and passed to
    ``validate.GenieValidationHelper`` to determine its file type. Files whose
    type is present in the helper's format registry are returned together with
    an instantiated file-format object. The two clinical supplemental files
    (sample and patient) are collected and handled as a single pair.

    Args:
        syn (synapseclient.Synapse): Synapse connection
        synid (str): Synapse Id of a folder
        project_id (str): GENIE Synapse project id
        center (str): GENIE center name
        process (str, optional): Process type include "main", "mutation".
            Defaults to "main". ``.vcf`` files are skipped unless this is
            "mutation".
        downloadFile (bool, optional): Downloads the file. Defaults to True.
        genie_config (dict, optional): GENIE configuration forwarded to the
            validation helper. Defaults to None.
        format_registry (dict, optional): GENIE file format registry forwarded
            to the validation helper. Defaults to None.

    Returns:
        dict: {entity_name: {
                  entity: the Synapse entity (for the clinical pair, the list
                          of both clinical entities),
                  filetypeformat_object: file-format object for that file type
              }}
    """
    logger.info("GETTING {center} INPUT FILES".format(center=center))
    clinical_pair_name = [
        "data_clinical_supp_sample_{center}.txt".format(center=center),
        "data_clinical_supp_patient_{center}.txt".format(center=center),
    ]
    clinicalpair_entities = []
    prepared_center_files: Dict[str, Dict[str, object]] = {}

    for _, _, entities in synapseutils.walk(syn, synid):
        for name, ent_synid in entities:
            if name.endswith(".vcf") and process != "mutation":
                continue

            # Fetch the entity *before* the clinical-pair check: the pair list
            # must hold the entity for this file, not one left over from a
            # previous iteration (or nothing at all on the first iteration).
            ent = syn.get(ent_synid, downloadFile=downloadFile)

            if name in clinical_pair_name:
                clinicalpair_entities.append(ent)
                continue

            validator = validate.GenieValidationHelper(
                syn=syn,
                project_id=project_id,
                center=center,
                entitylist=[ent],
                format_registry=format_registry,
                genie_config=genie_config,
                ancillary_files=None,
            )
            # Files whose type has no registered format class are ignored.
            if validator.file_type not in validator._format_registry:
                continue
            validator_cls = validator._format_registry[validator.file_type]
            fileformat_validator = validator_cls(
                syn=validator._synapse_client,
                center=validator.center,
                genie_config=validator.genie_config,
                ancillary_files=None,
            )

            prepared_center_files[name] = {
                "entity": ent,
                "filetypeformat_object": fileformat_validator,
            }

    # if the clinical files exist
    if clinicalpair_entities:
        # handling for just the clinical pair, can remove once we have separate classes
        cli_validator = validate.GenieValidationHelper(
            syn=syn,
            project_id=project_id,
            center=center,
            entitylist=clinicalpair_entities,
            format_registry=format_registry,
            genie_config=genie_config,
            ancillary_files=None,
        )
        # Same guard as the per-file branch: skip unregistered file types.
        if cli_validator.file_type in cli_validator._format_registry:
            # Use the class resolved for the clinical pair, not whichever
            # class the loop above happened to resolve last.
            cli_validator_cls = cli_validator._format_registry[
                cli_validator.file_type
            ]
            cli_fileformat_validator = cli_validator_cls(
                syn=cli_validator._synapse_client,
                center=cli_validator.center,
                genie_config=cli_validator.genie_config,
                ancillary_files=None,
            )
            # NOTE(review): the pair is registered under each clinical file's
            # own name so lookups by entity name succeed; confirm this is the
            # key callers expect.
            for cli_ent in clinicalpair_entities:
                prepared_center_files[cli_ent.name] = {
                    "entity": clinicalpair_entities,
                    "filetypeformat_object": cli_fileformat_validator,
                }
    return prepared_center_files
# def get_ancillary_files(
# syn: synapseclient.Synapse,
# synid: str,
# project_id: str,
# center: str,
# process: str = "main",
# downloadFile: bool = True,
# genie_config: list = None,
# format_registry: list = None,
# ) -> Dict[str, Dict[str, object]]:
# """Walks through each center's input directory
# to get a dict of center files

# Args:
# syn (synapseclient.Synapse): Synapse connection
# synid (str): Synapse Id of a folder
# project_id (str): GENIE Synapse project id
# center (str): GENIE center name
# process (str, optional): Process type include "main", "mutation".
# Defaults to "main".
# downloadFile (bool, optional): Downloads the file. Defaults to True.

# Returns:
# dict: {entity_name: {
# entity: Synapse.File
# filetypeformat_object: FileTypeFormat
# }
# """
# logger.info("GETTING {center} INPUT FILES".format(center=center))
# clinical_pair_name = [
# "data_clinical_supp_sample_{center}.txt".format(center=center),
# "data_clinical_supp_patient_{center}.txt".format(center=center),
# ]
# clinicalpair_entities = []

# center_files = synapseutils.walk(syn, synid)
# prepared_center_files = {}

# for _, _, entities in center_files:
# for name, ent_synid in entities:
# if name in clinical_pair_name:
# clinicalpair_entities.append(ent)
# continue

# if name.endswith(".vcf") and process != "mutation":
# continue

# ent = syn.get(ent_synid, downloadFile=downloadFile)

# validator = validate.GenieValidationHelper(
# syn=syn,
# project_id=project_id,
# center=center,
# entitylist=[ent],
# format_registry=format_registry,
# genie_config=genie_config,
# ancillary_files=None,
# )
# filetype = validator.file_type
# if validator.file_type not in validator._format_registry:
# continue
# validator_cls = validator._format_registry[validator.file_type]
# fileformat_validator = validator_cls(
# syn=validator._synapse_client,
# center=validator.center,
# genie_config=validator.genie_config,
# ancillary_files=None,
# )

# prepared_center_files[name] = {}
# prepared_center_files[name]["entity"] = ent
# prepared_center_files[name]["filetypeformat_object"] = fileformat_validator

# # if the clinical files exist
# if clinicalpair_entities:
# # handling for just the clinical pair, can remove once we have separate classes
# cli_validator = validate.GenieValidationHelper(
# syn=syn,
# project_id=project_id,
# center=center,
# entitylist=clinicalpair_entities,
# format_registry=format_registry,
# genie_config=genie_config,
# ancillary_files=None,
# )
# cli_filetype = cli_validator.file_type
# cli_validator_cls = cli_validator._format_registry[cli_validator.file_type]
# cli_fileformat_validator = validator_cls(
# syn=cli_validator._synapse_client,
# center=cli_validator.center,
# genie_config=cli_validator.genie_config,
# ancillary_files=None,
# )

# prepared_center_files[name] = {}
# prepared_center_files[name]["entity"] = clinicalpair_entities
# prepared_center_files[name]["filetypeformat_object"] = cli_fileformat_validator
# return prepared_center_files


# TODO: Add to validation.py
def validatefile(
syn,
project_id,
entities,
validation_status_table,
error_tracker_table,
center,
format_registry=None,
genie_config=None,
ancillary_files=None,
syn: synapseclient.Synapse,
project_id: str,
entities: List[synapseclient.File],
validation_status_table: synapseclient.table.CsvFileTable,
error_tracker_table: synapseclient.table.CsvFileTable,
center: str,
format_registry: Optional[dict] = None,
genie_config: Optional[dict] = None,
ancillary_files: Optional[list] = None,
):
"""Validate a list of entities.
Expand All @@ -289,18 +288,24 @@ def validatefile(
validation_status_table: Validation status dataframe
error_tracker_table: Invalid files error tracking dataframe
center: Center of interest
format_registry (typing.List, optional): GENIE file format registry.
format_registry (dict, optional): GENIE file format registry.
Defaults to None.
genie_config (typing.Dict, optional): See example of genie config at
genie_config (dict, optional): See example of genie config at
./genie_config.json. Defaults to None.
ancillary_files: all files downloaded for validation
ancillary_files (list, optional): all files downloaded for validation
Returns:
tuple: input_status_list - status of input files,
invalid_errors_list - error list
messages_to_send - list of tuples with (filenames, message, file_users)
"""
# TODO: Decide whether an error should be raised when these are None,
# i.e. whether these parameters should really be optional.
if genie_config is None:
genie_config = {}
if format_registry is None:
format_registry = {}

# filepaths = [entity.path for entity in entities]
filenames = [entity.name for entity in entities]
Expand Down Expand Up @@ -800,8 +805,8 @@ def center_input_to_database(
process: str,
only_validate: bool,
delete_old: bool = False,
format_registry: list = None,
genie_config: dict = None,
format_registry: Optional[dict] = None,
genie_config: Optional[dict] = None,
):
"""Processing per center
Expand All @@ -812,11 +817,17 @@ def center_input_to_database(
process (str): main or mutation processing
only_validate (bool): Only validate or not
delete_old (bool, optional): Delete old files. Defaults to False.
format_registry (typing.List, optional): GENIE file format registry.
format_registry (dict, optional): GENIE file format registry.
Defaults to None.
genie_config (typing.Dict, optional): See example of genie config at
genie_config (dict, optional): See example of genie config at
./genie_config.json. Defaults to None.
"""
# TODO: Decide whether an error should be raised when these are None,
# i.e. whether these parameters should really be optional.
if genie_config is None:
genie_config = {}
if format_registry is None:
format_registry = {}

if only_validate:
log_path = os.path.join(
Expand Down
2 changes: 1 addition & 1 deletion genie/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def update_table(
newData: pd.DataFrame,
filterBy: str,
filterByColumn: str = "CENTER",
col: List[str] = None,
col: Optional[List[str]] = None,
toDelete: bool = False,
):
"""Update Synapse table given a new dataframe
Expand Down
Loading

0 comments on commit 39e1539

Please sign in to comment.