Skip to content

Commit

Permalink
[GEN-506] Support new synapser client - Update installation (#506)
Browse files Browse the repository at this point in the history
* Use new synapser

* Use new r client

* Comment out dependencies

* Use renv to install in docker file

* Update black

* Comment Dockerfile and use renv to install R dependencies

* Remove unused code

* Add back cloning of cbioportal repo
  • Loading branch information
thomasyu888 authored Feb 8, 2023
1 parent ff2043c commit e94341d
Show file tree
Hide file tree
Showing 24 changed files with 104 additions and 105 deletions.
36 changes: 26 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM ubuntu:focal-20220113
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_FRONTEND=noninteractive

# Must install this because gpg is not installed
RUN apt-get update && \
Expand Down Expand Up @@ -41,27 +41,43 @@ RUN apt-get update && apt-get install -y --allow-unauthenticated --no-install-re
# texlive-generic-recommended \
texlive-latex-extra \
# genome nexus
openjdk-8-jre && \
openjdk-8-jre \
# This is for reticulate
python3.8-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install pandoc (dashboard use); version 3.0.1 replaces 1.19.2.1
RUN wget https://github.com/jgm/pandoc/releases/download/1.19.2.1/pandoc-1.19.2.1-1-amd64.deb
RUN dpkg -i pandoc-1.19.2.1-1-amd64.deb
# RUN wget https://github.com/jgm/pandoc/releases/download/1.19.2.1/pandoc-1.19.2.1-1-amd64.deb
# RUN dpkg -i pandoc-1.19.2.1-1-amd64.deb
RUN wget https://github.com/jgm/pandoc/releases/download/3.0.1/pandoc-3.0.1-1-amd64.deb
RUN dpkg -i pandoc-3.0.1-1-amd64.deb



# Copy the code in so that the most recent changes are always installed
# Do not build from local computer
WORKDIR /root/Genie
COPY . .
# Copy the package-installation files first, because R installation takes
# a long time and should only rerun when there are changes to the actual
# R packages used. So the only files copied over are
# renv/, renv.lock and the installation R script
COPY R/install_packages.R R/install_packages.R
COPY renv/ renv/
COPY renv.lock renv.lock
RUN Rscript R/install_packages.R

ENV CRYPTOGRAPHY_DONT_BUILD_RUST=true
RUN Rscript R/install_packages.R
COPY . .
RUN echo "source('renv/activate.R')" >> .Rprofile

RUN python3 -m pip install --no-cache-dir cython
RUN python3 -m pip install --no-cache-dir -r requirements.txt
# RUN python3 -m pip install -e .
RUN python3 setup.py sdist
# TODO: R/ and templates/ must be included within the
# genie/ directory for MANIFEST.in to be used.
# For now, install with `setup.py develop` so that
# the package is called from this directory
RUN pip3 install --no-cache-dir -r requirements.txt
RUN python3 setup.py develop
# RUN pip3 install --no-cache-dir .

WORKDIR /root/
# Must move this git clone to after the install of Genie,
Expand Down
6 changes: 4 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
include genie/*.sh
include R/*
# This file currently doesn't do anything because
# R/ and templates/ aren't within the genie/ directory
graft R/
graft templates/
16 changes: 2 additions & 14 deletions R/install_packages.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
install.packages("synapser", repos=c("http://ran.synapse.org", "http://cran.fhcrc.org"))
install.packages("dplyr", repos = "http://cran.r-project.org")
install.packages("argparse", repos = "http://cran.r-project.org")
install.packages("rmarkdown", repos = "http://cran.r-project.org")
install.packages("UpSetR", repos = "http://cran.r-project.org")
install.packages("testthat", repos = "http://cran.r-project.org")
install.packages("xtable", repos = "http://cran.r-project.org")
# ggpubr is used in the data guide generation
install.packages("ggpubr", repos = "http://cran.r-project.org")
install.packages("XML", repos = "http://cran.r-project.org")
install.packages("BiocManager", repos = "http://cran.r-project.org")

BiocManager::install("VariantAnnotation")

source("renv/activate.R")
renv::restore()
library(synapser)
library(dplyr)
library(argparse)
Expand Down
1 change: 0 additions & 1 deletion bin/consortium_to_public.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ def main(args):


if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument(
"processingDate",
Expand Down
4 changes: 1 addition & 3 deletions genie/database_to_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,7 @@ def remove_maf_samples(mafdf: pd.DataFrame, keep_samples: list) -> pd.DataFrame:
"""
keep_maf = mafdf["Tumor_Sample_Barcode"].isin(keep_samples)
mafdf = mafdf.loc[
keep_maf,
]
mafdf = mafdf.loc[keep_maf,]
return mafdf


Expand Down
1 change: 0 additions & 1 deletion genie/example_filetype_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def collect_errors_and_warnings(self) -> str:


class FileTypeFormat(metaclass=ABCMeta):

_process_kwargs = ["newPath", "databaseSynId"]

_fileType = "fileType"
Expand Down
1 change: 0 additions & 1 deletion genie/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

# TODO: move to models.py
class ValidationHelper(object):

# Used for the kwargs in validate_single_file
# Overload this per class
_validate_kwargs = []
Expand Down
9 changes: 0 additions & 9 deletions genie_registry/clinical.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ def remap_clinical_values(


class Clinical(FileTypeFormat):

_fileType = "clinical"

# _process_kwargs = [
Expand Down Expand Up @@ -571,7 +570,6 @@ def _validate(self, clinicaldf):
)
]
):

total_error.write(
"Sample Clinical File: PATIENT_ID's much be contained in "
"the SAMPLE_ID's (ex. SAGE-1 <-> SAGE-1-2)\n"
Expand Down Expand Up @@ -670,7 +668,6 @@ def _validate(self, clinicaldf):
and havePatientColumn
and haveSampleColumn
):

wrongCodeSamples = []
# This is to check if oncotree codes match the sex,
# returns list of samples that have conflicting codes and sex
Expand All @@ -679,12 +676,10 @@ def _validate(self, clinicaldf):
clinicaldf["PATIENT_ID"],
clinicaldf["SAMPLE_ID"],
):

if (
oncotree_mapping_dict.get(code) is not None
and sum(clinicaldf["PATIENT_ID"] == patient) > 0
):

primaryCode = oncotree_mapping_dict[code][
"ONCOTREE_PRIMARY_NODE"
]
Expand All @@ -698,14 +693,12 @@ def _validate(self, clinicaldf):
in maleOncoCodes
and sex != 1.0
):

wrongCodeSamples.append(sample)
if (
oncotree_mapping_dict[code]["ONCOTREE_PRIMARY_NODE"]
in womenOncoCodes
and sex != 2.0
):

wrongCodeSamples.append(sample)
if len(wrongCodeSamples) > 0:
warning.write(
Expand Down Expand Up @@ -845,7 +838,6 @@ def _validate(self, clinicaldf):
]
]
):

total_error.write(
"Patient Clinical File: Please double check your "
"INT_CONTACT column, it must be an integer, '>32485', "
Expand All @@ -872,7 +864,6 @@ def _validate(self, clinicaldf):
]
]
):

total_error.write(
"Patient Clinical File: Please double check your INT_DOD "
"column, it must be an integer, '>32485', '<6570', "
Expand Down
1 change: 0 additions & 1 deletion genie_registry/cna.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def checkIfOneZero(x):


class cna(FileTypeFormat):

_fileType = "cna"

_process_kwargs = ["newPath"]
Expand Down
1 change: 0 additions & 1 deletion genie_registry/fusions.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def remapFusion(gene_dict, DF, col):


class fusions(FileTypeFormat):

_fileType = "fusions"

_process_kwargs = ["newPath", "databaseSynId"]
Expand Down
1 change: 0 additions & 1 deletion genie_registry/mutationsInCis.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@


class mutationsInCis(FileTypeFormat):

_fileType = "mutationsInCis"

_validation_kwargs = []
Expand Down
1 change: 0 additions & 1 deletion genie_registry/patientRetraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,4 @@


class patientRetraction(sampleRetraction):

_fileType = "patientRetraction"
1 change: 0 additions & 1 deletion genie_registry/sampleRetraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


class sampleRetraction(FileTypeFormat):

_fileType = "sampleRetraction"

_process_kwargs = ["newPath", "databaseSynId", "fileSynId"]
Expand Down
1 change: 0 additions & 1 deletion genie_registry/seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


class seg(FileTypeFormat):

_fileType = "seg"

_process_kwargs = ["newPath", "databaseSynId"]
Expand Down
1 change: 0 additions & 1 deletion genie_registry/structural_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@


class StructuralVariant(FileTypeFormat):

_fileType = "sv"

# _validation_kwargs = ["nosymbol_check", "project_id"]
Expand Down
1 change: 0 additions & 1 deletion genie_registry/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@


class workflow(FileTypeFormat):

_fileType = "md"

_process_kwargs = ["databaseSynId"]
Expand Down
Loading

0 comments on commit e94341d

Please sign in to comment.