Skip to content

Commit

Permalink
Merge pull request #47 from daisybio/dev
Browse files Browse the repository at this point in the history
Add latest dev updates
  • Loading branch information
nictru authored Dec 20, 2024
2 parents e15ea65 + 2519655 commit e549816
Show file tree
Hide file tree
Showing 9 changed files with 306 additions and 191 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ name: Docker

on:
push:
branches: [ "master" ]
branches: [ "master", "dev" ]
pull_request:
branches: [ "master" ]
branches: [ "master", "dev" ]

env:
# Use docker.io for Docker Hub if empty
Expand Down
31 changes: 9 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,19 @@
FROM python:3.12.6-bullseye

# Install required packages using apt
# RUN apt-get update && apt-get install -y \
# libmariadb3 libmariadb-dev build-essential linux-headers-amd64 mariadb-connector-c \
# && rm -rf /var/lib/apt/lists/*
FROM community.wave.seqera.io/library/bioconductor-gsva_bioconductor-sponge_gunicorn_python_pruned:e9c5176f69f5398d

# Install required system dependencies for MySQL, R, and Conda
RUN apt-get update && apt-get install -y \
default-mysql-client default-libmysqlclient-dev build-essential linux-headers-amd64 \
default-mysql-client pkg-config default-libmysqlclient-dev build-essential \
&& rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --upgrade pip

# Install Python dependencies
WORKDIR /server
COPY . /server

RUN pip3 --no-cache-dir install -r requirements.txt
RUN pip install debugpy
COPY requirements.txt /server/requirements.txt
RUN micromamba run pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . /server

# the mariadb plugin directory seems to be misconfigured
# bei default. In order to work properly we manually adjust
# the path.
# ENV MARIADB_PLUGIN_DIR /usr/lib/mariadb/plugin

# EXPOSE 5000
# CMD ["python3", "server.py"]

#run the command to start uWSGI
# Start the application using gunicorn with UvicornWorker
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:5000", "-w", "4", "server:connex_app"]

48 changes: 48 additions & 0 deletions app/controllers/alternativeSplicing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from flask import abort
import app.models as models
from flask import Response
from app.config import db

def get_transcript_events(enst_number):
"""
Expand Down Expand Up @@ -91,3 +92,50 @@ def get_exons_for_position(start_pos: int, end_pos: int):
else:
abort(404, "No data found that satisfies the given filters")


def get_psi_values(transcript_ID: str = None, enst_number: str = None, psivec_ID: int = None, alternative_splicing_event_transcripts_ID: str = None, sample_ID: str = None, limit: int = 100):
    """
    Handles the request: /alternativeSplicing/getPsiValue/
    Returns PSI (percent spliced in) values, optionally filtered by
    transcript, psivec ID, alternative splicing event transcripts ID
    and sample ID.

    :param transcript_ID: internal ID of the transcript to filter by
    :param enst_number: ENST accession of the transcript to filter by
    :param psivec_ID: ID of the psivec entry to filter by
    :param alternative_splicing_event_transcripts_ID: ID of the alternative splicing event transcripts
    :param sample_ID: ID of the sample to filter by
    :param limit: maximum number of results to return (default 100)
    :return: serialized PSI values for the given parameters, ordered by
             descending psi value; aborts with 404 if nothing matches
    """
    # Build the transcript query (inner-most subquery).
    transcript_query = db.select(models.Transcript.transcript_ID)
    # NOTE: `is not None` (rather than truthiness) so that ID 0 or an
    # empty string still applies the filter instead of silently skipping it.
    if transcript_ID is not None:
        transcript_query = transcript_query.where(models.Transcript.transcript_ID == transcript_ID)
    if enst_number is not None:
        transcript_query = transcript_query.where(models.Transcript.enst_number == enst_number)

    # Restrict alternative splicing events to the selected transcripts.
    as_query = db.select(models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID).where(
        models.AlternativeSplicingEventTranscripts.transcript_ID.in_(transcript_query)
    )
    if alternative_splicing_event_transcripts_ID is not None:
        as_query = as_query.where(
            models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID == alternative_splicing_event_transcripts_ID
        )

    # Restrict PSI values to the selected alternative splicing events.
    psi_query = db.select(models.PsiVec).where(
        models.PsiVec.alternative_splicing_event_transcripts_ID.in_(as_query)
    )
    if psivec_ID is not None:
        psi_query = psi_query.where(models.PsiVec.psivec_ID == psivec_ID)
    if sample_ID is not None:
        psi_query = psi_query.where(models.PsiVec.sample_ID == sample_ID)

    # Sort by PSI value (highest first) and cap the result size.
    psi_query = psi_query.order_by(models.PsiVec.psi_value.desc()).limit(limit)

    psi_values = db.session.execute(psi_query).scalars().all()

    if psi_values:
        schema = models.PsiVecSchema(many=True)
        return schema.dump(psi_values)
    else:
        abort(404, "No data found that satisfies the given filters")

18 changes: 12 additions & 6 deletions app/controllers/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,26 @@ def _comparison_query(dataset_1, dataset_2, condition_1=None, condition_2=None,
if condition_2 is not None:
comparison = comparison.filter(models.Comparison.condition_2 == condition_2)

comparison = comparison.all()
comparisons = comparison.all()

# check if comparison is named differently
if len(comparison) == 0:
if len(comparisons) == 0:
reverse = True
comparison = models.Comparison.query \
.filter(models.Comparison.dataset_ID_1.in_(dataset_2)) \
.filter(models.Comparison.dataset_ID_2.in_(dataset_1)) \
.filter(models.Comparison.gene_transcript == gene_transcript)

if len(comparison) != 1:
abort(404, "No (unique) comparison found for given inputs")

return comparison.all(), reverse
comparisons = comparison.all()

# error if no comparison found
if len(comparisons) == 0:
abort(404, "No comparison found for given inputs")

if len(comparisons) > 1:
abort(404, "Multiple comparisons found for given inputs")

return comparisons, reverse


def get_comparison(dataset_ID: str = None, disease_name: str = None, disease_subtype=None, sponge_db_version: int = LATEST):
Expand Down
107 changes: 36 additions & 71 deletions app/controllers/geneInteraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,88 +650,53 @@ def read_mirna_for_specific_interaction(dataset_ID: int = None, disease_name=Non
:param sponge_db_version: version of the sponge database
:return: all miRNAs contributing to the interactions between genes of interest
"""
# test if any of the two identification possibilites is given
if ensg_number is None and gene_symbol is None:
abort(404, "One of the two possible identification numbers must be provided")

if ensg_number is not None and gene_symbol is not None:
abort(404,
"More than one identifikation paramter is given. Please choose one out of (ensg number, gene symbol)")

# get all sponge_runs for the given sponge_db_version
run = models.SpongeRun.query \
.filter(models.SpongeRun.sponge_db_version == sponge_db_version)

queries = []
run_IDs = []
# if specific disease_name is given:
# get diseases
disease_query = db.select(models.Dataset.dataset_ID).where(models.Dataset.sponge_db_version == sponge_db_version)
if disease_name is not None:
run = models.SpongeRun.query.join(models.Dataset, models.Dataset.dataset_ID == models.SpongeRun.dataset_ID) \
.filter(models.Dataset.disease_name.like("%" + disease_name + "%"))

disease_query = disease_query.where(models.Dataset.disease_name.like("%" + disease_name + "%"))
if dataset_ID is not None:
run = run.filter(models.Dataset.dataset_ID == dataset_ID)

run = run.all()
disease_query = disease_query.where(models.Dataset.dataset_ID == dataset_ID)

if len(run) > 0:
run_IDs = [i.sponge_run_ID for i in run]
queries.append(models.miRNAInteraction.sponge_run_ID.in_(run_IDs))
else:
abort(404, "No dataset with given disease_name found")
# filter runs for diseases
run_query = db.select(models.SpongeRun.sponge_run_ID).where(models.SpongeRun.dataset_ID.in_(disease_query))

gene = []
# if ensg_numer is given to specify gene(s), get the intern gene_ID(primary_key) for requested ensg_nr(gene_ID)
# get gene IDs
gene_query = db.select(models.Gene.gene_ID)
if ensg_number is not None:
gene = models.Gene.query \
.filter(models.Gene.ensg_number.in_(ensg_number)) \
.all()
# if gene_symbol is given to specify gene(s), get the intern gene_ID(primary_key) for requested gene_symbol(gene_ID)
elif gene_symbol is not None:
gene = models.Gene.query \
.filter(models.Gene.gene_symbol.in_(gene_symbol)) \
.all()
gene_query = gene_query.where(models.Gene.ensg_number.in_(ensg_number))
if gene_symbol is not None:
gene_query = gene_query.where(models.Gene.gene_symbol.in_(gene_symbol))

gene_IDs = []
if len(gene) > 0:
gene_IDs = [i.gene_ID for i in gene]
queries.append(models.miRNAInteraction.gene_ID.in_(gene_IDs))
else:
abort(404, "No gene found for given identifiers.")
# Get all interactions for the given genes and runs
base_interaction_query = db.select(models.miRNAInteraction).where(
models.miRNAInteraction.gene_ID.in_(gene_query),
models.miRNAInteraction.sponge_run_ID.in_(run_query),
)

interaction_result = []
if between:
# an Engine, which the Session will use for connection resources
some_engine = sa.create_engine(os.getenv("SPONGE_DB_URI"), pool_recycle=30)

# create a configured "Session" class
Session = sa.orm.sessionmaker(bind=some_engine)

# create a Session
session = Session()
# test for each dataset if the gene(s) of interest are included in the ceRNA network

mirna_filter = session.execute(text("select mirna_ID from interactions_genemirna where sponge_run_ID IN ( "
+ ','.join(str(e) for e in run_IDs) + ") and gene_ID IN ( "
+ ','.join(str(e) for e in gene_IDs)
+ ") group by mirna_ID HAVING count(mirna_ID) >= 2;")).fetchall()

session.close()
some_engine.dispose()

if len(mirna_filter) == 0:
abort(404, "No shared miRNA between genes found.")
# Subquery to count distinct genes
distinct_gene_count_subquery = (
db.select(db.func.count(db.func.distinct(gene_query.c.gene_ID))).scalar_subquery()
)
print(distinct_gene_count_subquery)

# Subquery to get miRNA IDs that meet the 'between' condition
mirna_query = db.select(models.miRNAInteraction.miRNA_ID) \
.where(models.miRNAInteraction.gene_ID.in_(gene_query)) \
.where(models.miRNAInteraction.sponge_run_ID.in_(run_query)) \
.group_by(models.miRNAInteraction.miRNA_ID) \
.having(db.func.count(models.miRNAInteraction.gene_ID) == distinct_gene_count_subquery)

# Filter interactions by the miRNA IDs from the previous subquery
interaction_query = base_interaction_query.where(
models.miRNAInteraction.miRNA_ID.in_(mirna_query)
)
else:
interaction_query = base_interaction_query

flat_mirna_filter = [item for sublist in mirna_filter for item in sublist]
queries.append(models.miRNAInteraction.miRNA_ID.in_(flat_mirna_filter))
interaction_result = db.session.execute(interaction_query).scalars().all()

interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()
else:
interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()

if len(interaction_result) > 0:
# Serialize the data for the response depending on parameter all
Expand Down
Loading

0 comments on commit e549816

Please sign in to comment.