Skip to content

Commit

Permalink
Merge pull request #47 from daisybio/dev
Browse files Browse the repository at this point in the history
Add latest dev updates
  • Loading branch information
nictru authored Dec 20, 2024
2 parents e15ea65 + 2519655 commit e549816
Show file tree
Hide file tree
Showing 9 changed files with 306 additions and 191 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ name: Docker

on:
push:
branches: [ "master" ]
branches: [ "master", "dev" ]
pull_request:
branches: [ "master" ]
branches: [ "master", "dev" ]

env:
# Use docker.io for Docker Hub if empty
Expand Down
31 changes: 9 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,19 @@
FROM python:3.12.6-bullseye

# Install required packages using apt
# RUN apt-get update && apt-get install -y \
# libmariadb3 libmariadb-dev build-essential linux-headers-amd64 mariadb-connector-c \
# && rm -rf /var/lib/apt/lists/*
FROM community.wave.seqera.io/library/bioconductor-gsva_bioconductor-sponge_gunicorn_python_pruned:e9c5176f69f5398d

# Install required system dependencies for MySQL, R, and Conda
RUN apt-get update && apt-get install -y \
default-mysql-client default-libmysqlclient-dev build-essential linux-headers-amd64 \
default-mysql-client pkg-config default-libmysqlclient-dev build-essential \
&& rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --upgrade pip

# Install Python dependencies
WORKDIR /server
COPY . /server

RUN pip3 --no-cache-dir install -r requirements.txt
RUN pip install debugpy
COPY requirements.txt /server/requirements.txt
RUN micromamba run pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . /server

# the mariadb plugin directory seems to be misconfigured
# bei default. In order to work properly we manually adjust
# the path.
# ENV MARIADB_PLUGIN_DIR /usr/lib/mariadb/plugin

# EXPOSE 5000
# CMD ["python3", "server.py"]

#run the command to start uWSGI
# Start the application using gunicorn with UvicornWorker
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:5000", "-w", "4", "server:connex_app"]

48 changes: 48 additions & 0 deletions app/controllers/alternativeSplicing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from flask import abort
import app.models as models
from flask import Response
from app.config import db

def get_transcript_events(enst_number):
"""
Expand Down Expand Up @@ -91,3 +92,50 @@ def get_exons_for_position(start_pos: int, end_pos: int):
else:
abort(404, "No data found that satisfies the given filters")


def get_psi_values(transcript_ID: str = None, enst_number: str = None, psivec_ID: int = None, alternative_splicing_event_transcripts_ID: str = None, sample_ID: str = None, limit: int = 100):
    """
    Handles the request: /alternativeSplicing/getPsiValue/
    Returns PSI (percent spliced in) values, optionally filtered by
    transcript, psivec ID, alternative splicing event transcripts ID
    and sample ID.

    :param transcript_ID: internal ID of the transcript to filter by
    :param enst_number: ENST accession of the transcript to filter by
    :param psivec_ID: ID of the psivec entry to filter by
    :param alternative_splicing_event_transcripts_ID: ID of the alternative splicing event transcripts
    :param sample_ID: ID of the sample to filter by
    :param limit: maximum number of results to return (default 100)
    :return: serialized PSI values for the given parameters, ordered by
             descending psi value; aborts with 404 if nothing matches
    """
    # Build the transcript query (inner-most subquery).
    transcript_query = db.select(models.Transcript.transcript_ID)
    # NOTE: `is not None` (rather than truthiness) so that ID 0 or an
    # empty string still applies the filter instead of silently skipping it.
    if transcript_ID is not None:
        transcript_query = transcript_query.where(models.Transcript.transcript_ID == transcript_ID)
    if enst_number is not None:
        transcript_query = transcript_query.where(models.Transcript.enst_number == enst_number)

    # Restrict alternative splicing events to the selected transcripts.
    as_query = db.select(models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID).where(
        models.AlternativeSplicingEventTranscripts.transcript_ID.in_(transcript_query)
    )
    if alternative_splicing_event_transcripts_ID is not None:
        as_query = as_query.where(
            models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID == alternative_splicing_event_transcripts_ID
        )

    # Restrict PSI values to the selected alternative splicing events.
    psi_query = db.select(models.PsiVec).where(
        models.PsiVec.alternative_splicing_event_transcripts_ID.in_(as_query)
    )
    if psivec_ID is not None:
        psi_query = psi_query.where(models.PsiVec.psivec_ID == psivec_ID)
    if sample_ID is not None:
        psi_query = psi_query.where(models.PsiVec.sample_ID == sample_ID)

    # Sort by PSI value (highest first) and cap the result size.
    psi_query = psi_query.order_by(models.PsiVec.psi_value.desc()).limit(limit)

    psi_values = db.session.execute(psi_query).scalars().all()

    if psi_values:
        schema = models.PsiVecSchema(many=True)
        return schema.dump(psi_values)
    else:
        abort(404, "No data found that satisfies the given filters")

18 changes: 12 additions & 6 deletions app/controllers/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,26 @@ def _comparison_query(dataset_1, dataset_2, condition_1=None, condition_2=None,
if condition_2 is not None:
comparison = comparison.filter(models.Comparison.condition_2 == condition_2)

comparison = comparison.all()
comparisons = comparison.all()

# check if comparison is named differently
if len(comparison) == 0:
if len(comparisons) == 0:
reverse = True
comparison = models.Comparison.query \
.filter(models.Comparison.dataset_ID_1.in_(dataset_2)) \
.filter(models.Comparison.dataset_ID_2.in_(dataset_1)) \
.filter(models.Comparison.gene_transcript == gene_transcript)

if len(comparison) != 1:
abort(404, "No (unique) comparison found for given inputs")

return comparison.all(), reverse
comparisons = comparison.all()

# error if no comparison found
if len(comparisons) == 0:
abort(404, "No comparison found for given inputs")

if len(comparisons) > 1:
abort(404, "Multiple comparisons found for given inputs")

return comparisons, reverse


def get_comparison(dataset_ID: str = None, disease_name: str = None, disease_subtype=None, sponge_db_version: int = LATEST):
Expand Down
107 changes: 36 additions & 71 deletions app/controllers/geneInteraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,88 +650,53 @@ def read_mirna_for_specific_interaction(dataset_ID: int = None, disease_name=Non
:param sponge_db_version: version of the sponge database
:return: all miRNAs contributing to the interactions between genes of interest
"""
# test if any of the two identification possibilites is given
if ensg_number is None and gene_symbol is None:
abort(404, "One of the two possible identification numbers must be provided")

if ensg_number is not None and gene_symbol is not None:
abort(404,
"More than one identifikation paramter is given. Please choose one out of (ensg number, gene symbol)")

# get all sponge_runs for the given sponge_db_version
run = models.SpongeRun.query \
.filter(models.SpongeRun.sponge_db_version == sponge_db_version)

queries = []
run_IDs = []
# if specific disease_name is given:
# get diseases
disease_query = db.select(models.Dataset.dataset_ID).where(models.Dataset.sponge_db_version == sponge_db_version)
if disease_name is not None:
run = models.SpongeRun.query.join(models.Dataset, models.Dataset.dataset_ID == models.SpongeRun.dataset_ID) \
.filter(models.Dataset.disease_name.like("%" + disease_name + "%"))

disease_query = disease_query.where(models.Dataset.disease_name.like("%" + disease_name + "%"))
if dataset_ID is not None:
run = run.filter(models.Dataset.dataset_ID == dataset_ID)

run = run.all()
disease_query = disease_query.where(models.Dataset.dataset_ID == dataset_ID)

if len(run) > 0:
run_IDs = [i.sponge_run_ID for i in run]
queries.append(models.miRNAInteraction.sponge_run_ID.in_(run_IDs))
else:
abort(404, "No dataset with given disease_name found")
# filter runs for diseases
run_query = db.select(models.SpongeRun.sponge_run_ID).where(models.SpongeRun.dataset_ID.in_(disease_query))

gene = []
# if ensg_numer is given to specify gene(s), get the intern gene_ID(primary_key) for requested ensg_nr(gene_ID)
# get gene IDs
gene_query = db.select(models.Gene.gene_ID)
if ensg_number is not None:
gene = models.Gene.query \
.filter(models.Gene.ensg_number.in_(ensg_number)) \
.all()
# if gene_symbol is given to specify gene(s), get the intern gene_ID(primary_key) for requested gene_symbol(gene_ID)
elif gene_symbol is not None:
gene = models.Gene.query \
.filter(models.Gene.gene_symbol.in_(gene_symbol)) \
.all()
gene_query = gene_query.where(models.Gene.ensg_number.in_(ensg_number))
if gene_symbol is not None:
gene_query = gene_query.where(models.Gene.gene_symbol.in_(gene_symbol))

gene_IDs = []
if len(gene) > 0:
gene_IDs = [i.gene_ID for i in gene]
queries.append(models.miRNAInteraction.gene_ID.in_(gene_IDs))
else:
abort(404, "No gene found for given identifiers.")
# Get all interactions for the given genes and runs
base_interaction_query = db.select(models.miRNAInteraction).where(
models.miRNAInteraction.gene_ID.in_(gene_query),
models.miRNAInteraction.sponge_run_ID.in_(run_query),
)

interaction_result = []
if between:
# an Engine, which the Session will use for connection resources
some_engine = sa.create_engine(os.getenv("SPONGE_DB_URI"), pool_recycle=30)

# create a configured "Session" class
Session = sa.orm.sessionmaker(bind=some_engine)

# create a Session
session = Session()
# test for each dataset if the gene(s) of interest are included in the ceRNA network

mirna_filter = session.execute(text("select mirna_ID from interactions_genemirna where sponge_run_ID IN ( "
+ ','.join(str(e) for e in run_IDs) + ") and gene_ID IN ( "
+ ','.join(str(e) for e in gene_IDs)
+ ") group by mirna_ID HAVING count(mirna_ID) >= 2;")).fetchall()

session.close()
some_engine.dispose()

if len(mirna_filter) == 0:
abort(404, "No shared miRNA between genes found.")
# Subquery to count distinct genes
distinct_gene_count_subquery = (
db.select(db.func.count(db.func.distinct(gene_query.c.gene_ID))).scalar_subquery()
)
print(distinct_gene_count_subquery)

# Subquery to get miRNA IDs that meet the 'between' condition
mirna_query = db.select(models.miRNAInteraction.miRNA_ID) \
.where(models.miRNAInteraction.gene_ID.in_(gene_query)) \
.where(models.miRNAInteraction.sponge_run_ID.in_(run_query)) \
.group_by(models.miRNAInteraction.miRNA_ID) \
.having(db.func.count(models.miRNAInteraction.gene_ID) == distinct_gene_count_subquery)

# Filter interactions by the miRNA IDs from the previous subquery
interaction_query = base_interaction_query.where(
models.miRNAInteraction.miRNA_ID.in_(mirna_query)
)
else:
interaction_query = base_interaction_query

flat_mirna_filter = [item for sublist in mirna_filter for item in sublist]
queries.append(models.miRNAInteraction.miRNA_ID.in_(flat_mirna_filter))
interaction_result = db.session.execute(interaction_query).scalars().all()

interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()
else:
interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()

if len(interaction_result) > 0:
# Serialize the data for the response depending on parameter all
Expand Down
Loading

0 comments on commit e549816

Please sign in to comment.