Skip to content

Commit

Permalink
Merge pull request #46 from daisybio/modules
Browse files Browse the repository at this point in the history
AS events now also return IDs
  • Loading branch information
strasserle authored Dec 20, 2024
2 parents 447ec8c + 92a257d commit 2519655
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 106 deletions.
18 changes: 12 additions & 6 deletions app/controllers/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,26 @@ def _comparison_query(dataset_1, dataset_2, condition_1=None, condition_2=None,
if condition_2 is not None:
comparison = comparison.filter(models.Comparison.condition_2 == condition_2)

comparison = comparison.all()
comparisons = comparison.all()

# check if comparison is named differently
if len(comparison) == 0:
if len(comparisons) == 0:
reverse = True
comparison = models.Comparison.query \
.filter(models.Comparison.dataset_ID_1.in_(dataset_2)) \
.filter(models.Comparison.dataset_ID_2.in_(dataset_1)) \
.filter(models.Comparison.gene_transcript == gene_transcript)

if len(comparison) != 1:
abort(404, "No (unique) comparison found for given inputs")

return comparison.all(), reverse
comparisons = comparison.all()

# error if no comparison found
if len(comparisons) == 0:
abort(404, "No comparison found for given inputs")

if len(comparisons) > 1:
abort(404, "Multiple comparisons found for given inputs")

return comparisons, reverse


def get_comparison(dataset_ID: str = None, disease_name: str = None, disease_subtype=None, sponge_db_version: int = LATEST):
Expand Down
107 changes: 36 additions & 71 deletions app/controllers/geneInteraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,88 +650,53 @@ def read_mirna_for_specific_interaction(dataset_ID: int = None, disease_name=Non
:param sponge_db_version: version of the sponge database
:return: all miRNAs contributing to the interactions between genes of interest
"""
# test if either of the two identification possibilities is given
if ensg_number is None and gene_symbol is None:
abort(404, "One of the two possible identification numbers must be provided")

if ensg_number is not None and gene_symbol is not None:
abort(404,
"More than one identifikation paramter is given. Please choose one out of (ensg number, gene symbol)")

# get all sponge_runs for the given sponge_db_version
run = models.SpongeRun.query \
.filter(models.SpongeRun.sponge_db_version == sponge_db_version)

queries = []
run_IDs = []
# if specific disease_name is given:
# get diseases
disease_query = db.select(models.Dataset.dataset_ID).where(models.Dataset.sponge_db_version == sponge_db_version)
if disease_name is not None:
run = models.SpongeRun.query.join(models.Dataset, models.Dataset.dataset_ID == models.SpongeRun.dataset_ID) \
.filter(models.Dataset.disease_name.like("%" + disease_name + "%"))

disease_query = disease_query.where(models.Dataset.disease_name.like("%" + disease_name + "%"))
if dataset_ID is not None:
run = run.filter(models.Dataset.dataset_ID == dataset_ID)

run = run.all()
disease_query = disease_query.where(models.Dataset.dataset_ID == dataset_ID)

if len(run) > 0:
run_IDs = [i.sponge_run_ID for i in run]
queries.append(models.miRNAInteraction.sponge_run_ID.in_(run_IDs))
else:
abort(404, "No dataset with given disease_name found")
# filter runs for diseases
run_query = db.select(models.SpongeRun.sponge_run_ID).where(models.SpongeRun.dataset_ID.in_(disease_query))

gene = []
# if ensg_number is given to specify gene(s), get the internal gene_ID (primary_key) for the requested ensg_nr (gene_ID)
# get gene IDs
gene_query = db.select(models.Gene.gene_ID)
if ensg_number is not None:
gene = models.Gene.query \
.filter(models.Gene.ensg_number.in_(ensg_number)) \
.all()
# if gene_symbol is given to specify gene(s), get the internal gene_ID (primary_key) for the requested gene_symbol (gene_ID)
elif gene_symbol is not None:
gene = models.Gene.query \
.filter(models.Gene.gene_symbol.in_(gene_symbol)) \
.all()
gene_query = gene_query.where(models.Gene.ensg_number.in_(ensg_number))
if gene_symbol is not None:
gene_query = gene_query.where(models.Gene.gene_symbol.in_(gene_symbol))

gene_IDs = []
if len(gene) > 0:
gene_IDs = [i.gene_ID for i in gene]
queries.append(models.miRNAInteraction.gene_ID.in_(gene_IDs))
else:
abort(404, "No gene found for given identifiers.")
# Get all interactions for the given genes and runs
base_interaction_query = db.select(models.miRNAInteraction).where(
models.miRNAInteraction.gene_ID.in_(gene_query),
models.miRNAInteraction.sponge_run_ID.in_(run_query),
)

interaction_result = []
if between:
# an Engine, which the Session will use for connection resources
some_engine = sa.create_engine(os.getenv("SPONGE_DB_URI"), pool_recycle=30)

# create a configured "Session" class
Session = sa.orm.sessionmaker(bind=some_engine)

# create a Session
session = Session()
# test for each dataset if the gene(s) of interest are included in the ceRNA network

mirna_filter = session.execute(text("select mirna_ID from interactions_genemirna where sponge_run_ID IN ( "
+ ','.join(str(e) for e in run_IDs) + ") and gene_ID IN ( "
+ ','.join(str(e) for e in gene_IDs)
+ ") group by mirna_ID HAVING count(mirna_ID) >= 2;")).fetchall()

session.close()
some_engine.dispose()

if len(mirna_filter) == 0:
abort(404, "No shared miRNA between genes found.")
# Subquery to count distinct genes
distinct_gene_count_subquery = (
db.select(db.func.count(db.func.distinct(gene_query.c.gene_ID))).scalar_subquery()
)
print(distinct_gene_count_subquery)

# Subquery to get miRNA IDs that meet the 'between' condition
mirna_query = db.select(models.miRNAInteraction.miRNA_ID) \
.where(models.miRNAInteraction.gene_ID.in_(gene_query)) \
.where(models.miRNAInteraction.sponge_run_ID.in_(run_query)) \
.group_by(models.miRNAInteraction.miRNA_ID) \
.having(db.func.count(models.miRNAInteraction.gene_ID) == distinct_gene_count_subquery)

# Filter interactions by the miRNA IDs from the previous subquery
interaction_query = base_interaction_query.where(
models.miRNAInteraction.miRNA_ID.in_(mirna_query)
)
else:
interaction_query = base_interaction_query

flat_mirna_filter = [item for sublist in mirna_filter for item in sublist]
queries.append(models.miRNAInteraction.miRNA_ID.in_(flat_mirna_filter))
interaction_result = db.session.execute(interaction_query).scalars().all()

interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()
else:
interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()

if len(interaction_result) > 0:
# Serialize the data for the response depending on parameter all
Expand Down
36 changes: 22 additions & 14 deletions app/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from marshmallow import fields, Schema
from marshmallow import fields, Schema, post_dump
from sqlalchemy.orm import relationship

from app.config import db, ma
Expand Down Expand Up @@ -567,6 +567,11 @@ class Gsea(db.Model):
fwerp = db.Column(db.Float)
gene_percent = db.Column(db.Float)

lead_genes = relationship("GseaLeadGenes", back_populates="gsea", lazy="select")
matched_genes = relationship("GseaMatchedGenes")
ranking_genes = relationship("GseaRankingGenes")


class GseaLeadGenes(db.Model):
__tablename__ = "gsea_lead_genes"

Expand All @@ -575,8 +580,8 @@ class GseaLeadGenes(db.Model):
gsea_ID = db.Column(db.Integer, db.ForeignKey('gsea.gsea_ID'))
gsea = relationship("Gsea", foreign_keys=[gsea_ID])

gene_ID = db.Column(db.Integer, db.ForeignKey('gene.gene_ID'))
gene_symbol = relationship("Gene", foreign_keys=[gene_ID])
gene_ID = db.Column(db.Integer, db.ForeignKey('gene.gene_ID'), nullable=False)
gene = relationship("Gsea", back_populates="lead_genes")

class GseaMatchedGenes(db.Model):
__tablename__ = "gsea_matched_genes"
Expand All @@ -586,8 +591,8 @@ class GseaMatchedGenes(db.Model):
gsea_ID = db.Column(db.Integer, db.ForeignKey('gsea.gsea_ID'))
gsea = relationship("Gsea", foreign_keys=[gsea_ID])

gene_ID = db.Column(db.Integer, db.ForeignKey('gene.gene_ID'))
gene_symbol = relationship("Gene", foreign_keys=[gene_ID])
gene_ID = db.Column(db.Integer, db.ForeignKey('gene.gene_ID'), nullable=False)
gene = relationship("Gene", foreign_keys=[gene_ID])

class GseaRes(db.Model):
__tablename__ = "gsea_res"
Expand Down Expand Up @@ -833,7 +838,7 @@ class Meta:
sql_session = db.session
fields = ["dataset", "gene", "pValue"]

dataset = ma.Nested(lambda: DatasetSchema(only=("dataet_ID", "disease_name")))
dataset = ma.Nested(lambda: DatasetSchema(only=("dataset_ID", "disease_name")))
gene = ma.Nested(lambda: GeneSchema(only=("ensg_number", "gene_symbol")))

class checkGeneInteractionProCancer(ma.SQLAlchemyAutoSchema):
Expand Down Expand Up @@ -1012,7 +1017,7 @@ class AlternativeSplicingEventsTranscriptsSchema(ma.SQLAlchemyAutoSchema):
class Meta:
model = AlternativeSplicingEventTranscripts
sqla_session = db.session
fields = ["transcript", "event_name", "event_type"]
fields = ['alternative_splicing_event_transcripts_ID', "transcript", "event_name", "event_type"]

transcript = ma.Nested(lambda: TranscriptSchema(only=("enst_number", )))

Expand Down Expand Up @@ -1126,34 +1131,37 @@ class GseaLeadGenesSchema(ma.SQLAlchemyAutoSchema):
class Meta:
model = GseaLeadGenes
sqla_session = db.session
fields = ["gsea_lead_genes_ID", "gene"]

gene_symbol = ma.Nested(lambda: GeneSchemaShort(only=("gene_symbol")))
gene = ma.Nested(lambda: GeneSchema(only=("ensg_number", "gene_symbol")))


class GseaMatchedGenesSchema(ma.SQLAlchemyAutoSchema):
class Meta:
model = GseaLeadGenes
model = GseaMatchedGenes
sqla_session = db.session
fields = ["gsea_matched_genes_ID", "gene"]

gene_symbol = ma.Nested(lambda: GeneSchemaShort(only=("gene_symbol")))
gene = ma.Nested(lambda: GeneSchema(only=("ensg_number", "gene_symbol")))

class GseaSchema(ma.SQLAlchemyAutoSchema):
class Meta:
model = Gsea
sqla_session = db.session
load_instance = True
fields = ["term", "es", "nes", "pvalue", "fdr", "fwerp", "gene_percent", "lead_genes", "matched_genes", "res"]

lead_genes = ma.Nested(lambda: GseaLeadGenesSchema(only=("gene_symbol")), many=True)
matched_genes = ma.Nested(lambda: GseaMatchedGenesSchema(only=("gene_symbol")), many=True)
res = ma.Nested(lambda: GseaResSchema(only=("res_ID", "score")), many=True)
lead_genes = ma.Nested(GseaLeadGenesSchema, many=True)
matched_genes = ma.Nested(GseaMatchedGenesSchema, many=True)
res = ma.Nested(lambda: GseaResSchema(only=("res_ID", "score")))

class GseaSchemaPlot(ma.SQLAlchemyAutoSchema):
class Meta:
model = Gsea
sqla_session = db.session
fields = ["term", "nes", "pvalue", "fdr", "res", "matched_genes", "gsea_ranking_genes"]

res = ma.Nested(lambda: GseaResSchema(only=("res_ID", "score")), many=True)
res = ma.Nested(lambda: GseaResSchema(only=("res_ID", "score")))

class GseaTermsSchema(ma.SQLAlchemyAutoSchema):
class Meta:
Expand Down
Loading

0 comments on commit 2519655

Please sign in to comment.