Skip to content

Commit

Permalink
Revert "Text annotation v3 (#522)"
Browse files Browse the repository at this point in the history
This reverts commit 3b0b0cc.
  • Loading branch information
glass-ships committed Jan 17, 2024
1 parent 28ab311 commit 994b3dc
Show file tree
Hide file tree
Showing 13 changed files with 114 additions and 727 deletions.
551 changes: 22 additions & 529 deletions backend/poetry.lock

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,7 @@ requests = "^2.28.1"
rich = "*"
typer = "^0.7.0"
typer-cli = "^0.0.13"
spacy = ">=3.6.1,<3.8.0"

[tool.poetry.dependencies.en_core_sci_sm]
url="https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.3/en_core_sci_sm-0.5.3.tar.gz"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.0"
Expand Down
10 changes: 2 additions & 8 deletions backend/src/monarch_py/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from pydantic import BaseModel

# from pydantic_settings import BaseSettings

from monarch_py.implementations.solr.solr_implementation import SolrImplementation
from monarch_py.implementations.spacy.spacy_implementation import SpacyImplementation
from monarch_py.datamodels.model import TermSetPairwiseSimilarity, SemsimSearchResult


Expand Down Expand Up @@ -115,10 +116,3 @@ def oak():
# oak_implementation = OakImplementation()
# oak_implementation.init_phenio_adapter(force_update=False, phenio_path=settings.phenio_db_path)
# return oak_implementation


@lru_cache(maxsize=1)
def spacyner():
spacy_implementation = SpacyImplementation()
spacy_implementation.init_spacy(search_engine=solr())
return spacy_implementation
3 changes: 1 addition & 2 deletions backend/src/monarch_py/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from monarch_py.api import association, entity, histopheno, search, semsim, text_annotation
from monarch_py.api.config import semsimian, spacyner
from monarch_py.api.config import semsimian
from monarch_py.api.middleware.logging_middleware import LoggingMiddleware

PREFIX = "/v3/api"
Expand All @@ -17,7 +17,6 @@
@app.on_event("startup")
async def initialize_app():
semsimian()
spacyner()
# oak()


Expand Down
31 changes: 11 additions & 20 deletions backend/src/monarch_py/api/text_annotation.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,19 @@
from fastapi import APIRouter, Path, Response
from typing import List
from fastapi import APIRouter, Path # , Depends, HTTPException, Query

from monarch_py.api.additional_models import TextAnnotationRequest
from monarch_py.api.config import spacyner
from monarch_py.datamodels.model import TextAnnotationResult
from monarch_py.api.config import oak

router = APIRouter(tags=["text_annotation"], responses={404: {"description": "Not Found"}})


@router.get("/annotate/{content}")
def _annotate(content: str = Path(title="The text content to annotate")) -> str:
return Response(content=spacyner().annotate_text(content), media_type="text/html")
@router.get("/annotate/{content}", include_in_schema=False)
def _annotate(content: str = Path(title="The text content to annotate")):
print(f"\n\nRunning oak annotate (GET):\n{content}\n")
return oak().annotate_text(content)


@router.post("/annotate")
def _post_annotate(request: TextAnnotationRequest) -> str:
return _annotate(request.content)


@router.get("/annotate/entities")
def _entities(request: TextAnnotationRequest) -> List[TextAnnotationResult]:
return spacyner().get_annotated_entities(request.content)


@router.post("/annotate/entities")
def _post_entities(request: TextAnnotationRequest) -> List[TextAnnotationResult]:
return _entities(request)
@router.post("/annotate", include_in_schema=False)
def _post_annotate(request: TextAnnotationRequest):
print(f"\n\nRunning oak annotate (POST):\n{request.content}\n")
# print(request.content.split("\n"))
return oak().annotate_text(request.content)
75 changes: 75 additions & 0 deletions backend/src/monarch_py/api/utils/get_text_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from monarch_py.api.config import oak
import re

# Resolve the adapter once, when this module is imported; annotate_text()
# calls phenio_adapter.annotate_text() for every sentence it processes.
# NOTE(review): this runs oak() as an import-time side effect — confirm that
# oak() returns an initialized implementation exposing `phenio_adapter`.
phenio_adapter = oak().phenio_adapter


def annotate_text(text):
    """Wrap the terms recognised in ``text`` in annotation ``<span>`` markup.

    The text is split into sentences, each sentence is passed to
    ``phenio_adapter.annotate_text``, and the matches that survive the
    filters below are merged and substituted with ``<span>`` elements by
    ``replace_entities``.

    Args:
        text: The free text to annotate.

    Returns:
        The processed sentences concatenated into one string. Every sentence
        (annotated or not) is followed by a single space, so the result ends
        with a trailing space.
    """
    # Split on whitespace that follows "." or "?"; the two negative
    # lookbehinds are presumably there to avoid splitting after
    # abbreviations/initials — TODO confirm against real inputs.
    sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
    pieces = []
    for sentence in sentences:
        entities = []
        for ann in phenio_adapter.annotate_text(sentence):  # type: ignore
            # Labels shorter than 4 characters are discarded.
            if len(ann.object_label) < 4:
                continue
            # Keep the match only when the word found at the match position
            # is less than 2 characters longer than the label.
            # subject_start is decremented before indexing, i.e. it is
            # treated as 1-based here.
            word_length = get_word_length(sentence, ann.subject_start - 1)
            if word_length - len(ann.object_label) < 2:
                entities.append(
                    [ann.subject_start, ann.subject_end, str(ann.object_label) + "," + str(ann.object_id)]
                )

        if not entities:
            # Nothing to annotate is the normal case for many sentences:
            # pass the sentence through unchanged instead of relying on an
            # IndexError from the merge step (and printing a spurious error).
            pieces.append(sentence)
            continue

        try:
            entities.sort()
            entities = concatenate_same_entities(entities)
            entities = concatenate_ngram_entities(entities)
            pieces.append(replace_entities(sentence, entities))
        except IndexError as error:
            # Best-effort fallback kept from the original implementation:
            # emit the sentence unannotated rather than failing the request.
            pieces.append(sentence)
            print("Error occurred:", error)
    # Preserve the original output shape: each sentence followed by a space.
    return "".join(piece + " " for piece in pieces)


def get_word_length(text, start):
    """Count the consecutive alphabetic characters of ``text`` from ``start``.

    Args:
        text: The string to scan.
        start: Index at which scanning begins.

    Returns:
        The number of alphabetic characters read before hitting a
        non-alphabetic character or the end of ``text``; 0 when ``start`` is
        at/after the end or the first character is not alphabetic.
    """
    limit = len(text)
    position = start
    # Advance past the run of letters; the distance travelled is its length.
    while position < limit and text[position].isalpha():
        position += 1
    return position - start


def concatenate_same_entities(lst):
    """Collapse entities that share the same (start, end) range.

    Args:
        lst: ``[start, end, text]`` entries.

    Returns:
        A new list with one ``[start, end, text]`` entry per distinct range,
        in order of first appearance; the texts of entries sharing a range
        are joined with ``"|"``.
    """
    by_span = {}
    for item in lst:
        span = (item[0], item[1])
        label = item[2]
        if span not in by_span:
            by_span[span] = label
            continue
        by_span[span] = by_span[span] + "|" + label
    return [[begin, finish, labels] for (begin, finish), labels in by_span.items()]


def concatenate_ngram_entities(lst):
    """Merge entities whose character ranges overlap into single entries.

    Args:
        lst: ``[start, end, text]`` entries. The merge is a single forward
            pass, so the entries are expected to be sorted by ``start``.

    Returns:
        A new list of ``[start, end, text]`` entries in which each run of
        overlapping ranges is replaced by one entry spanning the whole run,
        with the texts joined by ``"|"``. An empty input yields an empty
        list (previously this raised ``IndexError``).
    """
    if not lst:
        # No entities: nothing to merge. Avoids indexing lst[0] below.
        return []

    merged_list = []
    start, end, text = lst[0]
    for element in lst[1:]:
        if element[0] <= end:  # Range overlaps (or touches) the current run
            end = max(end, element[1])  # Extend the run
            text += "|" + element[2]  # Accumulate the texts
        else:
            merged_list.append([start, end, text])  # Close the current run
            start, end, text = element  # Start a new run
    merged_list.append([start, end, text])  # Close the final run
    return merged_list


def replace_entities(text, entities):
    """Wrap each entity's character range of ``text`` in an annotation span.

    Args:
        text: The sentence being annotated.
        entities: ``[start, end, value]`` entries. ``start`` is decremented
            by one before slicing; ``end`` is used as the slice end as-is.

    Returns:
        ``text`` with every entity range replaced by a
        ``<span class="sciCrunchAnnotation">`` element that carries the
        entity value in its ``data-sciGraph`` attribute and the original
        characters as its content.
    """
    annotated = text
    # Work from the right-most entity backwards so that inserting markup
    # never shifts the offsets of the entities still to be processed.
    for entity in sorted(entities, key=lambda item: item[0], reverse=True):
        begin = entity[0] - 1
        stop = entity[1]
        label = entity[2]
        wrapped = f'<span class="sciCrunchAnnotation" data-sciGraph="{label}">{text[begin:stop]}</span>'
        annotated = "".join((annotated[:begin], wrapped, annotated[stop:]))
    return annotated
9 changes: 0 additions & 9 deletions backend/src/monarch_py/datamodels/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,14 +595,6 @@ class SearchResults(Results):
total: int = Field(..., description="""total number of items matching a query""")


class TextAnnotationResult(ConfiguredBaseModel):

text: Optional[str] = Field(None, description="""text without tokens""")
tokens: Optional[List[Entity]] = Field(default_factory=list, description="""A collection of entities or concepts""")
start: Optional[int] = Field(None, description="""start position of the annotation""")
end: Optional[int] = Field(None, description="""end position of the annotation""")


class PairwiseSimilarity(ConfiguredBaseModel):
"""
Abstract grouping for representing individual pairwise similarities
Expand Down Expand Up @@ -707,7 +699,6 @@ class SemsimSearchResult(ConfiguredBaseModel):
MultiEntityAssociationResults.model_rebuild()
SearchResult.model_rebuild()
SearchResults.model_rebuild()
TextAnnotationResult.model_rebuild()
PairwiseSimilarity.model_rebuild()
TermPairwiseSimilarity.model_rebuild()
TermSetPairwiseSimilarity.model_rebuild()
Expand Down
24 changes: 0 additions & 24 deletions backend/src/monarch_py/datamodels/model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,6 @@ classes:
items:
range: SearchResult

TextAnnotationResult:
slots:
- text
- tokens
- start
- end

slots:
aggregator_knowledge_source:
multivalued: true
Expand Down Expand Up @@ -643,20 +636,3 @@ slots:
# object label is already included in this schema
mapping_justification:
range: string
# Text annotation
text:
description: text without tokens
range: string
inlined: true
tokens:
description: A collection of entities or concepts
range: Entity
inlined: true
inlined_as_list: true
multivalued: true
start:
description: start position of the annotation
range: integer
end:
description: end position of the annotation
range: integer
11 changes: 0 additions & 11 deletions backend/src/monarch_py/interfaces/text_annotation_interface.py

This file was deleted.

108 changes: 0 additions & 108 deletions backend/tests/integration/test_spacy_implementation.py

This file was deleted.

11 changes: 0 additions & 11 deletions frontend/src/api/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -516,17 +516,6 @@ export interface SearchResults extends Results {
/** total number of items matching a query */
total: number,
};

export interface TextAnnotationResult {
/** text without tokens */
text?: string,
/** A collection of entities or concepts */
tokens?: Entity[],
/** start position of the annotation */
start?: number,
/** end position of the annotation */
end?: number,
};
/**
* Abstract grouping for representing individual pairwise similarities
*/
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/api/text-annotator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export const annotateText = async (content = ""): Promise<Annotations> => {
};

/** make query */
const url = `${apiUrl}/annotate/entities`;
const url = `${apiUrl}/annotate`;
const response = await request<_Annotations>(url, params, options);

const transformedResponse = response.map((item) => ({
Expand Down
3 changes: 2 additions & 1 deletion frontend/src/pages/explore/tabs.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"id": "text-annotator",
"text": "Text Annotator",
"icon": "subscript",
"tooltip": "Annotate text with concepts from our knowledge graph"
"tooltip": "Coming soon!",
"disabled": true
},
{
"id": "phenotype-explorer",
Expand Down

0 comments on commit 994b3dc

Please sign in to comment.