-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
13 changed files
with
114 additions
and
727 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,19 @@ | ||
from fastapi import APIRouter, Path, Response | ||
from typing import List | ||
from fastapi import APIRouter, Path # , Depends, HTTPException, Query | ||
|
||
from monarch_py.api.additional_models import TextAnnotationRequest | ||
from monarch_py.api.config import spacyner | ||
from monarch_py.datamodels.model import TextAnnotationResult | ||
from monarch_py.api.config import oak | ||
|
||
router = APIRouter(tags=["text_annotation"], responses={404: {"description": "Not Found"}}) | ||
|
||
|
||
@router.get("/annotate/{content}") | ||
def _annotate(content: str = Path(title="The text content to annotate")) -> str: | ||
return Response(content=spacyner().annotate_text(content), media_type="text/html") | ||
@router.get("/annotate/{content}", include_in_schema=False)
def _annotate(content: str = Path(title="The text content to annotate")):
    """Annotate the text supplied in the URL path.

    Args:
        content: The text to annotate, taken from the ``{content}`` path segment.

    Returns:
        Whatever ``oak().annotate_text`` returns for ``content``.
    """
    # Function-scope import keeps this handler self-contained.
    import logging

    # Debug-level logging instead of print(): request content should not be
    # written unconditionally to stdout. %r keeps embedded newlines on one line.
    logging.getLogger(__name__).debug("Running oak annotate (GET): %r", content)
    return oak().annotate_text(content)
|
||
|
||
@router.post("/annotate") | ||
def _post_annotate(request: TextAnnotationRequest) -> str: | ||
return _annotate(request.content) | ||
|
||
|
||
@router.get("/annotate/entities") | ||
def _entities(request: TextAnnotationRequest) -> List[TextAnnotationResult]: | ||
return spacyner().get_annotated_entities(request.content) | ||
|
||
|
||
@router.post("/annotate/entities") | ||
def _post_entities(request: TextAnnotationRequest) -> List[TextAnnotationResult]: | ||
return _entities(request) | ||
@router.post("/annotate", include_in_schema=False)
def _post_annotate(request: TextAnnotationRequest):
    """Annotate the text supplied in the request body.

    Args:
        request: Request model whose ``content`` attribute holds the text.

    Returns:
        Whatever ``oak().annotate_text`` returns for ``request.content``.
    """
    # Function-scope import keeps this handler self-contained.
    import logging

    # Debug-level logging instead of print(): request content should not be
    # written unconditionally to stdout. %r keeps embedded newlines on one line.
    logging.getLogger(__name__).debug("Running oak annotate (POST): %r", request.content)
    return oak().annotate_text(request.content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from monarch_py.api.config import oak | ||
import re | ||
|
||
phenio_adapter = oak().phenio_adapter | ||
|
||
|
||
def annotate_text(text):
    """Return ``text`` with recognised terms wrapped in annotation spans.

    The text is split into sentences; each sentence is passed to
    ``phenio_adapter.annotate_text``, the surviving matches are sorted, merged
    (identical ranges first, then overlapping ranges) and rendered with
    ``replace_entities``.

    Args:
        text: The text to annotate.

    Returns:
        The processed sentences, each followed by a single space. Sentences
        without any surviving match are emitted unchanged.
    """
    # Split on whitespace that follows "." or "?", except after patterns such
    # as "e.g." or "Dr." that the look-behinds exclude.
    sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
    annotated = []
    for sentence in sentences:
        entities = []
        for ann in phenio_adapter.annotate_text(sentence):  # type: ignore
            label_length = len(ann.object_label)
            # Labels shorter than four characters are ignored.
            if label_length < 4:
                continue
            # Offsets are shifted by one before indexing the sentence
            # (presumably the adapter reports 1-based positions -- confirm).
            word_length = get_word_length(sentence, ann.subject_start - 1)
            # Keep the match only if the word starting at the match is at most
            # one letter longer than the label.
            if word_length - label_length < 2:
                entities.append(
                    [ann.subject_start, ann.subject_end, str(ann.object_label) + "," + str(ann.object_id)]
                )
        if not entities:
            # Nothing to annotate: emit the sentence as-is rather than relying
            # on an IndexError from the merge helpers.
            annotated.append(sentence)
            continue
        entities.sort()
        entities = concatenate_same_entities(entities)
        entities = concatenate_ngram_entities(entities)
        annotated.append(replace_entities(sentence, entities))
    # Every sentence is followed by one space, including the last one.
    return "".join(sentence + " " for sentence in annotated)
|
||
|
||
def get_word_length(text, start):
    """Return the length of the alphabetic run in ``text`` beginning at ``start``.

    Args:
        text: The string to scan.
        start: Zero-based index at which to start counting. Negative values
            are treated as 0 so they cannot wrap around to the end of the
            string.

    Returns:
        The number of consecutive ``str.isalpha`` characters starting at
        ``start``; 0 if ``start`` is past the end of ``text`` or the character
        there is not alphabetic.
    """
    begin = max(start, 0)
    end = begin
    size = len(text)
    # Advance over the alphabetic run without building an intermediate string.
    while end < size and text[end].isalpha():
        end += 1
    return end - begin
|
||
|
||
def concatenate_same_entities(lst):
    """Merge entries that share the same ``(start, end)`` range.

    Args:
        lst: Sequence of ``[start, end, value]`` entries, where ``value`` is a
            string.

    Returns:
        A new list of ``[start, end, value]`` lists with one entry per distinct
        range, in order of first appearance. Values of entries sharing a range
        are joined with ``"|"`` in their original order.
    """
    grouped = {}
    for elem in lst:
        # Collect the values per range and join once at the end instead of
        # growing a string with repeated concatenation.
        grouped.setdefault((elem[0], elem[1]), []).append(elem[2])
    return [[start, end, "|".join(values)] for (start, end), values in grouped.items()]
|
||
|
||
def concatenate_ngram_entities(lst):
    """Merge entries whose ranges overlap into a single entry.

    Args:
        lst: Sequence of ``[start, end, text]`` entries, expected to be ordered
            by ``start`` (the function does not sort them).

    Returns:
        A new list of ``[start, end, text]`` lists. An entry whose ``start`` is
        less than or equal to the running ``end`` is folded into the previous
        entry: the range is extended and the texts are joined with ``"|"``.
        An empty input yields an empty list.
    """
    merged = []
    for start, end, text in lst:
        if merged and start <= merged[-1][1]:
            # Overlaps the entry being built: widen it and append the text.
            current = merged[-1]
            current[1] = max(current[1], end)
            current[2] += "|" + text
        else:
            # Fresh list so the caller's entries are never mutated.
            merged.append([start, end, text])
    return merged
|
||
|
||
def replace_entities(text, entities):
    """Wrap each entity's character range in ``text`` with an annotation span.

    Args:
        text: The original sentence.
        entities: Sequence of ``[start, end, value]`` entries. ``start`` is
            shifted down by one before slicing and ``end`` is used as the
            exclusive slice bound; ranges are expected not to overlap.

    Returns:
        ``text`` with every range replaced by
        ``<span class="sciCrunchAnnotation" data-sciGraph="VALUE">...</span>``,
        where ``VALUE`` is the HTML-escaped entity value. The text outside and
        inside the spans is left exactly as supplied.
    """
    # Function-scope import keeps this block self-contained.
    import html

    replaced_text = text
    # Work from the rightmost entity backwards so earlier offsets stay valid
    # while the string grows.
    for entity in sorted(entities, key=lambda x: x[0], reverse=True):
        # Clamp at 0: a negative slice start would count from the end of the
        # string and corrupt the output.
        start, end = max(entity[0] - 1, 0), entity[1]
        # Escape the value so quotes or angle brackets in a label cannot break
        # out of the attribute.
        attribute = html.escape(str(entity[2]), quote=True)
        span = f'<span class="sciCrunchAnnotation" data-sciGraph="{attribute}">{text[start:end]}</span>'
        replaced_text = replaced_text[:start] + span + replaced_text[end:]
    return replaced_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 0 additions & 11 deletions
11
backend/src/monarch_py/interfaces/text_annotation_interface.py
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters