diff --git a/.gitignore b/.gitignore index ecee2bf..f824f4c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ /coverage # production -/build +build/ # misc .DS_Store @@ -36,4 +36,5 @@ simple-backend/db-old.sqlite3 .eslintcache stave.iml -package-lock.json \ No newline at end of file +package-lock.json +*.egg-info \ No newline at end of file diff --git a/setup.py b/setup.py index b3e1a61..2f499f6 100644 --- a/setup.py +++ b/setup.py @@ -27,11 +27,9 @@ 'requests==2.25.1', 'django>=3.0.4', 'django-guardian==2.3.0', - 'tornado==6.1' + 'tornado==6.1', + 'forte', ], - extras_require={ - "forte": ["forte"], - }, entry_points={ 'console_scripts':[ 'stave = stave_backend.lib.stave_cli:main' diff --git a/simple-backend/sample_sql/stave_backend_project.sql b/simple-backend/sample_sql/stave_backend_project.sql index 15f3225..ba95bca 100644 --- a/simple-backend/sample_sql/stave_backend_project.sql +++ b/simple-backend/sample_sql/stave_backend_project.sql @@ -530,6 +530,12 @@ INSERT INTO "stave_backend_project" ("id","name","ontology","user_id","config", "name": "all_ontology", + "additional_prefixes": [ + + "edu.cmu" + + ], + "definitions": [ { diff --git a/simple-backend/stave_backend/handlers/document.py b/simple-backend/stave_backend/handlers/document.py index 4aa9ed2..4f59dc7 100644 --- a/simple-backend/stave_backend/handlers/document.py +++ b/simple-backend/stave_backend/handlers/document.py @@ -1,14 +1,14 @@ -from django.contrib import admin -from django.urls import include, path from django.http import HttpResponse, JsonResponse, Http404 from django.forms import model_to_dict import uuid import json from django.contrib.auth.decorators import permission_required -from guardian.decorators import permission_required_or_403 -from ..models import Document, User, Project, Job +from ..models import Document, Project, Job from ..lib.require_login import require_login -from ..lib.utils import fetch_doc_check_perm, check_perm_project, fetch_job +from ..lib.utils import ( + fetch_doc_check_perm, check_perm_project, fetch_job +) +from ..lib.stave_pack_parser import StavePackParser @require_login @permission_required('stave_backend.view_document', raise_exception=True) @@ -228,18 +228,15 @@ def new_annotation(request, document_id): # } doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation") - - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - annotation_id = uuid.uuid4().int received_json_data = json.loads(request.body) annotation = received_json_data.get('data') annotation["py/state"]['_tid'] = annotation_id - textPackJson['py/state']['annotations'].append(annotation) - doc.textPack = json.dumps(textPackJson) + doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).add_entry_to_doc(entry_dict=annotation) doc.save() return JsonResponse({"id": str(annotation_id)}, safe=False) @@ -270,18 +267,9 @@ def edit_annotation(request, document_id, annotation_id): OK if succeeded, otherwise forbidden or not found """ doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation") - - received_json_data = json.loads(request.body) - annotation = received_json_data.get('data') - - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - - for index, item in enumerate(textPackJson['py/state']['annotations']): - if item["py/state"]['_tid'] == annotation_id: - textPackJson['py/state']['annotations'][index] = annotation - - doc.textPack = json.dumps(textPackJson) + 
doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).edit_entry_in_doc(entry_dict=json.loads(request.body).get('data')) doc.save() return HttpResponse('OK') @@ -310,17 +298,9 @@ def delete_annotation(request, document_id, annotation_id): # if doc doesn't exist doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation") - - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - - deleteIndex = -1 - for index, item in enumerate(textPackJson['py/state']['annotations']): - if item["py/state"]['_tid'] == annotation_id: - deleteIndex = index - - del textPackJson['py/state']['annotations'][deleteIndex] - doc.textPack = json.dumps(textPackJson) + doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).delete_annotation_from_doc(entry_tid=annotation_id) doc.save() return HttpResponse('OK') @@ -371,10 +351,9 @@ def new_link(request, document_id): link = received_json_data.get('data') link["py/state"]['_tid'] = link_id - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - textPackJson['py/state']['links'].append(link) - doc.textPack = json.dumps(textPackJson) + doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).add_entry_to_doc(entry_dict=link) doc.save() return JsonResponse({"id": str(link_id)}, safe=False) @@ -397,18 +376,9 @@ def edit_link(request, document_id, link_id): OK if succeeded, otherwise forbidden or not found. """ doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation") - - received_json_data = json.loads(request.body) - link = received_json_data.get('data') - - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - - for index, item in enumerate(textPackJson['py/state']['links']): - if item["py/state"]['_tid'] == link_id: - textPackJson['py/state']['links'][index] = link - - doc.textPack = json.dumps(textPackJson) + doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).edit_entry_in_doc(entry_dict=json.loads(request.body).get('data')) doc.save() return HttpResponse('OK') @@ -431,17 +401,9 @@ def delete_link(request, document_id, link_id): OK if succeeded, otherwise forbidden or not found. """ doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation") - - docJson = model_to_dict(doc) - textPackJson = json.loads(docJson['textPack']) - - deleteIndex = -1 - for index, item in enumerate(textPackJson['py/state']['links']): - if item["py/state"]['_tid'] == link_id: - deleteIndex = index - - del textPackJson['py/state']['links'][deleteIndex] - doc.textPack = json.dumps(textPackJson) + doc.textPack = StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).delete_link_from_doc(entry_tid=link_id) doc.save() return HttpResponse('OK') @@ -468,14 +430,11 @@ def get_doc_ontology_pack(request, document_id): """ doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.read_project") - # Convert every large integer to string to prevent precision loss - # In javascript, integers are accurate up to 15 digits. 
- textPackJson = json.loads(doc.textPack, - parse_int=lambda si: int(si) if len(si) < 15 else si) - docJson = { 'id': document_id, - 'textPack': json.dumps(textPackJson), + 'textPack': json.dumps(StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).transform_pack()), 'ontology': doc.project.ontology } diff --git a/simple-backend/stave_backend/handlers/nlp.py b/simple-backend/stave_backend/handlers/nlp.py index 7295a7a..0a258fc 100644 --- a/simple-backend/stave_backend/handlers/nlp.py +++ b/simple-backend/stave_backend/handlers/nlp.py @@ -9,6 +9,7 @@ from ..models import Document from ..lib.require_login import require_login +from ..lib.stave_pack_parser import StavePackParser forte_msg = "Forte is not installed or imported successfully. To get NLP support from Forte, install it from https://github.com/asyml/forte" forte_installed = False @@ -110,12 +111,14 @@ def run_pipeline(request, document_id: int): if pipeline: processedPack = pipeline.process([docJson['textPack']]) doc.textPack = processedPack.to_string(True) - doc.save() - response = JsonResponse(model_to_dict(doc), safe=False) + doc.save() + docJson = model_to_dict(doc) else: logging.error( f"The NLP model of name {model_name} is not " f"loaded, please check the log for possible reasons." ) - response = JsonResponse(docJson, safe=False) - return response + docJson["textPack"] = json.dumps(StavePackParser( + raw_pack=doc.textPack, raw_ontology=doc.project.ontology + ).transform_pack()) + return JsonResponse(docJson, safe=False) diff --git a/simple-backend/stave_backend/lib/stave_pack_parser.py b/simple-backend/stave_backend/lib/stave_pack_parser.py new file mode 100644 index 0000000..cd40ffe --- /dev/null +++ b/simple-backend/stave_backend/lib/stave_pack_parser.py @@ -0,0 +1,383 @@ +""" +Provide interfaces to parse DataPack and ontology for interaction and +conversion of data formats between the frontend and the backend. +""" + +import json +from typing import Dict, Optional +from packaging.version import Version +import tempfile + +from forte.data import DataPack, DataStore +from forte.data.ontology import Annotation, Link +from forte.common import constants +from forte.version import PACK_ID_COMPATIBLE_VERSION + + +class StavePackParser: + r""" + StavePackParser provides interfaces to parse DataPack and ontology + specifications for backend APIs to interact with frontend data formats. + Example usage: + + StavePackParser(raw_pack, raw_ontology).transform_pack() + + """ + + ANNOTATION_LIST = "annotations" + LINK_LIST = "links" + + def __init__(self, raw_pack: str, raw_ontology: str) -> None: + """ + Initialize StavePackParser with input parameters. + + Args: + raw_pack: String of serialized DataPack. The format should be + compatible with the latest DataPack schema. However, we will still + support some old formats (through some manual parsing) so that + the example projects do not break. + raw_ontology: String of serialized ontology json specification.
+ """ + # Parse all entries from ontology + with tempfile.NamedTemporaryFile(suffix=".json") as onto_file: + with open(onto_file.name, 'w') as f: + f.write(raw_ontology) + + # Clear the previous type info to avoid conflicts in onto + # definitions + DataStore._type_attributes = {} + + # Populate DataStore._type_attributes with entries defined + # in input ontology spec + self._data_store: DataStore = DataStore( + onto_file_path=onto_file.name + ) + + self._definitions: Dict = { + definition["entry_name"]: definition + for definition in json.loads(raw_ontology).get("definitions", []) + } + self._pack_json: Dict = json.loads(raw_pack) + + # When the input serialized DataPack's format is outdated, we + # set self._pack to None. + self._pack: Optional[DataPack] = None + if Version( + self._pack_json["py/state"].get("pack_version", "0.0.0") + ) >= Version( + PACK_ID_COMPATIBLE_VERSION + ): + # Parse the raw DataPack when its version is compatible + self._pack = DataPack.from_string(raw_pack) + self._data_store = self._pack._data_store + + def transform_pack(self) -> Dict: + """ + Convert DataPack to a json format that can be understood by + frontend utilities. + + Returns: + A dictionary conformed to a schema for frontend rendering. + """ + if self._pack is None: + # Transform DataPack based on legacy format + return self._transform_pack_json() + return self._transform_pack() + + def add_entry_to_doc(self, entry_dict: Dict) -> str: + """ + Add an entry to DataPack. + + Args: + entry_dict: A dictionary containing the information of the new + entry to be added. For example: + + { + 'py/object': annotation.entry_type(), + 'py/state': { + _span: { + begin: annotation.span.begin, + end: annotation.span.end, + 'py/object': 'forte.data.span.Span', + }, + _tid: annotation.tid, + ...annotation.attributes, + }, + } + + Returns: + A string of serialized DataPack with the new entry added. + """ + if self._pack is None: + # Add entry to DataPack with legacy format + if self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Annotation + ): + self._pack_json['py/state']["annotations"].append(entry_dict) + elif self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Link + ): + self._pack_json['py/state']["links"].append(entry_dict) + return json.dumps(self._pack_json) + else: + # Add entry to DataPack with compatible format + self._add_entry_dict_to_pack(entry_dict=entry_dict) + return self._pack.to_string() + + def edit_entry_in_doc(self, entry_dict: Dict) -> str: + """ + Edit an existing entry in DataPack. + + Args: + entry_dict: A dictionary containing the information of the entry + to be updated. + + Returns: + A string of serialized DataPack with the updated entry. 
+ """ + if self._pack is None: + # Edit entry in DataPack with legacy format + list_name: str = '' + if self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Annotation + ): + list_name = "annotations" + elif self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Link + ): + list_name = "links" + if list_name: + for index, item in enumerate(self._pack_json['py/state'][list_name]): + if str(item["py/state"]['_tid']) == str(entry_dict["py/state"]['_tid']): + self._pack_json['py/state'][list_name][index] = entry_dict + return json.dumps(self._pack_json) + else: + # Edit entry in DataPack with compatible format + self._pack.delete_entry( + entry=self._pack.get_entry(tid=int(entry_dict["py/state"]['_tid'])) + ) + self._add_entry_dict_to_pack(entry_dict=entry_dict) + return self._pack.to_string() + + def delete_annotation_from_doc(self, entry_tid: str) -> str: + """ + Delete an Annotation entry from DataPack. + + Args: + entry_tid: A string representing the TID of the annotation to be + deleted. + + Returns: + A string of serialized DataPack with the target annotation deleted. + """ + return self._delete_entry_from_doc( + entry_tid=entry_tid, type_list=self.ANNOTATION_LIST + ) + + def delete_link_from_doc(self, entry_tid: str) -> str: + """ + Delete a Link entry from DataPack. + + Args: + entry_tid: A string representing the TID of the link to be + deleted. + + Returns: + A string of serialized DataPack with the target link deleted. + """ + return self._delete_entry_from_doc( + entry_tid=entry_tid, type_list=self.LINK_LIST + ) + + def _transform_pack(self): + """ + Transform a DataPack object to a json for frontend rendering + """ + annotations, links, groups = [], [], [] + for annotation in self._data_store.all_entries( + entry_type_name="forte.data.ontology.top.Annotation" + ): + annotations.append({ + "span": { + "begin": annotation[constants.BEGIN_INDEX], + "end": annotation[constants.END_INDEX], + }, + "id": str(annotation[constants.TID_INDEX]), + "legendId": annotation[constants.ENTRY_TYPE_INDEX], + "attributes": { + attr_name: annotation[attr_index] for ( + attr_name, attr_index + ) in self._data_store._get_type_attribute_dict( + type_name=annotation[constants.ENTRY_TYPE_INDEX] + ).items() + } + }) + + for link in self._data_store.all_entries( + "forte.data.ontology.top.Link" + ): + links.append({ + "id": str(link[constants.TID_INDEX]), + "fromEntryId": str(link[constants.PARENT_TID_INDEX]), + "toEntryId": str(link[constants.CHILD_TID_INDEX]), + "legendId": link[constants.ENTRY_TYPE_INDEX], + "attributes": { + attr_name: link[attr_index] for ( + attr_name, attr_index + ) in self._data_store._get_type_attribute_dict( + type_name=link[constants.ENTRY_TYPE_INDEX] + ).items() + } + }) + + for group in self._data_store.all_entries( + "forte.data.ontology.top.Group" + ): + groups.append({ + "id": str(group[constants.TID_INDEX]), + "members": [str(member) for member in group[constants.MEMBER_TID_INDEX]], + "memberType": group[constants.MEMBER_TYPE_INDEX], + "legendId": group[constants.ENTRY_TYPE_INDEX], + "attributes": { + attr_name: group[attr_index] for ( + attr_name, attr_index + ) in self._data_store._get_type_attribute_dict( + type_name=group[constants.ENTRY_TYPE_INDEX] + ).items() + } + }) + + return { + "text": self._pack.text, + "annotations": annotations, + "links": links, + "groups": groups, + "attributes": self._pack._meta.__dict__.copy() + } + + def _transform_pack_json(self): + """ + Transform a DataPack dictionary with legacy format to a json for 
+ frontend rendering + """ + pack_state: Dict = self._pack_json["py/state"] + + annotations, links, groups = [], [], [] + for annotation in pack_state["annotations"]: + entry_data: Dict = annotation.get("py/state") + if not entry_data: continue + annotations.append({ + "span": { + "begin": entry_data["_span"]["begin"], + "end": entry_data["_span"]["end"], + }, + "id": str(entry_data["_tid"]), + "legendId": annotation.get("py/object"), + "attributes": self._get_attributes(annotation), + }) + for link in pack_state["links"]: + entry_data: Dict = link.get("py/state") + if not entry_data: continue + links.append({ + "id": str(entry_data["_tid"]), + "fromEntryId": str(entry_data["_parent"]), + "toEntryId": str(entry_data["_child"]), + "legendId": link.get("py/object"), + "attributes": self._get_attributes(link), + }) + for group in pack_state["groups"]: + entry_data: Dict = group.get("py/state") + if not entry_data: continue + groups.append({ + "id": str(entry_data["_tid"]), + "members": [str(tid) for tid in entry_data["_members"]["py/set"]], + "memberType": self._get_group_type(group), + "legendId": group.get("py/object"), + "attributes": self._get_attributes(group), + }) + + return { + "text": pack_state["_text"], + "annotations": annotations, + "links": links, + "groups": groups, + "attributes": + # Backward compatibility with Forte formats. + pack_state["meta"]["py/state"] if "meta" in pack_state else pack_state["_meta"]["py/state"], + } + + def _get_attributes(self, entry_dict: Dict): + """ + Get a mapping from attribute names to corresponding values in the input + entry + """ + return { + attribute: entry_dict["py/state"][attribute] + for attribute in self._data_store._get_type_attribute_dict( + type_name=entry_dict["py/object"] + ) if attribute in entry_dict["py/state"] + } + + def _get_group_type(self, group_dict: Dict): + """ + Get the member type of a group entry + """ + member_type: str = self._definitions.get( + group_dict.get("py/object"), {} + ).get("member_type") + if self._data_store._is_subclass( + type_name=member_type, cls=Annotation + ): + return "annotation" + elif self._data_store._is_subclass( + type_name=member_type, cls=Link + ): + return "link" + else: + raise ValueError( + f"Unknown group entry: {group_dict.get('py/object')}" + ) + + def _add_entry_dict_to_pack(self, entry_dict: Dict): + """ + Add entry to DataPack with compatible format + """ + if self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Annotation + ): + self._data_store.add_entry_raw( + type_name=entry_dict["py/object"], + attribute_data=[ + int(entry_dict["py/state"]["_span"]["begin"]), + int(entry_dict["py/state"]["_span"]["end"]) + ], + base_class=Annotation, + tid=int(entry_dict["py/state"]["_tid"]) + ) + elif self._data_store._is_subclass( + type_name=entry_dict["py/object"], cls=Link + ): + self._data_store.add_entry_raw( + type_name=entry_dict["py/object"], + attribute_data=[ + int(entry_dict["py/state"]["_parent"]), + int(entry_dict["py/state"]["_child"]) + ], + base_class=Link, + tid=int(entry_dict["py/state"]["_tid"]) + ) + + def _delete_entry_from_doc(self, entry_tid: str, type_list: str): + """ + Delete an entry from DataPack. 
+ """ + if self._pack is None: + for index, item in enumerate(self._pack_json['py/state'][type_list]): + if str(item["py/state"]['_tid']) == str(entry_tid): + delete_index = index + if delete_index >= 0: + del self._pack_json['py/state'][type_list][delete_index] + return json.dumps(self._pack_json) + else: + self._pack.delete_entry(entry=self._pack.get_entry(tid=int(entry_tid))) + return self._pack.to_string() diff --git a/simple-backend/stave_backend/lib/stave_viewer.py b/simple-backend/stave_backend/lib/stave_viewer.py index 798bba8..96c5699 100644 --- a/simple-backend/stave_backend/lib/stave_viewer.py +++ b/simple-backend/stave_backend/lib/stave_viewer.py @@ -34,6 +34,7 @@ from .stave_project import StaveProjectReader from .stave_session import StaveSession from .stave_config import StaveConfig +from .stave_pack_parser import StavePackParser logger = logging.getLogger(__name__) @@ -123,8 +124,9 @@ class PackOntoHandler(ViewerHandler): def get(self, doc_id: str): ontology = json.dumps(self._project_reader.ontology) - textpack = json.dumps( - self._project_reader.get_textpack(int(doc_id))) + textpack = json.dumps(StavePackParser(raw_pack=json.dumps( + self._project_reader.get_textpack(int(doc_id)) + ), raw_ontology=ontology).transform_pack()) self.write({ "id": doc_id, "textPack": textpack, @@ -295,6 +297,8 @@ def load_database(self, load_samples: bool = False): project_names = set(p["name"] for p in project_list) for project_dir in os.listdir(sample_path): project_path = os.path.join(sample_path, project_dir) + if not os.path.isdir(project_path): + continue project_reader = StaveProjectReader(project_path) # Avoid loading duplicate sample projects if project_reader.project_name in project_names: diff --git a/simple-backend/stave_backend/sample_projects/project_2_example/.project_meta.json b/simple-backend/stave_backend/sample_projects/project_2_example/.project_meta.json index 8b6e798..854b5e9 100644 --- a/simple-backend/stave_backend/sample_projects/project_2_example/.project_meta.json +++ b/simple-backend/stave_backend/sample_projects/project_2_example/.project_meta.json @@ -1 +1 @@ -{"project_name": "project-2-example", "project_type": "single_pack", "ontology": {"name": "all_ontology", "definitions": [{"entry_name": "ft.onto.base_ontology.Token", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation :class:`Token`, used to represent a token or a word.", "attributes": [{"name": "pos", "type": "str"}, {"name": "ud_xpos", "type": "str", "description": "Language specific pos tag. Used in CoNLL-U Format. 
Refer to https://universaldependencies.org/format.html"}, {"name": "lemma", "type": "str", "description": "Lemma or stem of word form."}, {"name": "chunk", "type": "str"}, {"name": "ner", "type": "str"}, {"name": "sense", "type": "str"}, {"name": "is_root", "type": "bool"}, {"name": "ud_features", "type": "Dict", "key_type": "str", "value_type": "str"}, {"name": "ud_misc", "type": "Dict", "key_type": "str", "value_type": "str"}]}, {"entry_name": "ft.onto.base_ontology.Document", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Document`, normally used to represent a document."}, {"entry_name": "ft.onto.base_ontology.Sentence", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Sentence`, normally used to represent a sentence.", "attributes": [{"name": "speaker", "type": "str"}, {"name": "part_id", "type": "int"}, {"name": "sentiment", "type": "Dict", "key_type": "str", "value_type": "float"}]}, {"entry_name": "ft.onto.base_ontology.Phrase", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Phrase`.", "attributes": [{"name": "phrase_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.Utterance", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue."}, {"entry_name": "ft.onto.base_ontology.PredicateArgument", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.", "attributes": [{"name": "ner_type", "type": "str"}, {"name": "predicate_lemma", "type": "str"}, {"name": "is_verb", "type": "bool"}]}, {"entry_name": "ft.onto.base_ontology.EntityMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.", "attributes": [{"name": "ner_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.EventMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `EventMention`, used to refer to a mention of an event.", "attributes": [{"name": "event_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.PredicateMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.", "attributes": [{"name": "predicate_lemma", "type": "str"}, {"name": "framenet_id", "type": "str"}, {"name": "is_verb", "type": "bool"}]}, {"entry_name": "ft.onto.base_ontology.PredicateLink", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.", "attributes": [{"name": "arg_type", "type": "str", "description": "The predicate link type."}], "parent_type": "ft.onto.base_ontology.PredicateMention", "child_type": "ft.onto.base_ontology.PredicateArgument"}, {"entry_name": "ft.onto.base_ontology.Dependency", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a syntactic dependency.", "attributes": [{"name": "dep_label", "type": "str", "description": "The dependency label."}, {"name": "rel_type", "type": "str"}], "parent_type": "ft.onto.base_ontology.Token", 
"child_type": "ft.onto.base_ontology.Token"}, {"entry_name": "ft.onto.base_ontology.EnhancedDependency", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html", "attributes": [{"name": "dep_label", "type": "str", "description": "The enhanced dependency label in Universal Dependency."}], "parent_type": "ft.onto.base_ontology.Token", "child_type": "ft.onto.base_ontology.Token"}, {"entry_name": "ft.onto.base_ontology.RelationLink", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a relation between two entity mentions", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EntityMention", "child_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.CrossDocEntityRelation", "parent_entry": "forte.data.ontology.top.MultiPackLink", "description": "A `Link` type entry which represent a relation between two entity mentions across the packs.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EntityMention", "child_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.CoreferenceGroup", "parent_entry": "forte.data.ontology.top.Group", "description": "A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.", "member_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.EventRelation", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a relation between two event mentions.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EventMention", "child_type": "ft.onto.base_ontology.EventMention"}, {"entry_name": "ft.onto.base_ontology.CrossDocEventRelation", "parent_entry": "forte.data.ontology.top.MultiPackLink", "description": "A `Link` type entry which represent a relation between two event mentions across the packs.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EventMention", "child_type": "ft.onto.base_ontology.EventMention"}, {"entry_name": "edu.cmu.EventMention", "parent_entry": "ft.onto.base_ontology.EventMention", "description": "A span based annotation `EventMention`, used to refer to a mention of an event.", "attributes": [{"name": "is_valid", "type": "bool"}]}, {"entry_name": "edu.cmu.CrossEventRelation", "parent_entry": "ft.onto.base_ontology.CrossDocEventRelation", "description": "Represent relation cross documents.", "attributes": [{"name": "evidence", "type": "str"}], "parent_type": "edu.cmu.EventMention", "child_type": "edu.cmu.EventMention"}]}, "project_configs": null, "multi_ontology": {}} \ No newline at end of file +{"project_name": "project-2-example", "project_type": "single_pack", "ontology": {"name": "all_ontology", "additional_prefixes": ["edu.cmu"], "definitions": [{"entry_name": "ft.onto.base_ontology.Token", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation :class:`Token`, used to represent a token or a word.", "attributes": [{"name": "pos", "type": "str"}, {"name": "ud_xpos", "type": "str", "description": "Language specific 
pos tag. Used in CoNLL-U Format. Refer to https://universaldependencies.org/format.html"}, {"name": "lemma", "type": "str", "description": "Lemma or stem of word form."}, {"name": "chunk", "type": "str"}, {"name": "ner", "type": "str"}, {"name": "sense", "type": "str"}, {"name": "is_root", "type": "bool"}, {"name": "ud_features", "type": "Dict", "key_type": "str", "value_type": "str"}, {"name": "ud_misc", "type": "Dict", "key_type": "str", "value_type": "str"}]}, {"entry_name": "ft.onto.base_ontology.Document", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Document`, normally used to represent a document."}, {"entry_name": "ft.onto.base_ontology.Sentence", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Sentence`, normally used to represent a sentence.", "attributes": [{"name": "speaker", "type": "str"}, {"name": "part_id", "type": "int"}, {"name": "sentiment", "type": "Dict", "key_type": "str", "value_type": "float"}]}, {"entry_name": "ft.onto.base_ontology.Phrase", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Phrase`.", "attributes": [{"name": "phrase_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.Utterance", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `Utterance`, normally used to represent an utterance in dialogue."}, {"entry_name": "ft.onto.base_ontology.PredicateArgument", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `PredicateArgument`, normally used to represent an argument of a predicate, can be linked to the predicate via the predicate link.", "attributes": [{"name": "ner_type", "type": "str"}, {"name": "predicate_lemma", "type": "str"}, {"name": "is_verb", "type": "bool"}]}, {"entry_name": "ft.onto.base_ontology.EntityMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `EntityMention`, normally used to represent an Entity Mention in a piece of text.", "attributes": [{"name": "ner_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.EventMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `EventMention`, used to refer to a mention of an event.", "attributes": [{"name": "event_type", "type": "str"}]}, {"entry_name": "ft.onto.base_ontology.PredicateMention", "parent_entry": "forte.data.ontology.top.Annotation", "description": "A span based annotation `PredicateMention`, normally used to represent a predicate (normally verbs) in a piece of text.", "attributes": [{"name": "predicate_lemma", "type": "str"}, {"name": "framenet_id", "type": "str"}, {"name": "is_verb", "type": "bool"}]}, {"entry_name": "ft.onto.base_ontology.PredicateLink", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a semantic role link between a predicate and its argument.", "attributes": [{"name": "arg_type", "type": "str", "description": "The predicate link type."}], "parent_type": "ft.onto.base_ontology.PredicateMention", "child_type": "ft.onto.base_ontology.PredicateArgument"}, {"entry_name": "ft.onto.base_ontology.Dependency", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a syntactic dependency.", "attributes": [{"name": "dep_label", "type": "str", "description": "The dependency label."}, {"name": "rel_type", "type": "str"}], 
"parent_type": "ft.onto.base_ontology.Token", "child_type": "ft.onto.base_ontology.Token"}, {"entry_name": "ft.onto.base_ontology.EnhancedDependency", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a enhanced dependency: \n https://universaldependencies.org/u/overview/enhanced-syntax.html", "attributes": [{"name": "dep_label", "type": "str", "description": "The enhanced dependency label in Universal Dependency."}], "parent_type": "ft.onto.base_ontology.Token", "child_type": "ft.onto.base_ontology.Token"}, {"entry_name": "ft.onto.base_ontology.RelationLink", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a relation between two entity mentions", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EntityMention", "child_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.CrossDocEntityRelation", "parent_entry": "forte.data.ontology.top.MultiPackLink", "description": "A `Link` type entry which represent a relation between two entity mentions across the packs.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EntityMention", "child_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.CoreferenceGroup", "parent_entry": "forte.data.ontology.top.Group", "description": "A group type entry that take `EntityMention`, as members, used to represent coreferent group of entities.", "member_type": "ft.onto.base_ontology.EntityMention"}, {"entry_name": "ft.onto.base_ontology.EventRelation", "parent_entry": "forte.data.ontology.top.Link", "description": "A `Link` type entry which represent a relation between two event mentions.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EventMention", "child_type": "ft.onto.base_ontology.EventMention"}, {"entry_name": "ft.onto.base_ontology.CrossDocEventRelation", "parent_entry": "forte.data.ontology.top.MultiPackLink", "description": "A `Link` type entry which represent a relation between two event mentions across the packs.", "attributes": [{"name": "rel_type", "type": "str", "description": "The type of the relation."}], "parent_type": "ft.onto.base_ontology.EventMention", "child_type": "ft.onto.base_ontology.EventMention"}, {"entry_name": "edu.cmu.EventMention", "parent_entry": "ft.onto.base_ontology.EventMention", "description": "A span based annotation `EventMention`, used to refer to a mention of an event.", "attributes": [{"name": "is_valid", "type": "bool"}]}, {"entry_name": "edu.cmu.CrossEventRelation", "parent_entry": "ft.onto.base_ontology.CrossDocEventRelation", "description": "Represent relation cross documents.", "attributes": [{"name": "evidence", "type": "str"}], "parent_type": "edu.cmu.EventMention", "child_type": "edu.cmu.EventMention"}]}, "project_configs": null, "multi_ontology": {}} \ No newline at end of file diff --git a/src/nlpviewer/lib/transform.ts b/src/nlpviewer/lib/transform.ts index 0fde169..c2568bd 100644 --- a/src/nlpviewer/lib/transform.ts +++ b/src/nlpviewer/lib/transform.ts @@ -6,7 +6,7 @@ import { ILink, IProjectConfigs, } from './interfaces'; -import {isEntryAnnotation, isEntryLink, camelCaseDeep} from './utils'; +import {camelCaseDeep} from './utils'; export function transformPack( rawPack: string, @@ -14,111 
+14,12 @@ export function transformPack( ): [ISinglePack, IOntology] { const data = JSON.parse(rawPack); const config = JSON.parse(rawOntology); - - const packData = data['py/state']; - const annotations = packData.annotations.filter((a: any) => !!a['py/state']); - const configTransformed = { constraints: [], ...camelCaseDeep(config), }; - const formatedAnnotations = annotations - .map((a: any) => { - const legendName = getLegendName(a); - - return { - span: { - begin: a['py/state']._span.begin, - end: a['py/state']._span.end, - }, - id: a['py/state']._tid + '', - legendId: legendName, - attributes: getAttrs(configTransformed, a), - }; - }) - .filter(Boolean); - - const links = packData.links - .map((link: any) => { - const legendName = getLegendName(link); - - return { - id: link['py/state']._tid + '', - fromEntryId: link['py/state']._parent + '', - toEntryId: link['py/state']._child + '', - legendId: legendName, - attributes: getAttrs(configTransformed, link), - }; - }) - .filter(Boolean); - - const groups = packData.groups.map((group: any) => { - const legendName = getLegendName(group); - - return { - id: group['py/state']._tid + '', - members: group['py/state']['_members']['py/set'].map((i: any) => i + ''), - memberType: getGroupType(legendName, configTransformed), - legendId: legendName, - attributes: getAttrs(configTransformed, group), - }; - }); - - const pack = { - text: packData._text, - annotations: formatedAnnotations, - links: links, - groups: groups, - attributes: - // Backward compatibility with Forte formats. - 'meta' in packData - ? packData.meta['py/state'] - : packData._meta['py/state'], - }; - - return [pack, configTransformed] as any; -} - -function getLegendName(a: any) { - return a['py/object']; -} - -function getAttrs(config: any, a: any) { - const legendName = getLegendName(a); - - const legend = config['definitions'].find( - (entry: any) => entry.entryName === legendName - ); - - if (!legend || !legend.attributes) { - return {}; - } - - const attrNames = legend.attributes.map((a: any) => a.name); - const attrs: any = {}; - - Object.keys(a['py/state']).forEach(key => { - if (attrNames.includes(key)) { - attrs[key] = a['py/state'][key]; - } - }); - - return attrs; -} - -function getGroupType(groupEntryName: any, config: any) { - const entry = config.definitions.find( - (ent: any) => ent.entryName === groupEntryName - ); - - if (isEntryAnnotation(config, entry.memberType)) { - return 'annotation'; - } else if (isEntryLink(config, entry.memberType)) { - return 'link'; - } else { - throw new Error('unknown group entry ' + groupEntryName); - } + return [data, configTransformed] as any; } export function transformBackAnnotation(annotation: IAnnotation): any {
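
Usage sketch for the new StavePackParser (illustrative only, not taken from the patch): it assumes `doc` is a fetched Document model instance with a serialized `textPack` and a related `project.ontology`, as in the handlers above; the entry type, span, tid, and attribute values are made up for demonstration.

    from stave_backend.lib.stave_pack_parser import StavePackParser

    # Build a parser from the serialized pack and ontology stored on the models.
    parser = StavePackParser(
        raw_pack=doc.textPack, raw_ontology=doc.project.ontology
    )

    # Render-ready dict for the frontend: text, annotations, links, groups, attributes.
    frontend_pack = parser.transform_pack()

    # Append an annotation entry; the parser returns the updated serialized pack.
    doc.textPack = parser.add_entry_to_doc(entry_dict={
        "py/object": "ft.onto.base_ontology.EntityMention",  # illustrative type
        "py/state": {
            "_span": {"begin": 0, "end": 5, "py/object": "forte.data.span.Span"},
            "_tid": 1234,  # illustrative tid; the handlers use uuid.uuid4().int
            "ner_type": "PER",
        },
    })

    # Delete the same entry by tid; like the handlers, construct a fresh parser
    # so it operates on the just-updated serialized pack.
    doc.textPack = StavePackParser(
        raw_pack=doc.textPack, raw_ontology=doc.project.ontology
    ).delete_annotation_from_doc(entry_tid="1234")
    doc.save()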