diff --git a/basemodels/manifest/data/groundtruth.py b/basemodels/manifest/data/groundtruth.py
index 65dfcba..9074306 100644
--- a/basemodels/manifest/data/groundtruth.py
+++ b/basemodels/manifest/data/groundtruth.py
@@ -1,7 +1,7 @@
 from typing import List, Optional, Union
 
 import requests
-from pydantic import BaseModel, HttpUrl, ValidationError
+from pydantic import BaseModel, HttpUrl, ValidationError, conlist, validator, root_validator, Field
 from requests import RequestException
 from typing_extensions import Literal
 
@@ -78,13 +78,36 @@ class ILASGroundtruthEntry(BaseModel):
 ilas_groundtruth_entry_type = List[List[ILASGroundtruthEntry]]
 ILASGroundtruthEntryModel = create_wrapper_model(ilas_groundtruth_entry_type)
 
+class TLMSSGroundTruthEntry(BaseModel):
+    start: int
+    end: int
+    label: str
+
+
+"""
+Groundtruth file format for `text_label_multiple_span_select` job type
+
+{
+  "https://domain.com/file1.txt": [
+    {
+      "start": 0,
+      "end": 4,
+      "label": "0"
+    }
+  ]
+}
+"""
+tlmss_groundtruth_entry_type = List[TLMSSGroundTruthEntry]
+TLMSSGroundTruthEntryModel = create_wrapper_model(tlmss_groundtruth_entry_type)
+
+
 groundtruth_entry_models_map = {
     "image_label_binary": ILBGroundtruthEntryModel,
     "image_label_multiple_choice": ILMCGroundtruthEntryModel,
     "image_label_area_select": ILASGroundtruthEntryModel,
+    "text_label_multiple_span_select": TLMSSGroundTruthEntryModel,
 }
 
-
 def validate_content_type(uri: str) -> None:
     """Validate uri content type"""
     try:
diff --git a/basemodels/manifest/manifest.py b/basemodels/manifest/manifest.py
index dcf2ebe..73974a9 100644
--- a/basemodels/manifest/manifest.py
+++ b/basemodels/manifest/manifest.py
@@ -37,6 +37,7 @@ class BaseJobTypesEnum(str, Enum):
     image_label_binary = "image_label_binary"
     image_label_multiple_choice = "image_label_multiple_choice"
     text_free_entry = "text_free_entry"
+    text_label_multiple_span_select = "text_label_multiple_span_select"
     text_multiple_choice_one_option = "text_multiple_choice_one_option"
     text_multiple_choice_multiple_options = "text_multiple_choice_multiple_options"
     image_label_area_adjust = "image_label_area_adjust"
diff --git a/tests/test_manifest_validation.py b/tests/test_manifest_validation.py
old mode 100755
new mode 100644
index 9f297f9..dfccdc9
--- a/tests/test_manifest_validation.py
+++ b/tests/test_manifest_validation.py
@@ -721,6 +721,43 @@ def test_groundtruth_uri_ilas_invalid_key(self):
         with self.assertRaises(ValidationError):
             self.validate_groundtruth_response("image_label_area_select", body)
 
+    def test_groundtruth_uri_tlmss_valid(self):
+        groundtruth_uri = "https://domain.com/file1.txt"
+        body = {
+            groundtruth_uri: [
+                {"start": 0, "end": 4, "label": "0"},
+                {"start": 17, "end": 89, "label": "1"},
+            ]
+        }
+        self.register_http_response(groundtruth_uri, method=httpretty.HEAD, headers={"Content-Type": "text/plain"})
+        self.validate_groundtruth_response("text_label_multiple_span_select", body)
+
+
+    def test_groundtruth_uri_tlmss_invalid_key(self):
+        body = {
+            "not_uri": [
+                {"start": 0, "end": 4, "label": "0"},
+                {"start": 17, "end": 89, "label": "1"},
+            ]
+        }
+
+        with self.assertRaises(ValidationError):
+            self.validate_groundtruth_response("text_label_multiple_span_select", body)
+
+
+    def test_groundtruth_uri_tlmss_invalid_value(self):
+        body = {
+            "https://www.domain.com/file1.txt": [
+                {"span": [0, 4]},
+                {"span": [17, 89], "label": "1"},
+            ]
+        }
+
+        with self.assertRaises(ValidationError):
+            self.validate_groundtruth_response("text_label_multiple_span_select", body)
+
+
+
     def test_groundtruth_uri_ilas_invalid_value(self):
         body = {"https://domain.com/file1.jpeg": [[True]]}
 