fix models

Tauffer-Consulting · Dec 7, 2023 · 6a12755
1 parent 4915b51
commit 6a12755
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 44 deletions.
diff --git a/.domino/compiled_metadata.json b/.domino/compiled_metadata.json
@@ -466,16 +466,32 @@
             "description": "Input data for TextSummarizerPiece",
             "properties": {
                 "input_file_path": {
+                    "anyOf": [
+                        {
+                            "type": "string"
+                        },
+                        {
+                            "type": "null"
+                        }
+                    ],
                     "default": "",
                     "description": "The path to the text file to summarize.",
-                    "title": "Input File Path",
-                    "type": "string"
+                    "from_upstream": "always",
+                    "title": "Input File Path"
                 },
                 "input_text": {
+                    "anyOf": [
+                        {
+                            "type": "string"
+                        },
+                        {
+                            "type": "null"
+                        }
+                    ],
                     "default": "",
                     "description": "The text to summarize.",
                     "title": "Input Text",
-                    "type": "string"
+                    "widget": "textarea"
                 },
                 "output_type": {
                     "allOf": [
@@ -1046,8 +1062,8 @@
                     ],
                     "default": null,
                     "description": "Text to summarize",
-                    "required": false,
-                    "title": "Text"
+                    "title": "Text",
+                    "widget": "textarea"
                 },
                 "text_file_path": {
                     "anyOf": [
@@ -1060,7 +1076,7 @@
                     ],
                     "default": null,
                     "description": "Use it only if not using text field. File path to the text to summarize",
-                    "required": false,
+                    "from_upstream": "always",
                     "title": "Text File Path"
                 },
                 "output_type": {

diff --git a/pieces/TextSummarizerLocalPiece/models.py b/pieces/TextSummarizerLocalPiece/models.py
@@ -1,5 +1,5 @@
 from pydantic import BaseModel, Field, FilePath, validators
-from typing import Union
+from typing import Union, Optional
 from enum import Enum
 
 
@@ -12,13 +12,19 @@ class InputModel(BaseModel):
     """
     Input data for TextSummarizerPiece
     """
-    input_file_path: str = Field(
+    input_file_path: Optional[str] = Field(
         description='The path to the text file to summarize.',
-        default=""
+        default="",
+        json_schema_extra={
+            "from_upstream": "always"
+        }
     )
-    input_text: str = Field(
+    input_text: Optional[str] = Field(
         description='The text to summarize.',
-        default=""
+        default="",
+        json_schema_extra={
+            'widget': "textarea",
+        }
     )
     output_type: OutputTypeType = Field(
         description='The type of output fot the result text.',

diff --git a/pieces/TextSummarizerLocalPiece/piece.py b/pieces/TextSummarizerLocalPiece/piece.py
@@ -6,37 +6,49 @@
 
 
 
-def summarize_long_text(text: str, summarizer, iteration: int=0):
-    """
-    Generate the summary by concatenating the summaries of the individual chunks.
-    """
-    iteration += 1
-    print(f"Iteration: {iteration}")
-
-    # Preprocess text
-    text = text.lower().replace(".", " ").replace(",", " ").replace("\n", " ")
-    text = "".join(ch if ch.isalnum() or ch == " " else " " for ch in text)
-
-    # Split the input text into chunks
-    chunk_size = 1000
-    chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
-    print(f"chunks to process: {len(chunks)}")
-
-    # Generate the summary for each chunk
-    summary_list = [
-        summarizer(chunk, max_length=60, min_length=30, no_repeat_ngram_size=3)[0]['summary_text']
-        for chunk in chunks
-    ]
-    summary = " ".join(summary_list)
-
-    if len(summary) > 2000:
-        return summarize_long_text(summary, summarizer, iteration)
-    else:
-        return summary
-
-
 class TextSummarizerLocalPiece(BasePiece):
 
+    def summarize_long_text(self, text: str, summarizer, iteration: int=0):
+        """
+        Generate the summary by concatenating the summaries of the individual chunks.
+        """
+        iteration += 1
+        print(f"Iteration: {iteration}")
+
+        # Preprocess text
+        text = text.lower().replace(".", " ").replace(",", " ").replace("\n", " ")
+        text = "".join(ch if ch.isalnum() or ch == " " else " " for ch in text)
+
+        # Split the input text into chunks
+        chunk_size = 1000
+        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+        print(f"chunks to process: {len(chunks)}")
+
+        # Generate the summary for each chunk
+        summary_list = [
+            summarizer(chunk, max_length=60, min_length=30, no_repeat_ngram_size=3)[0]['summary_text']
+            for chunk in chunks
+        ]
+        summary = " ".join(summary_list)
+
+        if len(summary) > 2000:
+            return self.summarize_long_text(summary, summarizer, iteration)
+        else:
+            return summary
+
+    def format_display_result(self, final_summary: str):
+        md_text = f"""
+## Summarized text
+{final_summary}
+"""
+        file_path = f"{self.results_path}/display_result.md"
+        with open(file_path, "w") as f:
+            f.write(md_text)
+        self.display_result = {
+            "file_type": "md",
+            "file_path": file_path
+        }
+
     def piece_function(self, input_data: InputModel):
 
         # Set device
@@ -65,7 +77,7 @@ def piece_function(self, input_data: InputModel):
 
         # Run summarizer
         self.logger.info("Running summarizer...")
-        result = summarize_long_text(text=text_str, summarizer=summarizer)
+        result = self.summarize_long_text(text=text_str, summarizer=summarizer)
 
         # Return result
         if input_data.output_type == "xcom":
@@ -81,6 +93,7 @@ def piece_function(self, input_data: InputModel):
             with open(output_file_path, "w") as f:
                 f.write(result)
 
+        self.format_display_result(final_summary=result)
         return OutputModel(
             message=msg,
             summary_result=summary_result,

diff --git a/pieces/TextSummarizerPiece/models.py b/pieces/TextSummarizerPiece/models.py
@@ -23,16 +23,20 @@ class LLMModelType(str, Enum):
 class InputModel(BaseModel):
     """
     TextSummarizerPiece Input model
-    """    
+    """
     text: Optional[str] = Field(
         default=None,
         description="Text to summarize",
-        required=False # Setting to false because can use text or text_file_path
+        json_schema_extra={
+            'widget': "textarea",
+        }
     )
     text_file_path: Optional[str] = Field(
         default=None,
         description="Use it only if not using text field. File path to the text to summarize",
-        required=False # Setting to false because can use text or text_file_path
+        json_schema_extra={
+            "from_upstream": "always"
+        }
     )
     output_type: OutputTypeType = Field(
         default=OutputTypeType.string,