Skip to content

Commit

Permalink
fix: fix the ext of the generated summary file
Browse files Browse the repository at this point in the history
  • Loading branch information
tybalex committed Mar 6, 2025
1 parent b9d7790 commit cb67727
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 15 deletions.
10 changes: 7 additions & 3 deletions file-summarizer/file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from summarize import DocumentSummarizer, MODEL, TIKTOKEN_MODEL, MAX_CHUNK_TOKENS, MAX_WORKERS
import os
import tiktoken
import asyncio

logger = setup_logger(__name__)

Expand All @@ -28,12 +29,15 @@ async def main():
try:
final_summary = summarizer.summarize(file_content)
except Exception as e:
logger.error(f"Summarization failed: {e}")
raise Exception(f"ERROR: Summarization failed: {e}")

response_str = f"Uploaded file {input_file} contains {len(tokens)} tokens.\n\n"
response_str += f"Summary of the file content:\n\n{final_summary}"
response_str = f"The uploaded file {input_file} contains too many tokens ({len(tokens)}), here is the summary of the file content:\n\n{final_summary}"
print(response_str)
return response_str
else: # if the file has less than TOKEN_THRESHOLD tokens, directly return the file content
print(file_content)
return file_content
return file_content

if __name__ == "__main__":
asyncio.run(main())
4 changes: 3 additions & 1 deletion file-summarizer/load_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import fitz # PyMuPDF
import docx
from pptx import Presentation
from helper import load_from_gptscript_workspace, save_to_gptscript_workspace
from helper import load_from_gptscript_workspace, setup_logger

logger = setup_logger(__name__)

def extract_text_from_pdf(pdf_bytes: bytes) -> str:
"""Extracts text from a PDF file given as bytes."""
Expand Down Expand Up @@ -53,6 +54,7 @@ async def load_text_from_file(file_path: str) -> str:
try:
file_content = await load_from_gptscript_workspace(file_path)
except Exception as e:
logger.error(f"Failed to load file from GPTScript workspace file {file_path}, Error: {e}")
raise ValueError(
f"Failed to load file from GPTScript workspace file {file_path}, Error: {e}"
)
Expand Down
2 changes: 1 addition & 1 deletion file-summarizer/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ async def main():
if output_file == "":
directory, file_name = os.path.split(input_file)
name, ext = os.path.splitext(file_name)
summary_file_name = f"{name}_summary{ext}"
summary_file_name = f"{name}_summary.md"
output_file = os.path.join(directory, summary_file_name)

try:
Expand Down
11 changes: 1 addition & 10 deletions file-summarizer/tool.gpt
Original file line number Diff line number Diff line change
@@ -1,20 +1,11 @@
---
Name: File Summarizer
Description: This tool summarizes the input file in the workspace and returns a text summary of the file content, either writing it to a file in the workspace or printing it to the console.
Credential: sys.model.provider.credential
Params: input_file: (Required) Name of the file in the workspace to summarize. Supported formats: [.md", ".txt", ".markdown", ".text", ".mdx", ".mdtxt", ".mdtxtx", ".docx", ".pdf", ".pptx"]. For any other file types, simply say it's not supported yet.
Params: input_file: (Required) Name of the file in the workspace to summarize. Supported formats: [.md, .txt, .markdown, .text, .mdx, .mdtxt, .mdtxtx, .docx, .pdf, .pptx]. For any other file types, simply say it's not supported yet.
Params: output_file: (Optional) Name of the file to save the summary to. Defaults to an empty string. If not provided, a summary file will be created in the same directory as the input file. To print to the console, set this to "NONE".

#!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/summarize.py

---
Name: File Reader
Description: This tool reads the input file in the workspace, returns the file content and print to the console.
Credential: sys.model.provider.credential
Params: input_file: (Required) Name of the file in the workspace to summarize. Supported formats: [.md", ".txt", ".markdown", ".text", ".mdx", ".mdtxt", ".mdtxtx", ".docx", ".pdf", ".pptx"]. For any other file types, simply say it's not supported yet.

#!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/file_reader.py

---
!metadata:*:category
Utilities
Expand Down

0 comments on commit cb67727

Please sign in to comment.