obot-platform · njhale · Jan 30, 2025 · Jan 29, 2025 · Jan 30, 2025
diff --git a/google/docs/move_doc.py b/google/docs/move_doc.py
@@ -14,13 +14,21 @@ def move_doc(drive_service, document_id, folder_path):
         return
 
     if folder_path.strip() == "/":
+        # Get the current parent folder(s)
+        file_metadata = drive_service.files().get(
+            fileId=document_id,
+            fields="parents"
+        ).execute()
+        current_parents = ",".join(file_metadata.get("parents", []))
+
         # Move the document back to the root folder
         drive_service.files().update(
             fileId=document_id,
-            addParents="root",  # Add to the root folder
-            removeParents="root",  # Ensure no redundant updates
+            addParents="root",
+            removeParents=current_parents,
             fields="id, parents"
         ).execute()
+
         print("Document moved back to the root folder.")
         return
 

diff --git a/google/docs/read_doc.py b/google/docs/read_doc.py
@@ -1,6 +1,9 @@
+import io
 import sys
 import os
 
+from googleapiclient.http import MediaIoBaseDownload
+
 from auth import client
 from id import extract_file_id
 
@@ -11,44 +14,26 @@ def main():
             raise ValueError('DOC_REF environment variable is missing or empty')
 
         file_id = extract_file_id(doc_ref)
-        service = client('docs', 'v1')
-        document = service.documents().get(documentId=file_id).execute()
 
-        print(convert_to_markdown(document))
+        service = client('drive', 'v3')
+
+        request = service.files().export_media(
+            fileId=file_id,
+            mimeType='text/markdown'
+        )
+        file = io.BytesIO()
+        downloader = MediaIoBaseDownload(file, request)
+        done = False
+
+        while not done:
+            _, done = downloader.next_chunk()
+
+        print(file.getvalue().decode('utf-8'))
 
     except Exception as err:
-        sys.stderr.write(err)
+        sys.stderr.write(f"{err}\n")  # write() only accepts str; passing the exception raised TypeError
         sys.exit(1)
 
-def convert_to_markdown(document):
-    md_text = ""
-    for element in document.get('body', {}).get('content', []):
-        if 'paragraph' in element:
-            for part in element['paragraph']['elements']:
-                text_run = part.get('textRun')
-                if text_run:
-                    md_text += text_run['content']
-            md_text += "\n\n"  # Separate paragraphs with extra newlines
-        elif 'table' in element:
-            md_text += parse_table(element['table'])
-            md_text += "\n\n"  # Extra newline after a table
-    return md_text
-
-def parse_table(table):
-    md_table = ""
-    for row in table.get('tableRows', []):
-        row_text = "|"
-        for cell in row.get('tableCells', []):
-            cell_text = ""
-            for content in cell.get('content', []):
-                if 'paragraph' in content:
-                    for element in content['paragraph']['elements']:
-                        text_run = element.get('textRun')
-                        if text_run:
-                            cell_text += text_run['content']
-            row_text += f" {cell_text.strip()} |"
-        md_table += row_text + "\n"
-    return md_table
 
 if __name__ == "__main__":
     main()
diff --git a/google/docs/requirements.txt b/google/docs/requirements.txt
@@ -1,5 +1,3 @@
 google-api-python-client
 google-auth-httplib2
-google-auth-oauthlib
-beautifulsoup4
-markdown
+google-auth-oauthlib
diff --git a/google/docs/tool.gpt b/google/docs/tool.gpt
@@ -20,7 +20,7 @@ Share Tools: Create Google Doc
 Share Context: Google Docs Context
 Credential: ../credential
 Param: doc_ref: Google Docs ID or share link of the document to read.
-Param: doc_drive_dir: Optional folder path in Google Drive to move the document to after updating it. Use "/" to move the document back to the root folder.
+Param: doc_drive_dir: Optional folder path in Google Drive to move the document to after updating it. Use `/` to move the document back to the root folder.
 Param: doc_content: Markdown formatted content to replace the existing content of the document with.
 
 #!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/update_doc.py

diff --git a/google/docs/update_doc.py b/google/docs/update_doc.py
@@ -1,138 +1,37 @@
 import sys
 import os
+import io
 
-import markdown
-from bs4 import BeautifulSoup
+from googleapiclient.http import MediaIoBaseUpload
 
 from auth import client
 from id import extract_file_id
 from move_doc import move_doc
 
 
-def markdown_to_google_doc_requests(markdown_content):
-    # Convert markdown content to HTML
-    html_content = markdown.markdown(markdown_content)
-    soup = BeautifulSoup(html_content, 'html.parser')
-
-    requests = []
-    current_index = 1
-
-    def add_text_request(text, bold=False, italic=False, underline=False, link=None):
-        nonlocal current_index
-        # Skip completely empty or whitespace-only values, except for single newlines
-        if not text.strip() and text != "\n":
-            return
-
-        text_style = {
-            "bold": bold,
-            "italic": italic,
-            "underline": underline,
-        }
-        if link:
-            text_style["link"] = {"url": link}
-
-        text_length = len(text)
-        requests.append({
-            "insertText": {
-                "location": {"index": current_index},
-                "text": text
-            }
-        })
-
-        if text_style or link:
-            requests.append({
-                "updateTextStyle": {
-                    "range": {
-                        "startIndex": current_index,
-                        "endIndex": current_index + text_length
-                    },
-                    "textStyle": text_style,
-                    "fields": ",".join(text_style.keys())
-                }
-            })
-
-        current_index += text_length
-
-        # Handle unstyled newlines
-        if text.endswith("\n"):
-            newline_length = 1
-            requests.append({
-                "updateTextStyle": {
-                    "range": {
-                        "startIndex": current_index - newline_length,
-                        "endIndex": current_index
-                    },
-                    "textStyle": {},  # Explicitly remove styles
-                    "fields": "bold,italic,underline,link"
-                }
-            })
-
-    for element in soup.contents:
-        if element.name in ['p']:
-            add_text_request(element.get_text())
-            add_text_request("\n")
-        elif element.name in ['h1', 'h2', 'h3']:
-            add_text_request(element.get_text(), bold=True)
-            add_text_request("\n")
-        elif element.name in ['ul']:
-            for li in element.find_all('li'):
-                add_text_request("\u2022 " + li.get_text())
-                add_text_request("\n")
-        elif element.name in ['ol']:
-            for i, li in enumerate(element.find_all('li'), start=1):
-                add_text_request(f"{i}. " + li.get_text())
-                add_text_request("\n")
-        elif element.name == 'a':
-            add_text_request(element.get_text(), link=element['href'])
-        elif element.name == 'table':
-            for row in element.find_all('tr'):
-                row_text = "\t".join([cell.get_text() for cell in row.find_all(['td', 'th'])]) + "\n"
-                add_text_request(row_text)
-        else:
-            add_text_request(element.get_text())
-            add_text_request("\n")
-
-    return requests
-
 def update_doc(file_id, doc_content, drive_dir):
+    """Replace the doc's content with doc_content (if given), then move it to drive_dir."""
+    drive_service = client('drive', 'v3')  # needed by move_doc() even when doc_content is empty
     if doc_content:
-        try:
-            requests = markdown_to_google_doc_requests(doc_content)
-        except Exception as e:
-            raise ValueError(f"Failed to parse given doc content: {e}")
-
-        docs_service = client('docs', 'v1')
-        drive_service = client('drive', 'v3')
-
-        # Retrieve the document to determine its length
-        document = docs_service.documents().get(documentId=file_id).execute()
-        content = document.get('body').get('content')
-        document_length = content[-1].get('endIndex') if content and 'endIndex' in content[-1] else 1
-
-        if document_length > 2:
-            # Prepare requests to clear existing document content
-            requests = [
-                {
-                    "deleteContentRange": {
-                        "range": {
-                            "startIndex": 1,
-                            "endIndex": document_length - 1
-                        }
-                    }
-                }
-            ] + requests
-
-        # Issue a batch update request to clear and apply new content
-        response = docs_service.documents().batchUpdate(
-            documentId=file_id,
-            body={"requests": requests}
+        # Convert Markdown content into an in-memory file
+        markdown_file = io.BytesIO(doc_content.encode("utf-8"))
+
+        # Use media upload for Drive import
+        media = MediaIoBaseUpload(markdown_file, mimetype="text/markdown", resumable=True)
+
+        # Overwrite the existing Google Doc with imported content
+        drive_service.files().update(
+            fileId=file_id,
+            media_body=media,
+            body={'mimeType': 'application/vnd.google-apps.document'}
         ).execute()
 
-        print(f"Document updated successfully: {file_id}")
+        print(f"Document replaced successfully using import: https://docs.google.com/document/d/{file_id}")
 
     # Move the document to the specified folder
     move_doc(drive_service, file_id, drive_dir)
 
+
 def main():
     try:
         doc_ref = os.getenv('DOC_REF')