Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: create and update google docs by importing markdown directly #385

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions google/docs/move_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,21 @@ def move_doc(drive_service, document_id, folder_path):
return

if folder_path.strip() == "/":
# Get the current parent folder(s)
file_metadata = drive_service.files().get(
fileId=document_id,
fields="parents"
).execute()
current_parents = ",".join(file_metadata.get("parents", []))

# Move the document back to the root folder
drive_service.files().update(
fileId=document_id,
addParents="root", # Add to the root folder
removeParents="root", # Ensure no redundant updates
addParents="root",
removeParents=current_parents,
fields="id, parents"
).execute()

print("Document moved back to the root folder.")
return

Expand Down
49 changes: 17 additions & 32 deletions google/docs/read_doc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import io
import sys
import os

from googleapiclient.http import MediaIoBaseDownload

from auth import client
from id import extract_file_id

Expand All @@ -11,44 +14,26 @@ def main():
raise ValueError('DOC_REF environment variable is missing or empty')

file_id = extract_file_id(doc_ref)
service = client('docs', 'v1')
document = service.documents().get(documentId=file_id).execute()

print(convert_to_markdown(document))
service = client('drive', 'v3')

request = service.files().export_media(
fileId=file_id,
mimeType='text/markdown'
)
file = io.BytesIO()
downloader = MediaIoBaseDownload(file, request)
done = False

while not done:
_, done = downloader.next_chunk()

print(file.getvalue().decode('utf-8'))

except Exception as err:
sys.stderr.write(err)
sys.exit(1)

def convert_to_markdown(document):
    """Flatten a Google Docs document resource (a dict) into Markdown-like text.

    Walks ``document['body']['content']`` in order and handles two kinds of
    structural element:

    * ``paragraph`` -- the ``content`` of every ``textRun`` in the paragraph
      is concatenated and followed by a blank line.
    * ``table`` -- rendered by ``parse_table`` and followed by a blank line.

    Any other element (for example a section break) is ignored.

    Args:
        document: Mapping shaped like the Docs API ``documents.get`` response.
            A missing ``body`` or ``content`` key yields an empty string.

    Returns:
        The accumulated text as a single ``str``.
    """
    blocks = []
    for element in document.get('body', {}).get('content', []):
        if 'paragraph' in element:
            # Paragraphs may contain non-text elements (inline objects, page
            # breaks, ...); only entries carrying a textRun contribute text.
            text = "".join(
                part['textRun'].get('content', '')
                for part in element['paragraph'].get('elements', [])
                if part.get('textRun')
            )
            blocks.append(text + "\n\n")  # Separate paragraphs with extra newlines
        elif 'table' in element:
            blocks.append(parse_table(element['table']) + "\n\n")  # Extra newline after a table
    # Join once at the end instead of growing a string inside the loop.
    return "".join(blocks)

def parse_table(table):
    """Render a Google Docs table element (a dict) as a Markdown pipe table.

    Each entry of ``table['tableRows']`` becomes one ``| cell | cell |`` line.
    Cell text is the concatenation of every ``textRun`` content found in the
    cell's paragraphs.

    To keep the result a valid Markdown table:

    * the first row is treated as the header and is followed by a
      ``| --- |`` delimiter row (without it renderers show plain text);
    * line breaks inside a cell are collapsed to single spaces, because a
      table row must stay on one line;
    * literal ``|`` characters in cell text are escaped as ``\\|``.

    Args:
        table: Mapping with an optional ``tableRows`` list; each row has an
            optional ``tableCells`` list.

    Returns:
        The table as a ``str`` with one trailing newline per emitted line, or
        an empty string when there are no rows.
    """
    lines = []
    for row_index, row in enumerate(table.get('tableRows', [])):
        cells = []
        for cell in row.get('tableCells', []):
            fragments = []
            for content in cell.get('content', []):
                if 'paragraph' not in content:
                    continue
                for element in content['paragraph'].get('elements', []):
                    text_run = element.get('textRun')
                    if text_run:
                        fragments.append(text_run.get('content', ''))
            # Collapse paragraph/soft line breaks so the row stays on one line.
            pieces = (piece.strip() for piece in "".join(fragments).splitlines())
            cell_text = " ".join(piece for piece in pieces if piece)
            # An unescaped pipe would be read as a column boundary.
            cells.append(cell_text.replace("|", "\\|"))

        lines.append("|" + "".join(f" {cell} |" for cell in cells))
        if row_index == 0 and cells:
            # Header delimiter row; required for the table to be recognised.
            lines.append("|" + " --- |" * len(cells))

    return "".join(line + "\n" for line in lines)

if __name__ == "__main__":
main()
4 changes: 1 addition & 3 deletions google/docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
beautifulsoup4
markdown
google-auth-oauthlib
2 changes: 1 addition & 1 deletion google/docs/tool.gpt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Share Tools: Create Google Doc
Share Context: Google Docs Context
Credential: ../credential
Param: doc_ref: Google Docs ID or share link of the document to read.
Param: doc_drive_dir: Optional folder path in Google Drive to move the document to after updating it. Use "/" to move the document back to the root folder.
Param: doc_drive_dir: Optional folder path in Google Drive to move the document to after updating it. Use `/` to move the document back to the root folder.
Param: doc_content: Markdown formatted content to replace the existing content of the document with.

#!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/update_doc.py
Expand Down
131 changes: 15 additions & 116 deletions google/docs/update_doc.py
Original file line number Diff line number Diff line change
@@ -1,138 +1,37 @@
import sys
import os
import io

import markdown
from bs4 import BeautifulSoup
from googleapiclient.http import MediaIoBaseUpload

from auth import client
from id import extract_file_id
from move_doc import move_doc


def markdown_to_google_doc_requests(markdown_content):
# Purpose: turn a Markdown string into a list of request dicts
# ("insertText" / "updateTextStyle") and return that list.
# NOTE(review): this view of the file has lost its indentation, so the nesting
# described in the comments below is inferred from the statements themselves
# (e.g. which lines belong to add_text_request, and whether each trailing
# add_text_request("\n") sits inside or after its loop) -- confirm against the
# real file before relying on it.
# Convert markdown content to HTML
html_content = markdown.markdown(markdown_content)
soup = BeautifulSoup(html_content, 'html.parser')

# Accumulated request dicts, in the order they are appended.
requests = []
# Insertion offset for the next piece of text; starts at 1 and is advanced by
# len(text) after every insert.
current_index = 1

# Helper: append an insertText request for `text` at current_index, then an
# updateTextStyle request over the same range, then advance current_index.
def add_text_request(text, bold=False, italic=False, underline=False, link=None):
nonlocal current_index
# Skip completely empty or whitespace-only values, except for single newlines
if not text.strip() and text != "\n":
return

# bold/italic/underline are always present as keys (possibly False), so the
# style request below explicitly sets or clears each of them.
text_style = {
"bold": bold,
"italic": italic,
"underline": underline,
}
if link:
text_style["link"] = {"url": link}

text_length = len(text)
requests.append({
"insertText": {
"location": {"index": current_index},
"text": text
}
})

# NOTE(review): text_style always holds at least three keys, so this condition
# is always true and the style request is emitted for every insert.
if text_style or link:
requests.append({
"updateTextStyle": {
"range": {
"startIndex": current_index,
"endIndex": current_index + text_length
},
"textStyle": text_style,
# Field mask lists exactly the keys present in text_style.
"fields": ",".join(text_style.keys())
}
})

current_index += text_length

# Handle unstyled newlines
# When the inserted text ends with "\n", a second style request targets just
# that final character with an empty textStyle and a mask naming
# bold/italic/underline/link.
if text.endswith("\n"):
newline_length = 1
requests.append({
"updateTextStyle": {
"range": {
"startIndex": current_index - newline_length,
"endIndex": current_index
},
"textStyle": {}, # Explicitly remove styles
"fields": "bold,italic,underline,link"
}
})

# Only the direct children of the parsed HTML (soup.contents) are visited;
# each branch reads the element's text via get_text(), so inline markup nested
# inside it is not styled separately.
for element in soup.contents:
if element.name in ['p']:
add_text_request(element.get_text())
add_text_request("\n")
# Headings h1-h3 are inserted as bold text (bold=True).
elif element.name in ['h1', 'h2', 'h3']:
add_text_request(element.get_text(), bold=True)
add_text_request("\n")
# Unordered list items are prefixed with a literal bullet character.
elif element.name in ['ul']:
for li in element.find_all('li'):
add_text_request("\u2022 " + li.get_text())
add_text_request("\n")
# Ordered list items are prefixed with "1. ", "2. ", ... counted from 1.
elif element.name in ['ol']:
for i, li in enumerate(element.find_all('li'), start=1):
add_text_request(f"{i}. " + li.get_text())
add_text_request("\n")
# This branch only matches an <a> that is itself a direct child of the soup.
elif element.name == 'a':
add_text_request(element.get_text(), link=element['href'])
# Tables are flattened to text: one line per <tr>, cells joined with tabs.
elif element.name == 'table':
for row in element.find_all('tr'):
row_text = "\t".join([cell.get_text() for cell in row.find_all(['td', 'th'])]) + "\n"
add_text_request(row_text)
# Anything else falls back to its plain text followed by a newline.
else:
add_text_request(element.get_text())
add_text_request("\n")

return requests

def update_doc(file_id, doc_content, drive_dir):
if doc_content:
try:
requests = markdown_to_google_doc_requests(doc_content)
except Exception as e:
raise ValueError(f"Failed to parse given doc content: {e}")

docs_service = client('docs', 'v1')
drive_service = client('drive', 'v3')

# Retrieve the document to determine its length
document = docs_service.documents().get(documentId=file_id).execute()
content = document.get('body').get('content')
document_length = content[-1].get('endIndex') if content and 'endIndex' in content[-1] else 1

if document_length > 2:
# Prepare requests to clear existing document content
requests = [
{
"deleteContentRange": {
"range": {
"startIndex": 1,
"endIndex": document_length - 1
}
}
}
] + requests

# Issue a batch update request to clear and apply new content
response = docs_service.documents().batchUpdate(
documentId=file_id,
body={"requests": requests}
# Convert Markdown content into an in-memory file
markdown_file = io.BytesIO(doc_content.encode("utf-8"))

# Use media upload for Drive import
media = MediaIoBaseUpload(markdown_file, mimetype="text/markdown", resumable=True)

# Overwrite the existing Google Doc with imported content
updated_file = drive_service.files().update(
fileId=file_id,
media_body=media,
body={'mimeType': 'application/vnd.google-apps.document'}
).execute()

print(f"Document updated successfully: {file_id}")
print(f"Document replaced successfully using import: https://docs.google.com/document/d/{file_id}")

# Move the document to the specified folder
move_doc(drive_service, file_id, drive_dir)


def main():
try:
doc_ref = os.getenv('DOC_REF')
Expand Down