Merge pull request #752 from airbnb/mtl-update_repo_save
[kp] add html file into .kp folder
mengting1010 authored Feb 20, 2023
2 parents e835c73 + bdd81df commit e7f052e
Showing 3 changed files with 18 additions and 15 deletions.
knowledge_repo/post.py: 5 additions, 0 deletions
@@ -14,6 +14,7 @@
import uuid
import yaml
import PIL.Image
import markdown

logger = logging.getLogger(__name__)

@@ -334,6 +335,10 @@ def write(self, md, headers=None, images={}, interactive=False):
md
)

# convert md to html
html = markdown.markdown(md)
self._write_ref('knowledge.html', encode(html))

self._write_ref('knowledge.md', encode(md))

for image, data in list(images.items()):
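The added lines render the post body with the Python-Markdown package before writing knowledge.html next to knowledge.md. A minimal sketch of that call, using a made-up post body:

import markdown

md = "# Example post\n\nSome *knowledge* content."

# markdown.markdown() returns an HTML fragment (no <html>/<body> wrapper),
# which is what write() now stores as knowledge.html alongside knowledge.md.
html = markdown.markdown(md)
print(html)
# <h1>Example post</h1>
# <p>Some <em>knowledge</em> content.</p>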
knowledge_repo/repositories/s3repositorty.py: 2 additions, 1 deletion
@@ -98,8 +98,9 @@ def _save(self, file, file_path, src_paths=[]):
# upload files to S3
for dirpath, dirnames, filenames in os.walk(os.path.join(self._path, file_path)):
for filename in filenames:
content_type = "text/html" if filename.endswith(".html") else "binary/octet-stream"
upload_file_to_s3(self._s3_client, os.path.join(
dirpath, filename), self._s3_bucket, os.path.join(remove_prefix(dirpath, self._path), filename))
dirpath, filename), self._s3_bucket, os.path.join(remove_prefix(dirpath, self._path), filename), content_type)

# delete raw file after post processing and upload
if os.path.exists(file):
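The change to _save() picks a per-file content type before uploading: only the rendered knowledge.html is tagged text/html, and everything else keeps the generic binary/octet-stream default. A small sketch of that selection, with made-up filenames and a helper name that is not part of the repo:

# guess_content_type() is illustrative only; _save() inlines this expression.
def guess_content_type(filename):
    return "text/html" if filename.endswith(".html") else "binary/octet-stream"

for filename in ["knowledge.html", "knowledge.md", "images/plot.png"]:
    print(filename, "->", guess_content_type(filename))
# knowledge.html -> text/html
# knowledge.md -> binary/octet-stream
# images/plot.png -> binary/octet-stream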
knowledge_repo/utils/s3.py: 11 additions, 14 deletions
@@ -3,31 +3,25 @@
import logging
import os
import json
from s3path import S3Path
import re

logger = logging.getLogger(__name__)
AWS_S3_AUTH_PATH = '.configs/aws_s3_auth.json'


def parse_s3_path(s3_url):
"""Get s3_path for S3 Object URL
:param s3_url: url of s3 object
:return: bucket and key name
"""
path = S3Path.from_uri(s3_url)
return path.bucket, path.key


def parse_s3_uri(s3_uri):
"""Get s3_path for S3 Object URL
:param s3_url: url of s3 object
:return: s3_bucket, s3_client
"""
path = S3Path.from_uri(s3_uri)
uri_splt = path.key.split('/')
return path.bucket, get_s3_client(uri_splt[1], uri_splt[2], uri_splt[0]), uri_splt[3] if len(uri_splt) > 3 else ''

matches = re.match("s3://(.*?)/(.*)/(.*)/", s3_uri)
if matches:
bucket, _, key_name = matches.groups()
else:
raise ValueError(f'Cannot interpret {s3_uri}')
return bucket, get_s3_client(), key_name


def get_s3_client():
@@ -55,13 +49,15 @@ def upload_file_to_s3(
file_name,
bucket,
object_name=None,
content_type="binary/octet-stream",
):
"""Upload a file to an object in an S3 bucket
:param s3_client: a boto3 S3 client
:param file_name: File to upload
:param bucket: Bucket to upload to
:param object_name: S3 object name. If not specified, file_name is used
:param content_type: AWS S3 Content Type, default to "binary/octet-stream"
:return: True if file was uploaded, else False
"""

@@ -75,6 +71,7 @@
file_name,
bucket,
object_name,
ExtraArgs={'ContentType': content_type}
)
logger.info(response)
except ClientError as client_error:
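The rewritten parse_s3_uri() drops the s3path dependency and pulls the bucket and key prefix out of the URI with a regular expression; the pattern expects a trailing slash and discards the middle path segments. A sketch against a hypothetical URI:

import re

# Hypothetical URI; group 1 is the bucket, group 2 (the middle segments) is
# discarded, and group 3 is the final segment before the trailing slash.
s3_uri = "s3://example-bucket/configs/auth/posts/"

matches = re.match("s3://(.*?)/(.*)/(.*)/", s3_uri)
if matches:
    bucket, _, key_name = matches.groups()
    print(bucket, key_name)  # example-bucket posts
else:
    raise ValueError(f"Cannot interpret {s3_uri}")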

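The new content_type parameter reaches S3 through boto3's ExtraArgs, so the stored object carries a ContentType header. A minimal sketch of the equivalent direct boto3 call, with placeholder file, bucket, and key names:

import boto3

# Placeholder names; the repo helper upload_file_to_s3() wraps a call of this shape.
s3_client = boto3.client("s3")
s3_client.upload_file(
    "posts/my_post.kp/knowledge.html",       # local file to upload
    "example-bucket",                        # destination bucket
    "posts/my_post.kp/knowledge.html",       # object key
    ExtraArgs={"ContentType": "text/html"},  # lets browsers render the HTML in place
)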