-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdoc_gpt.py
113 lines (93 loc) · 3.56 KB
/
doc_gpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
from PIL import Image
import os
from openai.error import OpenAIError
from doc_components.sidebar import sidebar
from utils_doc import (
embed_docs,
get_answer,
get_sources,
parse_docx,
parse_pdf,
parse_txt,
search_docs,
text_to_docs,
wrap_text_in_html,
)
def clear_submit() -> None:
    """Reset the "submit" flag stored in the Streamlit session state.

    Passed as the ``on_change`` callback of the input widgets in this script,
    so editing an input drops the flag that the submit handler reads.
    """
    session = st.session_state
    session["submit"] = False
# Page-level configuration: browser tab title, icon and wide layout.
st.set_page_config(
    page_title="AI writer assistant",
    page_icon="img/Oxta_MLOpsFactor_logo.png",
    layout="wide",
)

# CSS overrides, injected as raw HTML through st.markdown.
# NOTE(review): ".css-1egvi7u" looks like a Streamlit-generated class name and
# is presumably version-specific - confirm it still matches after upgrades.
top_margin_style = "<style>.css-1egvi7u {margin-top: -4rem;}</style>"
# Hides the page <header> element.
hide_decoration_bar_style = "<style>header {visibility: hidden;}</style>"
# Hides the "made with streamlit" footer and the main menu area.
hide_streamlit_footer = (
    "<style>#MainMenu {visibility: hidden;}\n"
    "footer {visibility: hidden;}</style>"
)
for css_override in (
    top_margin_style,
    hide_decoration_bar_style,
    hide_streamlit_footer,
):
    st.markdown(css_override, unsafe_allow_html=True)

# Page title followed by the project's sidebar component.
st.header("📖Ask your Doc")
sidebar()
# File picker for the document to be questioned. Changing the selection runs
# clear_submit, which resets the "submit" flag in the session state.
uploaded_file = st.file_uploader(
    "Upload a pdf, docx, or txt file",
    type=["pdf", "docx", "txt"],
    help="Scanned documents are not supported yet!",
    on_change=clear_submit,
)

# Both stay None until a file has been parsed (doc) and embedded (index);
# the rest of the script tests them before use.
index = None
doc = None
if uploaded_file is not None:
    # Match on a lower-cased name: str.endswith is case-sensitive, so a file
    # called "REPORT.PDF" would otherwise fall through to the ValueError.
    file_name = uploaded_file.name.lower()
    if file_name.endswith(".pdf"):
        doc = parse_pdf(uploaded_file)
    elif file_name.endswith(".docx"):
        doc = parse_docx(uploaded_file)
    elif file_name.endswith(".txt"):
        doc = parse_txt(uploaded_file)
    else:
        raise ValueError("File type not supported!")

    text = text_to_docs(doc)
    try:
        with st.spinner("Indexing document... This may take a while⏳"):
            index = embed_docs(text)
        # Only reached when embed_docs did not raise OpenAIError; the submit
        # handler checks this flag before answering.
        st.session_state["api_key_configured"] = True
    except OpenAIError as e:
        # Show the API error in the page; index stays None.
        st.error(e._message)
# Question input; editing it runs clear_submit, which resets the "submit" flag.
query = st.text_area("Ask a question about the document", on_change=clear_submit)

# Optional display toggles, grouped in a collapsible panel.
advanced_options = st.expander("Advanced Options")
show_all_chunks = advanced_options.checkbox(
    "Show all chunks retrieved from vector search"
)
show_full_doc = advanced_options.checkbox("Show parsed contents of the document")

# The parsed document gets its own top-level expander, shown only when the
# toggle is on and a document has been parsed.
if show_full_doc and doc:
    document_panel = st.expander("Document")
    # Rendered as raw HTML: a workaround for st.markdown rendering LaTeX.
    # NOTE(review): the document text is interpolated into HTML here; whether
    # wrap_text_in_html escapes it is not visible in this file - confirm.
    document_panel.markdown(
        f"<p>{wrap_text_in_html(doc)}</p>", unsafe_allow_html=True
    )
button = st.button("Submit")

# Answer on an explicit click, or on a rerun while the "submit" flag is still
# set in the session state (clear_submit resets it).
if button or st.session_state.get("submit"):
    # Select the first unmet precondition: API key, then document, then question.
    if not st.session_state.get("api_key_configured"):
        problem = "Please configure your OpenAI API key!"
    elif not index:
        problem = "Please upload a document!"
    elif not query:
        problem = "Please enter a question!"
    else:
        problem = None

    if problem is not None:
        st.error(problem)
    else:
        st.session_state["submit"] = True

        # Two side-by-side output columns: answer on the left, sources on the right.
        answer_col, sources_col = st.columns(2)
        sources = search_docs(index, query)
        try:
            answer = get_answer(sources, query)
            if not show_all_chunks:
                # Narrow the retrieved chunks down to the sources for the answer.
                sources = get_sources(answer, sources)

            answer_col.markdown("#### Answer")
            # Display only the text that precedes the "SOURCES: " marker.
            answer_text = answer["output_text"].split("SOURCES: ")[0]
            answer_col.markdown(answer_text)

            sources_col.markdown("#### Sources")
            for chunk in sources:
                sources_col.markdown(chunk.page_content)
                sources_col.markdown(chunk.metadata["source"])
                sources_col.markdown("---")
        except OpenAIError as e:
            # Show the API error in the page.
            st.error(e._message)
# Notice about OpenAI API usage, written at the bottom of the page.
usage_notice = "Each document are using OpenAI API call. Please consider before proceeding."
st.write(usage_notice)
# Image display is currently disabled:
# st.image(img, caption=None, width=200)