Skip to content

Commit

Permalink
Server Update
Browse files Browse the repository at this point in the history
  • Loading branch information
Ans155 committed Mar 5, 2024
1 parent a3c117c commit a5f815c
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 50 deletions.
4 changes: 3 additions & 1 deletion Server/.env
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
GOOGLE_API_KEY="AIzaSyApZQkstczSJgK67uhKjBU8IlPT2wGYrag"
GOOGLE_API_KEY="AIzaSyApZQkstczSJgK67uhKjBU8IlPT2wGYrag"
astra_db_application_token = "AstraCS:caMHvgsIrkHpjTCAoyhRnsRK:945bdbb46d23f616e73a5a3675bcbfa6353152446500bec0e2ab1185119736f3"
astra_db_api_endpoint = "https://328b1b9d-5197-46ee-9b0b-5435b0c3543a-us-east1.apps.astra.datastax.com"
Binary file modified Server/__pycache__/test.cpython-311.pyc
Binary file not shown.
Binary file not shown.
Binary file removed Server/pdf/meta-earnings-report.pdf
Binary file not shown.
Binary file removed Server/pdf/tesla-earnings-report.pdf
Binary file not shown.
8 changes: 7 additions & 1 deletion Server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
streamlit
google-generativeai
python-dotenv
langchain
langchain==0.0.266
PyPDF2
faiss-cpu
langchain_google_genai
--upgrade astrapy
langchain_community
pypdf==3.15.0
requests
fastapi
uvicorn
sentence_transformers==2.2.2
InstructorEmbedding==1.0.1
9 changes: 2 additions & 7 deletions Server/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
load_dotenv()
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

astra_db_application_token=os.getenv("astra_db_application_token")
astra_db_api_endpoint=os.getenv("astra_db_api_endpoint")
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
app = FastAPI()
app.add_middleware(
Expand Down Expand Up @@ -74,7 +75,6 @@ def get_conversational_chain():
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.8)

prompt = PromptTemplate(template=prompt_template, input_variables=["chat_history","context", "human_input"])

# conversation = ConversationChain(
# prompt=prompt,
# llm=model,
Expand All @@ -83,17 +83,12 @@ def get_conversational_chain():
# )
# parser = StrOutputParser()
# chain = prompt | model | parser


chain = load_qa_chain(model, chain_type="stuff",memory=memory,prompt=prompt)
#print(prompt)
return chain

def user_input(user_question):
#embeddings = HuggingFaceInstructEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings = HuggingFaceInstructEmbeddings( model_name="hkunlp/instructor-large")
astra_db_application_token = "AstraCS:caMHvgsIrkHpjTCAoyhRnsRK:945bdbb46d23f616e73a5a3675bcbfa6353152446500bec0e2ab1185119736f3"
astra_db_api_endpoint = "https://328b1b9d-5197-46ee-9b0b-5435b0c3543a-us-east1.apps.astra.datastax.com"
vstore = AstraDB(
embedding=embeddings,
collection_name="pdfdata",
Expand Down
71 changes: 30 additions & 41 deletions Server/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,62 +12,51 @@
from langchain.embeddings import HuggingFaceInstructEmbeddings
import requests
from bs4 import BeautifulSoup

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import AstraDB
from langchain_core.output_parsers import StrOutputParser
load_dotenv()
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
astra_db_application_token=os.getenv("astra_db_application_token")
astra_db_api_endpoint=os.getenv("astra_db_api_endpoint")
load_dotenv()
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))



def get_website_text(url, timeout=10):
    """Download a web page and return the text of its main textual elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The text of every ``p``, ``h1``-``h6``, ``li`` and ``span`` element,
        joined with single spaces, or ``None`` when the server answers with
        a status code other than 200.

    Raises:
        requests.exceptions.RequestException: Connection failures and
            timeouts are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: Failed to fetch URL (status code: {response.status_code})")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE: find_all matches nested tags too, so the text of a <span> inside
    # a <p> is emitted once for the paragraph and again for the span.
    text_elements = soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'span'])

    return ' '.join(element.get_text(separator='\n') for element in text_elements)


def get_pdf_text_from_folder(folder_path):
# NOTE(review): this span looks like a diff rendered without +/- markers and without
# indentation, so two alternative bodies appear back to back. Presumably the first is
# the superseded version and the second the current one — confirm against the repository.
#
# Body 1: walks folder_path, opens each file whose name ends in ".pdf" with PdfReader,
# concatenates page.extract_text() for every page, removes "\n" and "\t" characters and
# returns the resulting string.

text = ""
for filename in os.listdir(folder_path):

if filename.endswith(".pdf"):
filepath = os.path.join(folder_path, filename)
pdf_reader = PdfReader(filepath)
for page in pdf_reader.pages:
text += page.extract_text()

text = text.replace("\n", "").replace("\t", "")
return text
# Body 2: hands folder_path to PyPDFDirectoryLoader and returns whatever loader.load()
# produces (documents rather than one concatenated string).
loader = PyPDFDirectoryLoader(folder_path)
docs = loader.load()
return docs


def get_text_chunks(text):
# NOTE(review): two alternative bodies appear back to back here (diff shown without
# +/- markers); presumably the first was replaced by the second — confirm.
#
# Body 1: splits a plain string with split_text using chunk_size=10000 and
# chunk_overlap=1000, and returns the resulting chunks.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
chunks = text_splitter.split_text(text)
return chunks
# Body 2: splits with split_documents using chunk_size=1024 and chunk_overlap=64.
# Despite the parameter name, `text` is passed to split_documents here, so it is
# presumably a list of loaded documents rather than a string — verify against the caller.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(text)
return texts

def get_vector_store(text_chunks):
# Embeds text_chunks with the "hkunlp/instructor-large" instructor model and stores them.
# NOTE(review): this span mixes two storage back ends (diff shown without +/- markers).
# The two live FAISS lines are repeated in commented-out form at the end, so they are
# presumably the superseded version and the AstraDB block the current one — confirm.

#embeddings = HuggingFaceInstructEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings = HuggingFaceInstructEmbeddings( model_name="hkunlp/instructor-large")
# FAISS variant: builds an index from the chunks and saves it locally as "faiss_index".
vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
vector_store.save_local("faiss_index")
# AstraDB variant: opens the "pdfdata" collection using the module-level
# astra_db_application_token / astra_db_api_endpoint values and adds the chunks to it.
vstore = AstraDB(
embedding=embeddings,
collection_name="pdfdata",
token=astra_db_application_token,
api_endpoint=astra_db_api_endpoint
)
# add_documents is called with text_chunks directly, so the chunks are presumably
# document objects rather than plain strings — verify against get_text_chunks.
vstore.add_documents(text_chunks)
# vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
# vector_store.save_local("faiss_index")

if __name__ == "__main__":


website_url = "https://www.stockgro.club/faq/"
website_text = get_website_text(website_url)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
website_texts = text_splitter.split_text(website_text)
get_vector_store(website_texts)
# loader = WebBaseLoader("https://www.stockgro.club/faq/")
# data = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
# website_texts = text_splitter.split_documents(data)
# get_vector_store(website_texts)
pdf_folder_path = "pdf"
pdf_folder_full_path = os.path.join(os.path.dirname(__file__), pdf_folder_path)
raw_text = get_pdf_text_from_folder(pdf_folder_full_path)
Expand Down

0 comments on commit a5f815c

Please sign in to comment.