Skip to content

Commit

Permalink
Create VectorEmbeddings.py
Browse files Browse the repository at this point in the history
  • Loading branch information
harshitv804 authored Aug 11, 2023
1 parent f607c70 commit 53ba73c
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions VectorEmbeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

# Build a Chroma vector store from the PDF files found under ``data``.

# Step 1: load the PDFs. Each file matched by the glob is read with
# PyPDFLoader, as selected through ``loader_cls``.
SOURCE_DIR = 'data'
PDF_GLOB = "./*.pdf"
loader = DirectoryLoader(SOURCE_DIR, glob=PDF_GLOB, loader_cls=PyPDFLoader)
documents = loader.load()

# Step 2: split the loaded documents into chunks of size 500 that overlap
# by 200, so neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

# Step 3: embed the chunks and index them in Chroma, pointing the store at
# ``persist_directory``.
# NOTE(review): whether the data is flushed to disk without an explicit
# persist call depends on the installed langchain/chromadb versions -- confirm.
persist_directory = "ipc_vector_data"
embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-mpnet-base-dot-v1")
db = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory=persist_directory,
)

0 comments on commit 53ba73c

Please sign in to comment.