Add Replicate API with Llama-2-13b-chat LLM and add document context bot using langchain. #131

Open · wants to merge 3 commits into main
6 changes: 5 additions & 1 deletion .gitignore
@@ -125,4 +125,8 @@ cpcli-env/
dmypy.json

# Pyre type checker
.pyre/

# Local Database Stuff
textbase/utils/DB
textbase/utils/SOURCE_DOCUMENTS
56 changes: 56 additions & 0 deletions docs/docs/examples/replicate-bot.md
@@ -0,0 +1,56 @@
---
sidebar_position: 3
---

# Replicate bot

This bot makes an API call to Replicate and processes the user input. It uses Meta's [Llama-2-13b-chat](https://replicate.com/a16z-infra/llama-2-13b-chat) model.

```py
from textbase import bot, Message
from textbase.models import Replicate
from typing import List

# Load your Replicate API key (placeholder shown here; never commit a real key)
Replicate.api_key = "YOUR_REPLICATE_API_KEY"

# System prompt for Llama 2
SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense,
or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.
"""

@bot()
def on_message(message_history: List[Message], state: dict = None):
    # Generate a Llama 2 response from the chat history
    bot_response = Replicate.generate(
        system_prompt=SYSTEM_PROMPT,
        message_history=message_history,  # the list of prior user and assistant messages
        model="a16z-infra/llama-2-13b-chat:9dff94b1bed5af738655d4a7cbcdcde2bd503aa85c94334fe1f42af7f3dd5ee3",
    )

    response = {
        "data": {
            "messages": [
                {
                    "data_type": "STRING",
                    "value": bot_response
                }
            ],
            "state": state
        },
        "errors": [
            {
                "message": ""
            }
        ]
    }

    return {
        "status_code": 200,
        "response": response
    }
```
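API tokens are issued from your Replicate account settings. The key above is a placeholder; in practice, load it from an environment variable rather than hard-coding it.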
3 changes: 2 additions & 1 deletion docs/docs/get-started/create-your-first-bot.md
@@ -10,4 +10,5 @@ You can make your own model using NLP and ML or you can make use of one of our i
Currently we support:
- OpenAI
- HuggingFace ([Microsoft/dialoGPT-large](https://huggingface.co/microsoft/DialoGPT-large))
- BotLibre
- Replicate ([a16z-infra/llama-2-13b-chat](https://replicate.com/a16z-infra/llama-2-13b-chat))
37 changes: 37 additions & 0 deletions examples/document-bot/main.py
@@ -0,0 +1,37 @@
from textbase import bot, Message
from textbase.models import ContextOpenAPI
from typing import List

# Load your OpenAI API key
ContextOpenAPI.api_key = ""

@bot()
def on_message(message_history: List[Message], state: dict = None):
    # Generate a GPT-3.5 Turbo response grounded in the ingested documents
    bot_response = ContextOpenAPI.generate(
        message_history=message_history,  # the list of prior user and assistant messages
        model="gpt-3.5-turbo",
    )

    response = {
        "data": {
            "messages": [
                {
                    "data_type": "STRING",
                    "value": bot_response
                }
            ],
            "state": state
        },
        "errors": [
            {
                "message": ""
            }
        ]
    }

    return {
        "status_code": 200,
        "response": response
    }
46 changes: 46 additions & 0 deletions examples/replicate-bot/main.py
@@ -0,0 +1,46 @@
from textbase import bot, Message
from textbase.models import Replicate
from typing import List

# Load your Replicate API key
Replicate.api_key = ""

# System prompt for Llama 2
SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense,
or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer to a question, please don't share false information.
"""

@bot()
def on_message(message_history: List[Message], state: dict = None):
    # Generate a Llama 2 response from the chat history
    bot_response = Replicate.generate(
        system_prompt=SYSTEM_PROMPT,
        message_history=message_history,  # the list of prior user and assistant messages
        model="a16z-infra/llama-2-13b-chat:9dff94b1bed5af738655d4a7cbcdcde2bd503aa85c94334fe1f42af7f3dd5ee3",
    )

    response = {
        "data": {
            "messages": [
                {
                    "data_type": "STRING",
                    "value": bot_response
                }
            ],
            "state": state
        },
        "errors": [
            {
                "message": ""
            }
        ]
    }

    return {
        "status_code": 200,
        "response": response
    }
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -14,7 +14,12 @@ python-dotenv = "^1.0.0"
tabulate = "^0.9.0"
functions-framework = "^3.4.0"
yaspin = "^3.0.0"
pydantic = "^2.3.0"
pydantic = "^1.10.0"
replicate = "^0.11.0"
langchain = {extras = ["llms"], version = "^0.0.279"}
chromadb = "^0.4.8"
pdfminer-six = "^20221105"
tiktoken = "^0.4.0"

[build-system]
requires = ["poetry-core"]
104 changes: 103 additions & 1 deletion textbase/models.py
@@ -4,8 +4,16 @@
import time
import typing
import traceback
import replicate
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

from textbase import Message
from .utils.constants import PERSIST_DIRECTORY

# Return the list of the message's content values for the given data type.
def get_contents(message: Message, data_type: str):
@@ -143,4 +151,98 @@ def generate(
        data = json.loads(response.text)  # parse the JSON data into a dictionary
        message = data['message']

        return message

class Replicate:
    api_key = None

    @classmethod
    def generate(
        cls,
        system_prompt: str,
        message_history: list[Message],
        model="a16z-infra/llama-2-13b-chat:9dff94b1bed5af738655d4a7cbcdcde2bd503aa85c94334fe1f42af7f3dd5ee3",
        max_new_tokens=900,
        temperature=0.75,
    ) -> str:
        try:
            assert cls.api_key is not None, "Replicate API key is not set."
            client = replicate.Client(api_token=cls.api_key)

            message_prompt = ""

            for message in message_history:
                # When managing dialogue state across multiple exchanges between a user and the model,
                # each user turn must be wrapped in instruction tags that mark the beginning
                # ("[INST]") and end ("[/INST]") of the user input, e.g.
                # "[INST] Hi there [/INST]Hello! How can I help?[INST] Tell me a joke [/INST]"
                if message["role"] == "user":
                    message_prompt += "[INST] {} [/INST]".format(message["content"][0]["value"])
                else:
                    message_prompt += message["content"][0]["value"]

            output = client.run(
                model_version=model,
                input={
                    "system_prompt": system_prompt,
                    "prompt": message_prompt,
                    "max_new_tokens": max_new_tokens,
                    "temperature": temperature
                }
            )

            # Replicate streams the generation as an iterator of text chunks; join them.
            response = ""
            for item in output:
                response += str(item)

            return response

        except Exception:
            # Returns None if the call fails.
            print(f"An exception occurred while using this model; please try another model.\nException: {traceback.format_exc()}")

class ContextOpenAPI:
    api_key = None

    @classmethod
    def generate(
        cls,
        message_history: list[Message],
        model="gpt-3.5-turbo",
        max_tokens=1000,
    ):
        assert cls.api_key is not None, "OpenAI API key is not set."

        # Answer based on the most recent user message.
        most_recent_message = get_contents(message_history[-1], "STRING")
        print(most_recent_message)

        embeddings = OpenAIEmbeddings(
            openai_api_key=cls.api_key
        )

        # Open the persisted Chroma vector store built from the ingested documents.
        db = Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
        )

        retriever = db.as_retriever()

        prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
just say that you don't know; don't try to make up an answer.

{context}

{history}
Question: {question}
Helpful Answer:"""

        PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
        MEMORY = ConversationBufferMemory(input_key="question", memory_key="history")

        chain_type_kwargs = {"prompt": PROMPT, "memory": MEMORY}
        qa = RetrievalQA.from_chain_type(
            llm=ChatOpenAI(openai_api_key=cls.api_key, model=model, max_tokens=max_tokens),
            chain_type="stuff",
            retriever=retriever,
            chain_type_kwargs=chain_type_kwargs,
        )

        response = qa(str(most_recent_message))
        print(response)

        return response["result"]
30 changes: 30 additions & 0 deletions textbase/utils/constants.py
@@ -0,0 +1,30 @@
import os

from chromadb.config import Settings

from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader

ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"

PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB"

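# Number of worker threads to use when ingesting documents.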
INGEST_THREADS = os.cpu_count() or 8

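# Chroma client settings: persist the vector index to disk and disable anonymized telemetry.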
CHROMA_SETTINGS = Settings(
    anonymized_telemetry=False,
    is_persistent=True,
)

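# Map each supported file extension to the LangChain loader used to ingest it.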
DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".py": TextLoader,
    ".pdf": PDFMinerLoader,
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader,
    ".doc": Docx2txtLoader,
}
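These constants define where source documents live and which LangChain loader handles each file type, but the diff does not include the script that populates the `DB` directory read by `ContextOpenAPI`. A minimal ingestion sketch, assuming LangChain's `RecursiveCharacterTextSplitter` (the `ingest` function name and chunk sizes are illustrative, not part of this PR), might look like:

```py
import os

from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from textbase.utils.constants import (
    CHROMA_SETTINGS,
    DOCUMENT_MAP,
    PERSIST_DIRECTORY,
    SOURCE_DIRECTORY,
)

def ingest(openai_api_key: str):
    # Load every file in SOURCE_DOCUMENTS with the loader mapped to its extension.
    documents = []
    for file_name in os.listdir(SOURCE_DIRECTORY):
        loader_cls = DOCUMENT_MAP.get(os.path.splitext(file_name)[1].lower())
        if loader_cls is None:
            continue  # skip unsupported file types
        documents.extend(loader_cls(os.path.join(SOURCE_DIRECTORY, file_name)).load())

    # Split documents into overlapping chunks sized for retrieval.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)

    # Embed the chunks and persist them to the Chroma DB that ContextOpenAPI queries.
    Chroma.from_documents(
        chunks,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
        persist_directory=PERSIST_DIRECTORY,
        client_settings=CHROMA_SETTINGS,
    )
```

Running something like `ingest(...)` once before starting the document bot would build the persisted index that `ContextOpenAPI.generate` retrieves from.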