Skip to content

Commit

Permalink
init commit
Browse files Browse the repository at this point in the history
  • Loading branch information
davidcrab committed May 16, 2023
0 parents commit 343288f
Show file tree
Hide file tree
Showing 19 changed files with 8,834 additions and 0 deletions.
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Powering your products with ChatGPT and your own data

The Chatbot Kickstarter is a starter repo to get you used to building basic a basic Chatbot using the ChatGPT API and your own knowledge base. The flow you're taken through was originally presented with [these slides](https://drive.google.com/file/d/1dB-RQhZC_Q1iAsHkNNdkqtxxXqYODFYy/view?usp=share_link), which may come in useful to refer to.

This repo contains one notebook and two basic Streamlit apps:
- `powering_your_products_with_chatgpt_and_your_data.ipynb`: A notebook containing a step by step process of tokenising, chunking and embedding your data in a vector database, and building simple Q&A and Chatbot functionality on top.
- `search.py`: A Streamlit app providing simple Q&A via a search bar to query your knowledge base.
- `chat.py`: A Streamlit app providing a simple Chatbot via a search bar to query your knowledge base.

To run either version of the app, please follow the instructions in the respective README.md files in the subdirectories.

## How it works

The notebook is the best place to start, and is broadly laid out as follows:
- **Lay the foundations:**
- Set up the vector database to accept vectors and data
- Load the dataset, chunk the data up for embedding and store in the vector database
- **Make it a product:**
- Add a retrieval step where users provide queries and we return the most relevant entries
- Summarise search results with GPT-3
- Test out this basic Q&A app in Streamlit
- **Build your moat:**
- Create an Assistant class to manage context and interact with our bot
- Use the Chatbot to answer questions using semantic search context
- Test out this basic Chatbot app in Streamlit

Once you've run the notebook and tried the two Streamlit apps, you should be in a position to strip out any useful snippets and start your own Q&A or Chat application.

## Limitations

- This app uses Redis as a vector database, but there are many other options highlighted `../examples/vector_databases` depending on your need.
- This is a simple starting point - if you hit issues deploying your use case you may need to tune (non-exhaustive list):
- The prompt and parameters for the model for it to answer accurately
- Your search to return more relevant results
- Your chunking/embedding approach to store the most relevant content effectively for retrieval
Binary file added __pycache__/config.cpython-39.pyc
Binary file not shown.
Binary file added __pycache__/database.cpython-39.pyc
Binary file not shown.
Binary file added __pycache__/transformers.cpython-39.pyc
Binary file not shown.
83 changes: 83 additions & 0 deletions chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import streamlit as st
from streamlit_chat import message

from database import get_redis_connection
from chatbot import RetrievalAssistant, Message

# Initialise database

## Initialise Redis connection
redis_client = get_redis_connection()

# Set instruction

# System prompt requiring Question and Year to be extracted from the user
system_prompt = '''
You are a helpful Formula 1 knowledge base assistant. You need to capture a Question and Year from each customer.
The Question is their query on Formula 1, and the Year is the year of the applicable Formula 1 season.
Think about this step by step:
- The user will ask a Question
- You will ask them for the Year if their question didn't include a Year
- Once you have the Year, say "searching for answers".
Example:
User: I'd like to know the cost cap for a power unit
Assistant: Certainly, what year would you like this for?
User: 2023 please.
Assistant: Searching for answers.
'''

### CHATBOT APP

st.set_page_config(
page_title="Streamlit Chat - Demo",
page_icon=":robot:"
)

st.title('Formula 1 Chatbot')
st.subheader("Help us help you learn about Formula 1")

if 'generated' not in st.session_state:
st.session_state['generated'] = []

if 'past' not in st.session_state:
st.session_state['past'] = []

def query(question):
response = st.session_state['chat'].ask_assistant(question)
return response

prompt = st.text_input("What do you want to know: ","", key="input")

if st.button('Submit', key='generationSubmit'):

# Initialization
if 'chat' not in st.session_state:
st.session_state['chat'] = RetrievalAssistant()
messages = []
system_message = Message('system',system_prompt)
messages.append(system_message.message())
else:
messages = []


user_message = Message('user',prompt)
messages.append(user_message.message())

response = query(messages)

# Debugging step to print the whole response
#st.write(response)

st.session_state.past.append(prompt)
st.session_state.generated.append(response['content'])

if st.session_state['generated']:

for i in range(len(st.session_state['generated'])-1, -1, -1):
message(st.session_state["generated"][i], key=str(i))
message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
84 changes: 84 additions & 0 deletions chatbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import openai
from termcolor import colored
import streamlit as st

from database import get_redis_connection,get_redis_results

from config import CHAT_MODEL,COMPLETIONS_MODEL, INDEX_NAME

redis_client = get_redis_connection()

# A basic class to create a message as a dict for chat
class Message:


def __init__(self,role,content):

self.role = role
self.content = content

def message(self):

return {"role": self.role,"content": self.content}

# New Assistant class to add a vector database call to its responses
class RetrievalAssistant:

def __init__(self):
self.conversation_history = []

def _get_assistant_response(self, prompt):

try:
completion = openai.ChatCompletion.create(
model=CHAT_MODEL,
messages=prompt,
temperature=0.1
)

response_message = Message(completion['choices'][0]['message']['role'],completion['choices'][0]['message']['content'])
return response_message.message()

except Exception as e:

return f'Request failed with exception {e}'

# The function to retrieve Redis search results
def _get_search_results(self,prompt):
latest_question = prompt
search_content = get_redis_results(redis_client,latest_question,INDEX_NAME)['result'][0]
return search_content


def ask_assistant(self, next_user_prompt):
[self.conversation_history.append(x) for x in next_user_prompt]
assistant_response = self._get_assistant_response(self.conversation_history)

# Answer normally unless the trigger sequence is used "searching_for_answers"
if 'searching for answers' in assistant_response['content'].lower():
question_extract = openai.Completion.create(model=COMPLETIONS_MODEL,prompt=f"Extract the user's latest question and the year for that question from this conversation: {self.conversation_history}. Extract it as a sentence stating the Question and Year")
search_result = self._get_search_results(question_extract['choices'][0]['text'])

# We insert an extra system prompt here to give fresh context to the Chatbot on how to use the Redis results
# In this instance we add it to the conversation history, but in production it may be better to hide
self.conversation_history.insert(-1,{"role": 'system',"content": f"Answer the user's question using this content: {search_result}. If you cannot answer the question, say 'Sorry, I don't know the answer to this one'"})

assistant_response = self._get_assistant_response(self.conversation_history)

self.conversation_history.append(assistant_response)
return assistant_response
else:
self.conversation_history.append(assistant_response)
return assistant_response


def pretty_print_conversation_history(self, colorize_assistant_replies=True):
for entry in self.conversation_history:
if entry['role'] == 'system':
pass
else:
prefix = entry['role']
content = entry['content']
output = colored(prefix +':\n' + content, 'green') if colorize_assistant_replies and entry['role'] == 'assistant' else prefix +':\n' + content
#prefix = entry['role']
print(output)
8 changes: 8 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
COMPLETIONS_MODEL = "text-davinci-003"
EMBEDDINGS_MODEL = "text-embedding-ada-002"
CHAT_MODEL = 'gpt-3.5-turbo'
TEXT_EMBEDDING_CHUNK_SIZE=300
VECTOR_FIELD_NAME='content_vector'
PREFIX = "moodfitdocs"
INDEX_NAME = "moodfit-index"
OPENAI_API_KEY = "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"
Binary file not shown.
Loading

0 comments on commit 343288f

Please sign in to comment.