forked from run-llama/llama_index
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Jerry Liu <[email protected]>
- Loading branch information
Showing
7 changed files
with
279 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
wikipedia | ||
pymongo | ||
slack_sdk | ||
discord.py | ||
|
||
google-api-python-client | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "effeb5a7-8544-4ee4-8c11-bad0d8165394", | ||
"metadata": {}, | ||
"source": [ | ||
"# Discord Demo\n", | ||
"Demonstrates our Discord data connector" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "04edcd4a-5633-47ee-8a92-ff2f6abc2ec7", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# DiscordReader calls asyncio.get_event_loop().run_until_complete internally.\n", | ||
"# Since the Jupyter kernel itself already runs on an event loop,\n", | ||
"# we need nest_asyncio to allow that nested call.\n", | ||
"!pip install nest_asyncio\n", | ||
"import nest_asyncio\n", | ||
"nest_asyncio.apply()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6ea1f66d-10ed-4417-bdcb-f8a894836ea5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from gpt_index import GPTListIndex, DiscordReader\n", | ||
"from IPython.display import Markdown, display\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "da90589a-fb44-4ec6-9706-753dba4fa968", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"discord_token = os.getenv(\"DISCORD_TOKEN\")\n", | ||
"channel_ids = [1057178784895348746] # Replace with your channel_id\n", | ||
"documents = DiscordReader(discord_token=discord_token).load_data(channel_ids=channel_ids)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "341295df-2029-4728-ab3d-2ee178a7e6f1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"index = GPTListIndex(documents)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "01c26b9d-49ec-4a6e-9c61-5c06bb86bbb2", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"response = index.query(\"<query_text>\", verbose=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "f160c678-2fb5-4d6d-b2bc-87abb61cfdec", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"display(Markdown(f\"<b>{response}</b>\"))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "gpt_retrieve_venv", | ||
"language": "python", | ||
"name": "gpt_retrieve_venv" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
"""Discord reader. | ||
Note: this file is named discord_reader.py to avoid conflicts with the | ||
discord.py module. | ||
""" | ||
|
||
import asyncio | ||
import logging | ||
import os | ||
from typing import Any, List, Optional | ||
|
||
from gpt_index.readers.base import BaseReader | ||
from gpt_index.readers.schema.base import Document | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
async def read_channel(
    discord_token: str, channel_id: int, limit: Optional[int], oldest_first: bool
) -> str:
    """Read the messages of one Discord text channel and return them as text.

    Note: This is our hack to create a synchronous interface to the
    async discord.py API. We use the `asyncio` module to run
    this function with `asyncio.get_event_loop().run_until_complete`.

    A throwaway client logs in with ``discord_token``, collects the channel
    history (plus the history of every thread whose id matches a message id)
    from its ``on_ready`` handler, and then closes itself.

    Args:
        discord_token (str): Token used to start the Discord client.
        channel_id (int): Id of the channel to read. Only text channels are
            supported for now.
        limit (Optional[int]): Forwarded unchanged as ``limit`` to every
            ``history()`` call (the channel's and each thread's).
        oldest_first (bool): Forwarded unchanged as ``oldest_first`` to every
            ``history()`` call.

    Returns:
        str: The ``content`` of all collected messages, separated by blank
            lines. Errors raised while reading are logged rather than
            propagated, so the result may be empty or partial.

    """
    import discord

    messages: List[discord.Message] = []

    class CustomClient(discord.Client):
        async def on_ready(self) -> None:
            try:
                logger.info("%s has connected to Discord!", self.user)
                # Use `self` rather than the enclosing `client` variable,
                # which is only bound after this class body is defined.
                channel = self.get_channel(channel_id)
                # only work for text channels for now
                if not isinstance(channel, discord.TextChannel):
                    raise ValueError(
                        f"Channel {channel_id} is not a text channel. "
                        "Only text channels are supported for now."
                    )
                # thread_dict maps thread_id to thread
                thread_dict = {thread.id: thread for thread in channel.threads}

                async for msg in channel.history(
                    limit=limit, oldest_first=oldest_first
                ):
                    messages.append(msg)
                    # A message whose id is also a thread id has a thread
                    # attached: append that thread's messages right after it.
                    thread = thread_dict.get(msg.id)
                    if thread is not None:
                        async for thread_msg in thread.history(
                            limit=limit, oldest_first=oldest_first
                        ):
                            messages.append(thread_msg)
            except Exception as e:
                # Deliberately best-effort: keep whatever was collected, but
                # record the failure (with traceback) instead of printing it.
                logger.exception("Encountered error: %s", e)
            finally:
                # Always close the client so the awaited `client.start` below
                # can finish.
                await self.close()

    intents = discord.Intents.default()
    intents.message_content = True
    client = CustomClient(intents=intents)
    await client.start(discord_token)

    return "\n\n".join(m.content for m in messages)
|
||
|
||
class DiscordReader(BaseReader):
    """Discord reader.

    Reads conversations from channels.

    Args:
        discord_token (Optional[str]): Discord token. If not provided, we
            assume the environment variable `DISCORD_TOKEN` is set.

    """

    def __init__(self, discord_token: Optional[str] = None) -> None:
        """Initialize with parameters.

        Raises:
            ValueError: If the `discord.py` package is not installed, or if
                no token is passed and `DISCORD_TOKEN` is not set.

        """
        try:
            import discord  # noqa: F401
        except ImportError as err:
            raise ValueError(
                "`discord.py` package not found, please run `pip install discord.py`"
            ) from err
        if discord_token is None:
            # `.get` returns None for an unset variable, so the ValueError
            # below is reached instead of a bare KeyError.
            discord_token = os.environ.get("DISCORD_TOKEN")
        if discord_token is None:
            raise ValueError(
                "Must specify `discord_token` or set environment "
                "variable `DISCORD_TOKEN`."
            )

        self.discord_token = discord_token

    def _read_channel(
        self, channel_id: int, limit: Optional[int] = None, oldest_first: bool = True
    ) -> str:
        """Read one channel synchronously.

        Runs the `read_channel` coroutine to completion on the current
        event loop and returns its result.
        """
        return asyncio.get_event_loop().run_until_complete(
            read_channel(
                self.discord_token, channel_id, limit=limit, oldest_first=oldest_first
            )
        )

    def load_data(self, **load_kwargs: Any) -> List[Document]:
        """Load data from the given Discord channels.

        Args:
            channel_ids (List[int]): List of channel ids to read.
            limit (Optional[int]): Maximum number of messages to read.
            oldest_first (bool): Whether to read oldest messages first.
                Defaults to `True`.

        Returns:
            List[Document]: List of documents, one per channel id, each
                carrying the channel id under the `channel` key of
                `extra_info`.

        Raises:
            ValueError: If `channel_ids` is missing or contains a value
                that is not an integer.

        """
        channel_ids = load_kwargs.pop("channel_ids", None)
        if channel_ids is None:
            raise ValueError('Must specify "channel_ids" in `load_kwargs`.')
        limit = load_kwargs.pop("limit", None)
        oldest_first = load_kwargs.pop("oldest_first", True)

        results = []
        for channel_id in channel_ids:
            if not isinstance(channel_id, int):
                raise ValueError(
                    f"Channel id {channel_id} must be an integer, "
                    f"not {type(channel_id)}."
                )
            channel_content = self._read_channel(
                channel_id, limit=limit, oldest_first=oldest_first
            )
            results.append(
                Document(channel_content, extra_info={"channel": channel_id})
            )
        return results
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: build a reader (token taken from the environment)
    # and print the documents for the last 10 messages of a sample channel.
    discord_reader = DiscordReader()
    print("initialized reader")
    sample_channel_ids = [1057178784895348746]
    documents = discord_reader.load_data(channel_ids=sample_channel_ids, limit=10)
    print(documents)