fix(api): incorrect rag endpoint config (#13)
This PR fixes the incorrect RAG endpoint configuration for the API service: the config key is renamed from llm to rag, and the pgpt_endpoint parameter becomes rag_endpoint across the document loaders, tasks, and tests.

Closes #12
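For reference, the corrected section of nesis/api/config.yml after this change (a minimal excerpt of the diff below; the localhost value is the repository default, not a recommendation):

    rag:
      endpoint: "http://localhost:9080"

The API resolves this key via (config.get("rag") or {}).get("endpoint") in nesis/api/core/tasks/document_management.py, so a config that still uses the old llm key silently yields a None endpoint rather than an error.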
mawandm authored Apr 8, 2024
1 parent b32dc6a commit 9d458a0
Showing 9 changed files with 51 additions and 45 deletions.
4 changes: 3 additions & 1 deletion nesis/api/__init__.py
@@ -6,5 +6,7 @@
PRETTY_LOG_FORMAT = (
"%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s"
)
-logging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%H:%M:%S")
+logging.basicConfig(
+    level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%Y-%m-%d %H:%M:%S"
+)
logging.captureWarnings(True)
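With the widened datefmt, every record now carries the full date, not just the time of day. An illustrative line (values invented) under the new format would render as:

    2024-04-08 12:34:56.789 [INFO    ]                 nesis.api - starting API service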
2 changes: 1 addition & 1 deletion nesis/api/config.yml
@@ -2,7 +2,7 @@ database:
url: postgresql://postgres:password@localhost:65432/nesis
debug: false

-llm:
+rag:
endpoint: "http://localhost:9080"

service:
24 changes: 12 additions & 12 deletions nesis/api/core/document_loaders/minio.py
@@ -27,7 +27,7 @@

def fetch_documents(
connection: Dict[str, str],
-pgpt_endpoint: str,
+rag_endpoint: str,
http_client: http.HttpClient,
metadata: Dict[str, Any],
) -> None:
@@ -47,14 +47,14 @@ def fetch_documents(
_sync_s3_documents(
client=_minio_client,
connection=connection,
-pgpt_endpoint=pgpt_endpoint,
+rag_endpoint=rag_endpoint,
http_client=http_client,
metadata=metadata,
)
_unsync_s3_documents(
client=_minio_client,
connection=connection,
-pgpt_endpoint=pgpt_endpoint,
+rag_endpoint=rag_endpoint,
http_client=http_client,
)
except:
@@ -64,7 +64,7 @@ def fetch_documents(
def _sync_s3_documents(
client: Minio,
connection: dict,
-pgpt_endpoint: str,
+rag_endpoint: str,
http_client: http.HttpClient,
metadata: dict,
) -> None:
@@ -81,7 +81,7 @@ def _sync_s3_documents(
work_dir = f"/tmp/{uuid.uuid4()}"
pathlib.Path(work_dir).mkdir(parents=True)

_LOG.info(f"Initializing syncing to endpoint {pgpt_endpoint}")
_LOG.info(f"Initializing syncing to endpoint {rag_endpoint}")

for bucket_name in bucket_names_parts:
try:
@@ -93,15 +93,15 @@
_sync_document(
client=client,
connection=connection,
-pgpt_endpoint=pgpt_endpoint,
+rag_endpoint=rag_endpoint,
http_client=http_client,
metadata=metadata,
bucket_name=bucket_name,
item=item,
work_dir=work_dir,
)

_LOG.info(f"Completed syncing to endpoint {pgpt_endpoint}")
_LOG.info(f"Completed syncing to endpoint {rag_endpoint}")

except:
_LOG.warn("Error fetching and updating documents", exc_info=True)
@@ -110,7 +110,7 @@ def _sync_s3_documents(
def _sync_document(
client: Minio,
connection: dict,
-pgpt_endpoint: str,
+rag_endpoint: str,
http_client: http.HttpClient,
metadata: dict,
bucket_name: str,
@@ -155,7 +155,7 @@ def _sync_document(
try:
util.un_ingest_file(
http_client=http_client,
-endpoint=pgpt_endpoint,
+endpoint=rag_endpoint,
doc_id=document_data["doc_id"],
)
except:
@@ -173,7 +173,7 @@
try:
response = ingest_file(
http_client=http_client,
-endpoint=pgpt_endpoint,
+endpoint=rag_endpoint,
metadata=_metadata,
file_path=file_path,
)
@@ -206,7 +206,7 @@ def _sync_document(


def _unsync_s3_documents(
-client: Minio, connection: dict, pgpt_endpoint: str, http_client: http.HttpClient
+client: Minio, connection: dict, rag_endpoint: str, http_client: http.HttpClient
) -> None:

try:
@@ -226,7 +226,7 @@ def _unsync_s3_documents(
for document_data in rag_metadata.get("data") or []:
try:
http_client.delete(
url=f"{pgpt_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
url=f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
)
except:
_LOG.warn(
22 changes: 11 additions & 11 deletions nesis/api/core/document_loaders/samba.py
@@ -81,7 +81,7 @@ def _connect_samba_server(connection):
raise


-def _sync_samba_documents(connection, pgpt_endpoint, http_client, metadata):
+def _sync_samba_documents(connection, rag_endpoint, http_client, metadata):

username = connection["user"]
password = connection["password"]
@@ -110,7 +110,7 @@ def _sync_samba_documents(connection, pgpt_endpoint, http_client, metadata):
continue
try:
_process_file(
-connection, file_share, work_dir, http_client, pgpt_endpoint, metadata
+connection, file_share, work_dir, http_client, rag_endpoint, metadata
)
except:
_LOG.warn(
@@ -119,12 +119,12 @@ def _sync_samba_documents(connection, pgpt_endpoint, http_client, metadata):
)
_LOG.info(
f"Completed syncing files from samba server {endpoint} "
f"to endpoint {pgpt_endpoint}"
f"to endpoint {rag_endpoint}"
)


def _process_file(
-connection, file_share, work_dir, http_client, pgpt_endpoint, metadata
+connection, file_share, work_dir, http_client, rag_endpoint, metadata
):
username = connection["user"]
password = connection["password"]
@@ -138,7 +138,7 @@ def _process_file(
)
for dir_file in dir_files:
_process_file(
-connection, dir_file, work_dir, http_client, pgpt_endpoint, metadata
+connection, dir_file, work_dir, http_client, rag_endpoint, metadata
)
return

@@ -195,7 +195,7 @@ def _process_file(
try:
util.un_ingest_file(
http_client=http_client,
-endpoint=pgpt_endpoint,
+endpoint=rag_endpoint,
doc_id=document_data["doc_id"],
)
except:
@@ -223,7 +223,7 @@ def _process_file(
try:
response = ingest_file(
http_client=http_client,
-endpoint=pgpt_endpoint,
+endpoint=rag_endpoint,
metadata=_metadata,
file_path=file_path,
)
@@ -247,11 +247,11 @@
exc_info=True,
)
_LOG.info(
f"Completed syncing files from shared_file share {file_share.path} to endpoint {pgpt_endpoint}"
f"Completed syncing files from shared_file share {file_share.path} to endpoint {rag_endpoint}"
)


-def _unsync_samba_documents(connection, pgpt_endpoint, http_client):
+def _unsync_samba_documents(connection, rag_endpoint, http_client):
try:
username = connection["user"]
password = connection["password"]
@@ -276,7 +276,7 @@ def _unsync_samba_documents(connection, pgpt_endpoint, http_client):
try:
util.un_ingest_file(
http_client=http_client,
-endpoint=pgpt_endpoint,
+endpoint=rag_endpoint,
doc_id=document_data["doc_id"],
)
except:
@@ -285,6 +285,6 @@ def _unsync_samba_documents(connection, pgpt_endpoint, http_client):
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
_LOG.info(f"Completed unsyncing files from endpoint {pgpt_endpoint}")
_LOG.info(f"Completed unsyncing files from endpoint {rag_endpoint}")
except:
_LOG.warn("Error fetching and updating documents", exc_info=True)
20 changes: 10 additions & 10 deletions nesis/api/core/document_loaders/sharepoint.py
@@ -25,14 +25,14 @@

def fetch_documents(**kwargs) -> None:
try:
pgpt_endpoint = kwargs["pgpt_endpoint"]
rag_endpoint = kwargs["rag_endpoint"]
connection = kwargs["connection"]

_sync_sharepoint_documents(
-**kwargs, connection=connection, pgpt_endpoint=pgpt_endpoint
+**kwargs, connection=connection, rag_endpoint=rag_endpoint
)
_unsync_sharepoint_documents(
-**kwargs, connection=connection, pgpt_endpoint=pgpt_endpoint
+**kwargs, connection=connection, rag_endpoint=rag_endpoint
)
except:
_LOG.exception("Error fetching sharepoint documents")
@@ -55,13 +55,13 @@ def _sync_sharepoint_documents(**kwargs):

file_locations = doc_store_connection.get("file_locations")

pgpt_endpoint = kwargs["pgpt_endpoint"]
rag_endpoint = kwargs["rag_endpoint"]

location_names = file_locations.split(",")
work_dir = f"/tmp/{uuid.uuid4()}"
pathlib.Path(work_dir).mkdir(parents=True)

_LOG.info(f"Initializing sharepoint syncing to endpoint {pgpt_endpoint}")
_LOG.info(f"Initializing sharepoint syncing to endpoint {rag_endpoint}")

# Sharepoint stores documents in different ways: in folders, and there is a concept of lists. Most document
# libraries are stored as lists, thus we can safely assume that the locations provided in the settings are
@@ -108,7 +108,7 @@ def _sync_sharepoint_documents(**kwargs):
for document_data in rag_metadata.get("data") or []:
try:
http_client.delete(
url=f"{pgpt_endpoint}/v1/ingest/{document_data['doc_id']}"
url=f"{rag_endpoint}/v1/ingest/{document_data['doc_id']}"
)
except:
_LOG.warn(
@@ -123,7 +123,7 @@
)

response = http_client.upload(
url=f"{pgpt_endpoint}/v1/ingest",
url=f"{rag_endpoint}/v1/ingest",
filepath=downloaded_file_name,
field="file",
)
@@ -155,7 +155,7 @@
f"Error when getting and ingesting file {file.name} - {ex}"
)

_LOG.info(f"Completed syncing to endpoint {pgpt_endpoint}")
_LOG.info(f"Completed syncing to endpoint {rag_endpoint}")

except:
_LOG.warn("Error fetching and updating documents", exc_info=True)
@@ -165,7 +165,7 @@ def _unsync_sharepoint_documents(**kwargs):
global _sharepoint_context
http_client: http.HttpClient = kwargs["http_client"]
doc_store_connection = kwargs["connection"]
pgpt_endpoint = kwargs["pgpt_endpoint"]
rag_endpoint = kwargs["rag_endpoint"]

try:
site_url = doc_store_connection.get("site_url")
@@ -193,7 +193,7 @@
for document_data in rag_metadata.get("data") or []:
try:
http_client.delete(
url=f"{pgpt_endpoint}/v1/ingest/{document_data['doc_id']}"
url=f"{rag_endpoint}/v1/ingest/{document_data['doc_id']}"
)
except:
_LOG.warn(
10 changes: 5 additions & 5 deletions nesis/api/core/tasks/document_management.py
@@ -17,36 +17,36 @@
def fetch_documents(**kwargs) -> None:
try:
config = kwargs["config"] or {}
pgpt_endpoint = (config.get("llm") or {}).get("endpoint")
rag_endpoint = (config.get("rag") or {}).get("endpoint")
http_client = kwargs["http_client"]
datasource_list: List[Datasource] = DatasourceService.get_datasources()

for datasource in datasource_list:
if datasource.type == DatasourceType.MINIO:
s3_documents.fetch_documents(
connection=datasource.connection,
-pgpt_endpoint=pgpt_endpoint,
+rag_endpoint=rag_endpoint,
http_client=http_client,
metadata={"datasource": datasource.name},
)
if datasource.type == DatasourceType.SHAREPOINT:
sharepoint_documents.fetch_documents(
**kwargs,
connection=datasource.connection,
-pgpt_endpoint=pgpt_endpoint,
+rag_endpoint=rag_endpoint,
metadata={"datasource": datasource.name}
)
if datasource.type == DatasourceType.GOOGLE_DRIVE:
google_drive.fetch_documents(
connection=datasource.connection,
-llm_endpoint=pgpt_endpoint,
+llm_endpoint=rag_endpoint,
http_client=http_client,
metadata={"datasource": datasource.name},
)
if datasource.type == DatasourceType.WINDOWS_SHARE:
samba.fetch_documents(
connection=datasource.connection,
-llm_endpoint=pgpt_endpoint,
+llm_endpoint=rag_endpoint,
http_client=http_client,
metadata={"datasource": datasource.name},
)
2 changes: 1 addition & 1 deletion nesis/api/tests/core/document_loaders/test_minio.py
@@ -83,7 +83,7 @@ def test_fetch_documents(minio_instance: mock.MagicMock, session: Session) -> None
connection=data["connection"],
http_client=http_client,
metadata={"datasource": "documents"},
pgpt_endpoint="http://localhost:8080",
rag_endpoint="http://localhost:8080",
)

_, upload_kwargs = http_client.upload.call_args_list[0]
4 changes: 3 additions & 1 deletion nesis/rag/__init__.py
@@ -9,5 +9,7 @@
PRETTY_LOG_FORMAT = (
"%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s"
)
-logging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%H:%M:%S")
+logging.basicConfig(
+    level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%Y-%m-%d %H:%M:%S"
+)
logging.captureWarnings(True)
8 changes: 5 additions & 3 deletions package.sh
@@ -1,3 +1,5 @@
-while read v; do
-docker build -t ametnes/nesis:"$v"-frontend . -f nesis/frontend/Dockerfile
-done <version.txt
+while read -r v; do
+docker build -t ametnes/nesis:"$v"-rag . -f nesis/rag/Dockerfile
+docker build -t ametnes/nesis:"$v"-api . -f nesis/api/Dockerfile
+docker build --build-arg PUBLIC_URL=/ --build-arg PROFILE=PROD -t ametnes/nesis:"$v"-frontend . -f nesis/frontend/Dockerfile
+done <version.txt
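A hypothetical usage sketch for the updated script, assuming version.txt holds one version string per line (its format is not shown in this commit):

    # assumed contents of version.txt (not part of this diff)
    echo "0.0.1" > version.txt
    # builds ametnes/nesis:0.0.1-rag, 0.0.1-api and 0.0.1-frontend
    bash package.sh

The added -r flag stops read from interpreting backslashes in version strings, and the frontend image now receives its PUBLIC_URL and PROFILE build arguments explicitly.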
