Skip to content

Commit

Permalink
feat(api)(rag): add text extraction api (#137)
Browse files Browse the repository at this point in the history
This PR adds a text extraction API to the RAG service.

Part of #108
  • Loading branch information
mawandm authored Jul 1, 2024
1 parent 0fd2e48 commit fafb047
Show file tree
Hide file tree
Showing 34 changed files with 1,224 additions and 393 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""add extract management fields to document
Revision ID: ea840c8d9e58
Revises: 7cfa662dff86
Create Date: 2024-07-01 12:29:36.310411
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` identifies this migration; `down_revision` is the revision it
# revises (see the "Revises:" line in the module docstring).
revision: str = "ea840c8d9e58"
down_revision: Union[str, None] = "7cfa662dff86"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# PostgreSQL enum type named "document_status" (the type of the new
# document.status column). upgrade() creates it explicitly and downgrade()
# drops it, both via this shared object.
document_status = postgresql.ENUM(
"SUCCESS", "PROCESSING", "ERROR", name="document_status"
)


def upgrade() -> None:
    """Apply revision ea840c8d9e58 to the ``document`` table.

    Creates the ``document_status`` enum type, adds the extract-management
    columns (``extract_metadata``, ``datasource_id``, ``status``,
    ``last_modified``, ``last_processed``, ``last_processed_message``),
    widens ``base_uri`` and ``filename`` to 4096 characters and makes
    ``rag_metadata`` nullable.

    The three NOT NULL columns (``status``, ``last_modified`` and
    ``last_processed``) are added with a temporary server default so the
    migration also succeeds when ``document`` already contains rows. Without
    a default PostgreSQL rejects ``ADD COLUMN ... NOT NULL`` on a populated
    table. The defaults are removed again at the end, so the resulting
    schema is the same as it would be without them.
    """
    # The enum type has to exist before a column can reference it.
    document_status.create(op.get_bind())

    op.add_column(
        "document",
        sa.Column(
            "extract_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
    )
    op.add_column(
        "document", sa.Column("datasource_id", sa.Unicode(length=255), nullable=True)
    )

    # NOTE(review): the backfill values below are assumptions -- pre-existing
    # rows are marked SUCCESS and stamped with the migration time (now()).
    # Confirm these are the right values for documents ingested before this
    # revision, and whether the application expects UTC timestamps here.
    op.add_column(
        "document",
        sa.Column(
            "status",
            sa.Enum("SUCCESS", "PROCESSING", "ERROR", name="document_status"),
            nullable=False,
            server_default="SUCCESS",
        ),
    )
    op.add_column(
        "document",
        sa.Column(
            "last_modified",
            sa.DateTime(),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.add_column(
        "document",
        sa.Column(
            "last_processed",
            sa.DateTime(),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    op.add_column(
        "document", sa.Column("last_processed_message", sa.Text(), nullable=True)
    )

    # The defaults were only needed to backfill existing rows; drop them so
    # the final column definitions carry no server-side default.
    for backfilled_column in ("status", "last_modified", "last_processed"):
        op.alter_column("document", backfilled_column, server_default=None)

    op.alter_column(
        "document",
        "base_uri",
        existing_type=sa.VARCHAR(length=255),
        type_=sa.Unicode(length=4096),
        existing_nullable=False,
    )
    op.alter_column(
        "document",
        "filename",
        existing_type=sa.VARCHAR(length=255),
        type_=sa.Unicode(length=4096),
        existing_nullable=False,
    )
    op.alter_column(
        "document",
        "rag_metadata",
        existing_type=postgresql.JSONB(astext_type=sa.Text()),
        nullable=True,
    )


def downgrade() -> None:
    """Revert revision ea840c8d9e58 on the ``document`` table.

    Performs the inverse of ``upgrade`` in reverse order: ``rag_metadata``
    becomes NOT NULL again, ``filename`` and ``base_uri`` return to
    ``VARCHAR(255)``, the six columns added by the upgrade are dropped and
    finally the ``document_status`` enum type is removed.
    """
    table = "document"

    op.alter_column(
        table,
        "rag_metadata",
        existing_type=postgresql.JSONB(astext_type=sa.Text()),
        nullable=False,
    )

    # Shrink the widened text columns back to their previous type.
    for widened_column in ("filename", "base_uri"):
        op.alter_column(
            table,
            widened_column,
            existing_type=sa.Unicode(length=4096),
            type_=sa.VARCHAR(length=255),
            existing_nullable=False,
        )

    # Drop the added columns in the reverse of the order they were created.
    added_columns = (
        "last_processed_message",
        "last_processed",
        "last_modified",
        "status",
        "datasource_id",
        "extract_metadata",
    )
    for added_column in added_columns:
        op.drop_column(table, added_column)

    # The enum type can only go once the "status" column using it is gone.
    document_status.drop(op.get_bind())
2 changes: 1 addition & 1 deletion nesis/api/core/controllers/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from . import GET, POST, DELETE, PUT
from .api import app, error_message

from nesis.api.core.models.entities import Module
from nesis.api.core.models.objects import Module
import logging
import nesis.api.core.services as services
import nesis.api.core.services.util as util
Expand Down
Loading

0 comments on commit fafb047

Please sign in to comment.