diff --git a/.github/workflows/test-generate-files.yml b/.github/workflows/test-generate-files.yml new file mode 100644 index 0000000..35f2f80 --- /dev/null +++ b/.github/workflows/test-generate-files.yml @@ -0,0 +1,23 @@ +name: Test Generate Files + +on: + push: + branches: + - 'main' + pull_request: + branches: + - '*' + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Run test-generate-files.sh + run: | + chmod +x tests/test-generate-files.sh + ./tests/test-generate-files.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c46ea5d --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +gpt-values-override-conf.*.sh +!gpt-values-override-conf.dist.sh +out/* +!out/.gitkeep diff --git a/README.md b/README.md new file mode 100644 index 0000000..fb3fc89 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ + +# AI-Memory + +**Elasticsearch API and GPT Model** +-------------------------------- + +**Overview** +------------ + +This project utilizes an Elasticsearch API and a GPT model to store and manage a chronological repository of information about specific topics, activities, and interactions. The GPT model functions as an extended memory system, or Retrieval-Augmented Generation (RAG) system, to provide suggestions, manage tasks, and offer reminders. + +**Key Features** +---------------- + +* **Chronological Tracking**: The model tracks the addition and modification of information, allowing it to understand the sequence of events or data entries. +* **Information Retrieval**: The model can efficiently retrieve information from Elasticsearch using queries that might involve specific dates, topics, or statuses. +* **Decision Making**: Based on retrieved data, the model generates reasoned responses that consider historical data. +* **Assistant Capabilities**: The model provides suggestions, manages tasks, and offers reminders.
+ +**Usage** +--------- + +* **Elasticsearch API**: The API is used to store and manage data. +* **GPT Model**: The model is used to generate responses and provide suggestions, and can be interacted with using natural language inputs. + +**Guidelines** +------------- + +* **Personal Info**: When searching or creating documents it refers to yourself. +* **Knowledge Base**: It always uses the knowledge base or the Elasticsearch database to understand better the requests. +* **Custom Mappings (experimental)**: It uses the `x-elasticsearch-type` property to configure custom mappings for the index, allowing for the specification of Elasticsearch data types for each field. + +**License** +---------- + +This project is licensed under MIT license. diff --git a/generate-files.sh b/generate-files.sh new file mode 100755 index 0000000..c5c864b --- /dev/null +++ b/generate-files.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") + +# Generate the GPT schema and instructions files for one override configuration file. +# Arguments: $1 - override configuration file to load; $2 - ID used in the output file names. +# Reads gpt-values-override-conf.dist.sh, gpt-schema.dist.yml and gpt-instructions.dist.md from the current directory. +# Outputs: out/gpt-schema.$2.yml and out/gpt-instructions.$2.md; progress and warning messages on stdout. +process_conf_file() { + local conf_file=$1 + local my_id=$2 + + echo "Processing $conf_file with ID $my_id..." + + # Load the override configuration file + if [ -f "$conf_file" ] && [ -s "$conf_file" ]; then + echo "Loading $conf_file..."
+ source "$conf_file" + fi + + # Read the default configuration file and store the variables + dist_vars=() + if [ -f "gpt-values-override-conf.dist.sh" ]; then + while IFS= read -r line; do + # Skip commented lines + if [[ $line == \#* ]]; then + continue + fi + + # Remove 'export' if it exists + line=$(echo "$line" | sed 's/^export //') + + # Extract the variable name and value. Surrounding quotes are stripped from + # the value so that the fallback matches what sourcing the line would assign + VAR_NAME=$(echo "$line" | cut -d'=' -f 1) + VAR_VALUE=$(echo "$line" | cut -d'=' -f 2- | sed -e 's/^"\(.*\)"$/\1/' -e "s/^'\(.*\)'\$/\1/") + + # Store the variable in the array + dist_vars+=("$VAR_NAME") + + # Check if the variable name is not empty + if [ -n "$VAR_NAME" ]; then + # Check if the variable is set + if [ -z "${!VAR_NAME}" ]; then + echo -e "\033[33mWarning: The variable $VAR_NAME is not defined in $conf_file. The fallback value will be used.\033[0m" + declare -x "$VAR_NAME=$VAR_VALUE" + fi + fi + done <"gpt-values-override-conf.dist.sh" + fi + + # Check for variables in the override file that are not in the dist file + if [ -f "$conf_file" ] && [ -s "$conf_file" ]; then + while IFS= read -r line; do + # Skip commented lines + if [[ $line == \#* ]]; then + continue + fi + + # Remove 'export' if it exists + line=$(echo "$line" | sed 's/^export //') + + # Extract the variable name + VAR_NAME=$(echo "$line" | cut -d'=' -f 1) + + # Check if the variable is not in the dist file + if [ -n "$VAR_NAME" ]; then + found=false + for var in "${dist_vars[@]}"; do + if [ "$var" == "$VAR_NAME" ]; then + found=true + break + fi + done + if [ "$found" == false ]; then + echo -e "\033[33mWarning: The variable $VAR_NAME is defined in $conf_file but not in gpt-values-override-conf.dist.sh.\033[0m" + fi + fi + done <"$conf_file" + fi + + # Replace placeholders in the templates using envsubst. Substitution is + # limited to the exported variables: without this list envsubst would also + # blank out every other "$name" token, such as the "$ref" keys of the schema + local subst_format + subst_format=$(compgen -e | sed 's/^/${/; s/$/}/') + envsubst "$subst_format" <"gpt-schema.dist.yml" >"out/gpt-schema.$my_id.yml" + envsubst "$subst_format" <"gpt-instructions.dist.md" >"out/gpt-instructions.$my_id.md" + + echo "Files gpt-schema.$my_id.yml and gpt-instructions.$my_id.md have been generated."
+} + +# Loop over all configuration files, skipping the .dist.sh file +for conf_file in gpt-values-override-conf.*.sh; do + # Skip the .dist.sh file + if [[ "$conf_file" == *".dist.sh" ]]; then + continue + fi + + # Extract the [my_id] part from the filename + my_id=$(echo "$conf_file" | sed 's/^gpt-values-override-conf\.//;s/\.sh$//') + + # Process the configuration file in a subshell so that variables loaded + # from one override file cannot leak into the processing of the next one + (process_conf_file "$conf_file" "$my_id") +done diff --git a/gpt-instructions.dist.md b/gpt-instructions.dist.md new file mode 100644 index 0000000..9a4af5f --- /dev/null +++ b/gpt-instructions.dist.md @@ -0,0 +1,142 @@ +# MyElasticSearch Documentation + +## Purpose + +The primary goal of this GPT model is to function as an extended memory system, or Retriever-Augmented Generator (RAG). It stores and manages a chronological repository of information about specific topics, activities, and interactions, supporting decision-making, task management, and generating contextually relevant responses. + +## Personal info and how to answer + +I'm ${AI_MEMORY_PERSONAL_NAME}. When you have to search or create documents related to me, refer to my name. Also, always use your knowledge base or the ElasticSearch database to understand better my requests + +${AI_MEMORY_EXTRA_PERSONAL_INFO} + +## Key Functionalities + +### Chronological Tracking + +The model tracks the addition and modification of information, allowing it to understand the sequence of events or data entries. This tracking ensures that responses are based on the latest and most relevant data. + +### Information Retrieval + +The model can efficiently retrieve information from Elasticsearch using queries that might involve specific dates, topics, or statuses. This ability allows the model to act as an intelligent query handler. + +### Decision Making + +Based on retrieved data, the model generates reasoned responses that consider historical data. This helps in providing suggestions, managing tasks, and offering reminders.
+ +### Assistant Capabilities + +The model acts as a virtual assistant, using stored information to manage tasks, documents, and reminders, and provides alerts or suggestions based on past inputs and upcoming deadlines. + +## Document Management and Versioning + +### In-Document Versioning with `__meta_revisions` + +All updates to a document are handled by copying the old properties into a `__meta_revisions` field within the same document. This ensures a unified document structure and enhances reliability. + +- **`__meta_revisions`**: An array where each element contains a snapshot of the document's properties before the latest update. Each entry in `__meta_revisions` should include: + - **@timestamp**: The date and time when the revision was created. + - **content**: The content of the document before the update. + - **other relevant fields**: Any other fields that have changed since the last revision. + +## Understanding the Schema + +### Indexed Fields + +These fields should be indexed for efficient querying and retrieval: + +- **`@timestamp`**: The current date and time when creating or updating a document. +- **`type`**: Specifies the category of the document (e.g., "reminder", "file"). +- **`content`**: Contains the main content or details of the document. +- **`tag`**: Tags used for categorization and future retrieval. +- **`status`**: Reflects the current state of the document (e.g., active, in_progress, done). +- **`start_date` / `end_date`**: Specify the start and end dates, if applicable. + +### Non-Indexed Fields + +These fields do not need to be indexed. To differentiate them from indexed fields, they should be prefixed with `__meta_`: + +- **`__meta_disabled`**: Used to deactivate or archive documents. +- **`__meta_update_reason`**: Provides the rationale behind any updates made to the document. +- **`__meta_revisions`**: Stores previous versions of the document's content and other relevant fields.
+- **`__meta_document_ref`**: Links to any related documents by their Document ID(s). + +## Operations on Documents + +### Searching for Documents + +#### Constructing Queries: + +- Formulate queries based on keywords, document types, tags, or other criteria. +- Queries should be sent as POST requests to `/index-ai-memory-\*/_search`. +- Apply filters to refine search results, such as filtering out deactivated documents using `__meta_disabled`. +- Sort results based on relevance, date, or other criteria to prioritize the most relevant information. + +### Adding or Updating a Document + +#### Required Fields: + +- Include `@timestamp`, `type`, and `content` as mandatory fields. +- Determine appropriate tags and document type based on the context provided. +- If there are related documents, link them using the `__meta_document_ref` field. +- If adding a new document, generate a JSON payload and submit it as a POST request to `/index-ai-memory-default/_doc/`. +- New documents should have their status set to "active" unless specified otherwise. +- Ensure that the `@timestamp` field reflects the current date and time. + +#### Updating Existing Documents: + +- **When updating one or more existing documents,** **copy only the changed properties** (such as `content`, `status`, etc.) **to `__meta_revisions` before applying any changes.** This should be done using a script or within the update process to ensure historical data is preserved. 
+ +example of the script: + +``` +{ + "id": "B4Qtb5EByKxxX0hsdDZy", + "script": { + "source": "def revision = [:]; revision.status = ctx._source.status; revision.__meta_update_reason = ctx._source.__meta_update_reason; revision['@timestamp'] = ctx._source['@timestamp']; if (ctx._source.__meta_revisions == null) { ctx._source.__meta_revisions = []; } ctx._source.__meta_revisions.add(revision); ctx._source.status = 'in_progress'; ctx._source.__meta_update_reason = 'Changed the status to in_progress'; ctx._source['@timestamp'] = '2024-08-20T10:30:00Z';" + } +} +``` + +### Deactivating a Document + +#### Deactivation: + +- Instead of deleting documents, set the `__meta_disabled` field to "true" to deactivate them. + +## Dynamic Field Management + +The system can dynamically add as many fields as needed when they contain metadata that is useful to keep separated from the content. + +### Configuring Index Mappings (experimental) + +Use the `/index-ai-memory-default/_mapping` path to define or update custom mappings for your index. Mappings determine how fields are stored and indexed, which is crucial for efficient data retrieval. + +#### When to Use: + +- **New Index Setup**: Define mappings when creating a new index. +- **Updating Mappings**: Modify mappings as your data model evolves. +- **Optimizing Queries**: Improve search performance by fine-tuning field types and indexing strategies. + +#### How to Use: + +- **Define Mappings**: Create a JSON payload under `properties` with the desired field types. +- **Send Request**: Use a PUT request to apply mappings to the index. + +##### Using `x-elasticsearch-type` for Custom Mappings + +To configure custom mappings for your index, use the `x-elasticsearch-type` property to specify the Elasticsearch data type for each field. This allows you to define how each field should be indexed and stored. 
+ +###### When to Use: + +- **Mapping New Fields**: Use `x-elasticsearch-type` when defining the fields in your schema to specify how Elasticsearch should handle them. +- **Customizing Data Types**: Use this property to ensure that fields are indexed correctly according to your application's needs. + +###### Supported Types: + +- **text**: Used for full-text search fields. +- **keyword**: Used for exact match search fields. +- **date**: Used for date and time fields. +- **boolean**: Used for true/false values. +- **object**: Used for nested objects. +- **other types**: Refer to Elasticsearch documentation for additional supported types. \ No newline at end of file diff --git a/gpt-schema.dist.yml b/gpt-schema.dist.yml new file mode 100644 index 0000000..93304ea --- /dev/null +++ b/gpt-schema.dist.yml @@ -0,0 +1,424 @@ +--- +openapi: 3.1.0 +info: + version: 1.0.0 + title: Elasticsearch API + license: + name: MIT +servers: + - url: ${AI_MEMORY_ELASTIC_SEARCH_URL} + +paths: + /${AI_MEMORY_ELASTIC_SEARCH_INDEX}/_doc/: + post: + summary: Add a new document. The content field is always required. 
+ operationId: addDocument + x-openai-isConsequential: false + tags: + - document + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Document" + responses: + "201": + description: Document added successfully + content: + application/json: + schema: + type: object + properties: + _id: + type: string + description: The ID of the newly created document + default: + description: Unexpected error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + "/${AI_MEMORY_ELASTIC_SEARCH_INDEX}/_update/{id}": + post: + summary: Update a document by ID + operationId: updateDocument + x-openai-isConsequential: false + tags: + - document + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Document ID + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DocumentUpdate" + responses: + "200": + description: Document updated successfully + default: + description: Unexpected error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /${AI_MEMORY_ELASTIC_SEARCH_INDEX}/_update_by_query: + post: + summary: Bulk update documents by query + operationId: bulkUpdateDocuments + x-openai-isConsequential: false + tags: + - document + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + query: + type: object + description: Elasticsearch query to match documents for updating + example: + match: + status: pending + script: + type: object + properties: + source: + type: string + description: Script to execute on matched documents + example: ctx._source.status = 'completed'; + params: + type: object + additionalProperties: true + description: Parameters for the script + required: + - query + - script + responses: + "200": + description: Documents updated successfully + default: + description: Unexpected error + content: + application/json: + schema: + 
$ref: "#/components/schemas/Error" + + /index-ai-memory-*/_search: + post: + summary: Search for documents + operationId: searchDocuments + x-openai-isConsequential: false + tags: + - search + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + query: + type: object + properties: + query_string: + type: object + properties: + query: + type: string + description: Search keywords + required: + - query + required: + - query_string + size: + type: integer + description: Number of search results to return + example: 5 + required: + - query + - size + responses: + "200": + description: Search results + content: + application/json: + schema: + $ref: "#/components/schemas/SearchResults" + default: + description: Unexpected error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /${AI_MEMORY_ELASTIC_SEARCH_INDEX}/_mapping: + put: + summary: Configure the index with custom mappings + operationId: configureIndex + x-openai-isConsequential: false + tags: + - index + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + properties: + type: object + properties: + content: + type: string + description: Contains the main content or details of the document. + x-elasticsearch-type: text + peopleInvolved: + type: string + description: List of people involved in the memory or event. + x-elasticsearch-type: keyword + priority: + type: string + description: Priority level of the task or reminder. + x-elasticsearch-type: keyword + start_date: + type: string + format: date-time + description: The start date or time associated with the document. + x-elasticsearch-type: date + end_date: + type: string + format: date-time + description: The end date or time associated with the document. + x-elasticsearch-type: date + status: + type: string + description: The current status of the document. 
+ x-elasticsearch-type: keyword + location: + type: string + description: Location where the memory or event occurred. + x-elasticsearch-type: keyword + visitedWith: + type: string + description: List of people who visited the location with you. + x-elasticsearch-type: keyword + author: + type: string + description: Author of the quote. + x-elasticsearch-type: keyword + context: + type: string + description: Context or occasion where the quote was said. + x-elasticsearch-type: text + "@timestamp": + type: string + format: date-time + description: The timestamp when the document was created or updated. + x-elasticsearch-type: date + type: + type: string + description: Defines the category or type of the document. + x-elasticsearch-type: keyword + tag: + type: string + description: Tags associated with the document for easier categorization. + x-elasticsearch-type: keyword + __meta_disabled: + type: boolean + description: Flag indicating whether the document is disabled or inactive. + x-elasticsearch-type: boolean + __meta_update_reason: + type: string + description: Description of why the document was updated. + x-elasticsearch-type: text + __meta_revisions: + type: object + additionalProperties: false + description: Stores previous versions of the document's content. + x-elasticsearch-type: object + __meta_document_ref: + type: string + description: List of document IDs that this document refers to. + x-elasticsearch-type: keyword + responses: + "200": + description: Index configured successfully + default: + description: Unexpected error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + +components: + schemas: + Document: + type: object + properties: + content: + type: string + description: Contains the main content or details of the document. + + # Memory/Achievement/Reminder/Todo-specific Properties + + peopleInvolved: + type: array + items: + type: string + description: "List of people involved in the memory or event." 
+ + priority: + type: string + description: "Priority level of the task or reminder (e.g., high, medium, low)." + + start_date: + type: string + format: "date-time" + description: + The start date or time associated with the document, such as the + beginning of an event or the due date of a task. + end_date: + type: string + format: "date-time" + description: + The end date or time associated with the document, such as the end + of an event or the deadline of a task. + + status: + type: string + description: The current status of the document + + location: + type: string + description: "Location where the memory or event occurred." + + # Location-specific Properties + visitedWith: + type: array + items: + type: string + description: "List of people who visited the location with you." + + # Quote-specific Properties + author: + type: string + description: "Author of the quote." + context: + type: string + description: "Context or occasion where the quote was said." + + # Internal properties + + "@timestamp": + type: string + description: The timestamp when the document was created or updated. + format: date-time + + type: + type: string + description: + Defines the category or type of the document, such as reminder, + achievement, file, location, event, task, etc. + tag: + type: array + items: + type: string + description: "Tags associated with the document for easier categorization." + __meta_revisions: + type: array + items: + type: object + additionalProperties: true + properties: + timestamp: + type: string + format: date-time + __meta_update_reason: + type: string + description: Description of why the document was updated. + description: + Description of why the document was updated, providing context for + the changes made. + __meta_document_ref: + type: array + items: + type: string + description: + List of document IDs that this document refers to, enabling rich + inter-document relationships. 
+ __meta_disabled: + type: boolean + description: Flag indicating whether the document is disabled or inactive. + + required: + - type + - content + - status + - "@timestamp" + - tag + + DocumentUpdate: + type: object + properties: + doc: + type: object + additionalProperties: true + description: Fields to update in the document. + upsert: + type: object + additionalProperties: true + description: Document to insert if it does not exist. + script: + type: object + properties: + source: + type: string + description: Inline script to execute on the document. + oneOf: + - required: + - script + - required: + - doc + - required: + - upsert + + SearchResults: + type: object + properties: + results: + type: array + items: + type: object + properties: + _id: + type: string + description: Document ID + _source: + $ref: "#/components/schemas/Document" + description: Array of documents that match the search criteria + + Error: + type: object + required: + - code + - message + properties: + code: + type: integer + format: int32 + message: + type: string + context: + type: string + description: Detailed context about where the error occurred. 
diff --git a/gpt-values-override-conf.dist.sh b/gpt-values-override-conf.dist.sh new file mode 100644 index 0000000..66bfaa1 --- /dev/null +++ b/gpt-values-override-conf.dist.sh @@ -0,0 +1,21 @@ +# Environment variables for the AI-Memory application + +# Elasticsearch URL +# This is the base URL for your Elasticsearch instance +# Example: https://your-elastic-search-url +export AI_MEMORY_ELASTIC_SEARCH_URL="https://your-elastic-search-url" + +# Elasticsearch index name +# This is the name of the index where the documents will be stored +# Example: index-ai-memory-default +export AI_MEMORY_ELASTIC_SEARCH_INDEX="index-ai-memory-default" + +# Personal name +# This is the name that will be used in the model's responses +# Example: Foo bar +export AI_MEMORY_PERSONAL_NAME="Foo bar" + +# Extra personal information +# This is additional information that will be used in the model's responses +# Example: Interaction languages: English +export AI_MEMORY_EXTRA_PERSONAL_INFO="Interaction languages: English" diff --git a/out/.gitkeep b/out/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/test-generate-files.sh b/tests/test-generate-files.sh new file mode 100755 index 0000000..5ed4614 --- /dev/null +++ b/tests/test-generate-files.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bats + +# Setup function to create necessary files before each test. It first moves to the directory of this test file, so that the fixtures, ../ (the repository root) and out/ resolve the same way however bats is invoked +setup() { + cd "$BATS_TEST_DIRNAME" || return 1 + mkdir -p out + echo 'export AI_MEMORY_ELASTIC_SEARCH_URL="https://hw2nl.ddns.net/elasticsearch"' >gpt-values-override-conf.default.sh + echo 'export AI_MEMORY_ELASTIC_SEARCH_INDEX="index-ai-memory-default"' >>gpt-values-override-conf.default.sh + echo 'export AI_MEMORY_PERSONAL_NAME="Test"' >>gpt-values-override-conf.default.sh + echo 'export AI_MEMORY_EXTRA_PERSONAL_INFO="Interaction languages: English"' >>gpt-values-override-conf.default.sh + + echo 'export AI_MEMORY_ELASTIC_SEARCH_URL="https://default-url.com"' >gpt-values-override-conf.dist.sh + echo 'export AI_MEMORY_ELASTIC_SEARCH_INDEX="default-index"'
>>gpt-values-override-conf.dist.sh + + cp ../gpt-schema.dist.yml ./gpt-schema.dist.yml + cp ../gpt-instructions.dist.md ./gpt-instructions.dist.md +} + +# Teardown function to clean up after each test +teardown() { + rm -rf out/ + rm gpt-values-override-conf.default.sh + rm gpt-values-override-conf.dist.sh + + rm gpt-schema.dist.yml + rm gpt-instructions.dist.md +} + +@test "Check if the script processes the default configuration file" { + run bash ../generate-files.sh + [ "$status" -eq 0 ] + [ -f "out/gpt-schema.default.yml" ] + [ -f "out/gpt-instructions.default.md" ] +} + +@test "Check if warnings are displayed for missing variables in dist file" { + run bash ../generate-files.sh + [ "$status" -eq 0 ] + [[ "$output" == *"Warning: The variable AI_MEMORY_PERSONAL_NAME is defined in gpt-values-override-conf.default.sh but not in gpt-values-override-conf.dist.sh."* ]] + [[ "$output" == *"Warning: The variable AI_MEMORY_EXTRA_PERSONAL_INFO is defined in gpt-values-override-conf.default.sh but not in gpt-values-override-conf.dist.sh."* ]] +} + +@test "Check if fallback values are used when variables are not defined in override file" { + echo 'export AI_MEMORY_ELASTIC_SEARCH_INDEX=""' >gpt-values-override-conf.default.sh + run bash ../generate-files.sh + [ "$status" -eq 0 ] + [[ "$output" == *"Warning: The variable AI_MEMORY_ELASTIC_SEARCH_INDEX is not defined in gpt-values-override-conf.default.sh. The fallback value will be used."* ]] +}