Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add model classes for renku storing entities in solr #624

Merged
merged 19 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
"command": "alias k=kubectl"
},
"ghcr.io/devcontainers-contrib/features/rclone:1": {},
"./k3d": {}
"./k3d": {},
"ghcr.io/devcontainers/features/java:1": {
"version": "21",
"jdkDistro": "open"
},
"./solr": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/devcontainers-contrib/features/poetry",
Expand Down
14 changes: 14 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ services:
depends_on:
- db
- authz
- solr

db:
image: postgres:latest
Expand Down Expand Up @@ -70,5 +71,18 @@ services:
command:
- serve

# Solr server used by the devcontainer setup.
solr:
  image: solr:latest
  restart: unless-stopped
  volumes:
    # Named volume `solr_data` (declared under the top-level `volumes:` key) mounted at /var/solr.
    - solr_data:/var/solr
  command:
    - bash
    - -c
    # Runs `precreate-core` for the `renku-search-dev` core first, then replaces the
    # shell with the Solr process (`-f`) with the `analysis-extras` module enabled.
    - 'precreate-core renku-search-dev; exec solr -f -Dsolr.modules=analysis-extras'
  ports:
    - "8983:8983"

volumes:
postgres-data:
solr_data:
18 changes: 18 additions & 0 deletions .devcontainer/solr/devcontainer-feature.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "solr",
"id": "solr",
"version": "1.0.0",
"description": "A feature adding solr",
"options": {
"solr_version": {
"type": "string",
"description": "solr version to install",
"proposals": ["9.8.0"],
"default": "9.8.0"
}
},
"installsAfter": [
"ghcr.io/devcontainers/features/java",
"ghcr.io/devcontainers-contrib/features/bash-command"
]
}
16 changes: 16 additions & 0 deletions .devcontainer/solr/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/sh
# Devcontainer feature install script for Solr.
#
# Downloads the Solr release named by $SOLR_VERSION, unpacks it below /opt,
# links it to /opt/solr and puts the `solr` launcher on the PATH.
#
# Environment:
#   SOLR_VERSION  (required) Solr version to install, e.g. 9.8.0.
#   _REMOTE_USER  (optional) user that should own the installation;
#                 falls back to `vscode`, the previously hard-coded owner.
set -e

echo "Activation SOLR feature"

# Fail early with a clear message instead of building a broken download URL.
: "${SOLR_VERSION:?SOLR_VERSION must be set}"

solr_user="${_REMOTE_USER:-vscode}"
download_url="https://dlcdn.apache.org/solr/solr/$SOLR_VERSION/solr-$SOLR_VERSION.tgz"

# Download to a temporary file that is removed on exit, so the archive does
# not linger in the image after installation.
archive="$(mktemp)"
trap 'rm -f "$archive"' EXIT

# -f makes curl fail on HTTP errors (e.g. 404 for a version that is no longer
# hosted) instead of saving the error page and letting tar fail obscurely.
curl -fsSL -o "$archive" "$download_url"
mkdir -p /opt
tar -C /opt -xzf "$archive"
ln -snf "/opt/solr-$SOLR_VERSION" /opt/solr
ln -snf /opt/solr/bin/solr /usr/local/bin/solr
mkdir -p /opt/solr/server/logs
chmod 777 /opt/solr/server/logs
chmod 777 /opt/solr/bin
# Use the user's primary group rather than assuming a group of the same name.
chown -R "$solr_user:$(id -gn "$solr_user")" "/opt/solr-$SOLR_VERSION"
1 change: 1 addition & 0 deletions components/renku_data_services/solr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Client library to solr and mapping for renku entities."""
152 changes: 152 additions & 0 deletions components/renku_data_services/solr/entity_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Defines the entity documents used with Solr."""

from abc import abstractmethod
from datetime import UTC, datetime
from enum import StrEnum
from typing import Annotated, Any, Self

from pydantic import AliasChoices, BaseModel, BeforeValidator, Field, field_serializer, field_validator
from ulid import ULID

from renku_data_services.authz.models import Visibility
from renku_data_services.base_models.core import Slug
from renku_data_services.solr.solr_client import DocVersion, ResponseBody


def _str_to_slug(value: Any) -> Any:
if isinstance(value, str):
return Slug.from_name(value)
else:
return value
eikek marked this conversation as resolved.
Show resolved Hide resolved


class EntityType(StrEnum):
"""The different type of entities available from search."""

project = "Project"
user = "User"
group = "Group"


class EntityDoc(BaseModel, frozen=True):
    """Base class for entity document models.

    Holds the fields every entity document shares: the ``namespace`` plus the
    ``version`` and ``score`` fields, which `reset_solr_fields` treats as
    filled in by solr when querying.
    """

    namespace: Annotated[Slug, BeforeValidator(_str_to_slug)]
    # Accepted on input as either `version` or `_version_`; always written out as `_version_`.
    version: int = Field(
        serialization_alias="_version_",
        validation_alias=AliasChoices("version", "_version_"),
        default=DocVersion.not_exists.value,
    )
    score: float | None = None

    @abstractmethod
    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Return the dict representation of this entity.

        Fields are dumped by alias (so ``version`` becomes ``_version_``) and
        fields that still hold their default value are left out. The entity
        type is added as ``_type`` and ``_kind`` is set to ``"fullentity"``.
        """
        data = self.model_dump(by_alias=True, exclude_defaults=True)
        # note: _kind=fullentity is for being backwards compatible, it might not be needed in the future
        data.update(_type=self.entity_type().value, _kind="fullentity")
        return data

    def reset_solr_fields(self) -> Self:
        """Return a copy with the fields that solr fills when querying reset.

        ``version`` goes back to ``DocVersion.not_exists`` and ``score`` to
        ``None``; the instance itself is frozen and stays unchanged.
        """
        return self.model_copy(update={"version": DocVersion.not_exists.value, "score": None})


class User(EntityDoc, frozen=True):
    """Represents a renku user in SOLR."""

    id: str
    firstName: str | None = None
    lastName: str | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.user

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        # Slug is written out as its plain string value.
        return namespace.value

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a User from a dictionary.

        Validation goes through ``cls`` so that a subclass calling this
        constructor gets an instance of itself rather than a plain ``User``.
        """
        return cls.model_validate(d)


class Group(EntityDoc, frozen=True):
    """Represents a renku group in SOLR."""

    id: ULID
    name: str
    description: str | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.group

    @field_serializer("id", when_used="always")
    def __serialize_id(self, id: ULID) -> str:
        # ULID is written out in its string form.
        return str(id)

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        # Slug is written out as its plain string value.
        return namespace.value

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a Group from a dictionary.

        Validation goes through ``cls`` so that a subclass calling this
        constructor gets an instance of itself rather than a plain ``Group``.
        """
        return cls.model_validate(d)


class Project(EntityDoc, frozen=True):
    """Represents a renku project in SOLR."""

    id: ULID
    name: str
    slug: Annotated[Slug, BeforeValidator(_str_to_slug)]
    visibility: Visibility
    createdBy: str
    creationDate: datetime
    repositories: list[str] = Field(default_factory=list)
    description: str | None = None
    keywords: list[str] = Field(default_factory=list)
    namespaceDetails: ResponseBody | None = None
    creatorDetails: ResponseBody | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.project

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        # Slug is written out as its plain string value.
        return namespace.value

    @field_serializer("id", when_used="always")
    def __serialize_id(self, id: ULID) -> str:
        # ULID is written out in its string form.
        return str(id)

    @field_serializer("slug", when_used="always")
    def __serialize_slug(self, slug: Slug) -> str:
        return slug.value

    @field_serializer("visibility", when_used="always")
    def __serialize_visibility(self, visibility: Visibility) -> str:
        return visibility.value

    @field_serializer("creationDate", when_used="always")
    def __serialize_creation_date(self, creationDate: datetime) -> str:
        # The literal "Z" suffix is correct because `_add_tzinfo` normalizes the value to UTC.
        return creationDate.strftime("%Y-%m-%dT%H:%M:%SZ")

    @field_validator("creationDate")
    @classmethod
    def _add_tzinfo(cls, v: datetime) -> datetime:
        """Normalize the creation date to an aware UTC datetime.

        Naive values are taken to already be in UTC and only get the tzinfo
        attached. Aware values are converted, so the instant is preserved
        instead of relabelling a non-UTC wall-clock time as UTC.
        """
        if v.tzinfo is None:
            return v.replace(tzinfo=UTC)
        return v.astimezone(UTC)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a Project from a dictionary.

        Validation goes through ``cls`` so that a subclass calling this
        constructor gets an instance of itself rather than a plain ``Project``.
        """
        return cls.model_validate(d)
122 changes: 122 additions & 0 deletions components/renku_data_services/solr/entity_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Defines the solr schema used for the renku entities."""

from renku_data_services.solr.solr_migrate import SchemaMigration
from renku_data_services.solr.solr_schema import (
AddCommand,
Analyzer,
CopyFieldRule,
Field,
FieldName,
FieldType,
Filters,
SchemaCommand,
Tokenizers,
TypeName,
)


class Fields:
    """Names of the solr fields used by the entity schema."""

    # bookkeeping fields
    entityType = FieldName("_type")
    kind = FieldName("_kind")
    # destination of the copy-field rules
    contentAll = FieldName("content_all")
    # virtual score field
    score = FieldName("score")

    # entity data
    id = FieldName("id")
    namespace = FieldName("namespace")
    name = FieldName("name")
    slug = FieldName("slug")
    description = FieldName("description")
    visibility = FieldName("visibility")
    keywords = FieldName("keywords")
    repositories = FieldName("repositories")
    members = FieldName("members")
    created_by = FieldName("createdBy")
    creation_date = FieldName("creationDate")

    # user fields
    firstName = FieldName("firstName")
    lastName = FieldName("lastName")
eikek marked this conversation as resolved.
Show resolved Hide resolved


class Analyzers:
    """Text analyzers referenced by the field types of the entity schema."""

    # Query side: same tokenizer and filters as `textIndex`, minus the edge n-gram filter.
    textQuery = Analyzer(
        tokenizer=Tokenizers.uax29UrlEmail,
        filters=[
            Filters.lowercase,
            Filters.stop,
            Filters.english_minimal_stem,
            Filters.ascii_folding,
        ],
    )

    # Index side: the query-side chain followed by an edge n-gram filter.
    textIndex = Analyzer(
        tokenizer=Tokenizers.uax29UrlEmail,
        filters=[
            Filters.lowercase,
            Filters.stop,
            Filters.english_minimal_stem,
            Filters.ascii_folding,
            Filters.edgeNgram(2, 8, True),
        ],
    )


class FieldTypes:
    """Field type definitions used by the fields of the entity schema."""

    dateTime: FieldType = FieldType.dateTimePoint(TypeName("SearchDateTime"))
    id: FieldType = FieldType.id(TypeName("SearchId")).make_doc_value()
    string: FieldType = FieldType.str(TypeName("SearchString")).make_doc_value()

    # Analyzed text, with separate analyzers for indexing and querying.
    text: FieldType = (
        FieldType.text(TypeName("SearchText"))
        .with_index_analyzer(Analyzers.textIndex)
        .with_query_analyzer(Analyzers.textQuery)
    )
    # Same analyzers as `text`, but multi valued.
    textAll: FieldType = (
        FieldType.text(TypeName("SearchTextAll"))
        .with_index_analyzer(Analyzers.textIndex)
        .with_query_analyzer(Analyzers.textQuery)
        .make_multi_valued()
    )
eikek marked this conversation as resolved.
Show resolved Hide resolved


# Schema commands making up the initial entity schema, in the order they are listed.
# Field types are listed before the fields that refer to them, and the copy-field
# rules come after both their source fields and the `content_all` destination.
initial_entity_schema: list[SchemaCommand] = [
    # field types
    AddCommand(FieldTypes.id),
    AddCommand(FieldTypes.string),
    AddCommand(FieldTypes.text),
    AddCommand(FieldTypes.dateTime),
    # entity fields
    AddCommand(Field.of(Fields.entityType, FieldTypes.string)),
    AddCommand(Field.of(Fields.kind, FieldTypes.string)),
    AddCommand(Field.of(Fields.name, FieldTypes.text)),
    AddCommand(Field.of(Fields.slug, FieldTypes.string)),
    AddCommand(Field.of(Fields.repositories, FieldTypes.string).make_multi_valued()),
    AddCommand(Field.of(Fields.visibility, FieldTypes.string)),
    AddCommand(Field.of(Fields.description, FieldTypes.text)),
    AddCommand(Field.of(Fields.created_by, FieldTypes.id)),
    AddCommand(Field.of(Fields.creation_date, FieldTypes.dateTime)),
    # text all
    AddCommand(FieldTypes.textAll),
    AddCommand(Field.of(Fields.contentAll, FieldTypes.textAll).make_multi_valued()),
    AddCommand(CopyFieldRule(source=Fields.name, dest=Fields.contentAll)),
    AddCommand(CopyFieldRule(source=Fields.description, dest=Fields.contentAll)),
    AddCommand(CopyFieldRule(source=Fields.slug, dest=Fields.contentAll)),
    AddCommand(CopyFieldRule(source=Fields.repositories, dest=Fields.contentAll)),
    # user fields
    AddCommand(Field.of(Fields.firstName, FieldTypes.string)),
    AddCommand(Field.of(Fields.lastName, FieldTypes.string)),
    AddCommand(CopyFieldRule(source=Fields.firstName, dest=Fields.contentAll)),
    AddCommand(CopyFieldRule(source=Fields.lastName, dest=Fields.contentAll)),
    # keywords
    AddCommand(Field.of(Fields.keywords, FieldTypes.string).make_multi_valued()),
    AddCommand(CopyFieldRule(source=Fields.keywords, dest=Fields.contentAll)),
    # namespace
    AddCommand(Field.of(Fields.namespace, FieldTypes.string)),
    AddCommand(CopyFieldRule(source=Fields.namespace, dest=Fields.contentAll)),
]


# All schema migrations for the entity schema; currently only the initial schema.
# NOTE(review): the initial schema is registered as version 9, not 1 — presumably to
# continue an already existing migration history; confirm before adding further entries.
all_migrations: list[SchemaMigration] = [
    SchemaMigration(version=9, commands=initial_entity_schema, requires_reindex=True)
]
Loading
Loading