Skip to content

Commit

Permalink
entity models, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eikek committed Jan 31, 2025
1 parent b5270c5 commit cd068fe
Show file tree
Hide file tree
Showing 8 changed files with 401 additions and 207 deletions.
152 changes: 152 additions & 0 deletions components/renku_data_services/solr/entity_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Defines the entity documents used with Solr."""

from abc import abstractmethod
from datetime import UTC, datetime
from enum import StrEnum
from typing import Annotated, Any, Self

from pydantic import AliasChoices, BaseModel, BeforeValidator, Field, field_serializer, field_validator
from ulid import ULID

from renku_data_services.authz.models import Visibility
from renku_data_services.base_models.core import Slug
from renku_data_services.solr.solr_client import DocVersion, ResponseBody


def _str_to_slug(value: Any) -> Any:
if isinstance(value, str):
return Slug.from_name(value)
else:
return value


class EntityType(StrEnum):
"""The different type of entities available from search."""

project = "Project"
user = "User"
group = "Group"


class EntityDoc(BaseModel, frozen=True):
    """Common base for the entity document models.

    Concrete documents add their own fields and implement ``entity_type``.
    """

    # String input is turned into a Slug before validation runs.
    namespace: Annotated[Slug, BeforeValidator(_str_to_slug)]
    # Read from either "version" or "_version_"; always written as "_version_".
    version: int = Field(
        default=DocVersion.not_exists.value,
        validation_alias=AliasChoices("version", "_version_"),
        serialization_alias="_version_",
    )
    score: float | None = None

    @abstractmethod
    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Return this document as a dict, including the ``_type`` and ``_kind`` fields.

        Fields are dumped by alias and fields still at their default value are left out.
        """
        # note: _kind=fullentity is for being backwards compatible, it might not be needed in the future
        return {
            **self.model_dump(by_alias=True, exclude_defaults=True),
            "_type": self.entity_type().value,
            "_kind": "fullentity",
        }

    def reset_solr_fields(self) -> Self:
        """Return a copy with the fields that solr fills when querying set back to their defaults."""
        solr_defaults = {"version": DocVersion.not_exists.value, "score": None}
        return self.model_copy(update=solr_defaults)


class User(EntityDoc, frozen=True):
    """Represents a renku user in SOLR."""

    id: str
    firstName: str | None = None
    lastName: str | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.user

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        """Serialize the namespace slug as its plain string value."""
        return namespace.value

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a User from a dictionary.

        Validation goes through ``cls`` so that calling this on a subclass
        returns an instance of that subclass instead of a plain ``User``.
        """
        return cls.model_validate(d)


class Group(EntityDoc, frozen=True):
    """Represents a renku group in SOLR."""

    id: ULID
    name: str
    description: str | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.group

    @field_serializer("id", when_used="always")
    def __serialize_id(self, id: ULID) -> str:
        """Serialize the ULID as its string form."""
        return str(id)

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        """Serialize the namespace slug as its plain string value."""
        return namespace.value

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a Group from a dictionary.

        Validation goes through ``cls`` so that calling this on a subclass
        returns an instance of that subclass instead of a plain ``Group``.
        """
        return cls.model_validate(d)


class Project(EntityDoc, frozen=True):
    """Represents a renku project in SOLR."""

    id: ULID
    name: str
    # String input is turned into a Slug before validation runs.
    slug: Annotated[Slug, BeforeValidator(_str_to_slug)]
    visibility: Visibility
    createdBy: str
    creationDate: datetime
    repositories: list[str] = Field(default_factory=list)
    description: str | None = None
    keywords: list[str] = Field(default_factory=list)
    namespaceDetails: ResponseBody | None = None
    creatorDetails: ResponseBody | None = None

    def entity_type(self) -> EntityType:
        """Return the type of this entity."""
        return EntityType.project

    @field_serializer("namespace", when_used="always")
    def __serialize_namespace(self, namespace: Slug) -> str:
        """Serialize the namespace slug as its plain string value."""
        return namespace.value

    @field_serializer("id", when_used="always")
    def __serialize_id(self, id: ULID) -> str:
        """Serialize the ULID as its string form."""
        return str(id)

    @field_serializer("slug", when_used="always")
    def __serialize_slug(self, slug: Slug) -> str:
        """Serialize the project slug as its plain string value."""
        return slug.value

    @field_serializer("visibility", when_used="always")
    def __serialize_visibility(self, visibility: Visibility) -> str:
        """Serialize the visibility as its enum value."""
        return visibility.value

    @field_serializer("creationDate", when_used="always")
    def __serialize_creation_date(self, creationDate: datetime) -> str:
        """Format the creation date with second precision and a literal ``Z`` suffix."""
        # The "Z" is a literal; the validator below normalises validated values to UTC.
        return creationDate.strftime("%Y-%m-%dT%H:%M:%SZ")

    @field_validator("creationDate")
    @classmethod
    def _add_tzinfo(cls, v: datetime) -> datetime:
        """Normalise the creation date to a timezone-aware UTC datetime.

        Naive values are taken to already be in UTC and only get the tzinfo
        attached. Aware values are converted to UTC, so that a non-UTC offset
        does not silently shift the instant.
        """
        if v.tzinfo is None or v.utcoffset() is None:
            return v.replace(tzinfo=UTC)
        return v.astimezone(UTC)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Create a Project from a dictionary.

        Validation goes through ``cls`` so that calling this on a subclass
        returns an instance of that subclass instead of a plain ``Project``.
        """
        return cls.model_validate(d)
42 changes: 25 additions & 17 deletions components/renku_data_services/solr/entity_schema.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Defines the solr schema used for the renku entities."""

from renku_data_services.solr.solr_migrate import SchemaMigration
from renku_data_services.solr.solr_schema import (
AddCommand,
Expand All @@ -14,27 +16,31 @@


class Fields:
created_by: FieldName = FieldName("createdBy")
creation_date: FieldName = FieldName("creationDate")
description: FieldName = FieldName("description")
entityType: FieldName = FieldName("_type")
kind: FieldName = FieldName("_kind")
firstName: FieldName = FieldName("firstName")
id: FieldName = FieldName("id")
lastName: FieldName = FieldName("lastName")
members: FieldName = FieldName("members")
name: FieldName = FieldName("name")
repositories: FieldName = FieldName("repositories")
slug: FieldName = FieldName("slug")
visibility: FieldName = FieldName("visibility")
keywords: FieldName = FieldName("keywords")
namespace: FieldName = FieldName("namespace")
contentAll: FieldName = FieldName("content_all")
"""A collection of fields."""

created_by = FieldName("createdBy")
creation_date = FieldName("creationDate")
description = FieldName("description")
entityType = FieldName("_type")
kind = FieldName("_kind")
firstName = FieldName("firstName")
id = FieldName("id")
lastName = FieldName("lastName")
members = FieldName("members")
name = FieldName("name")
repositories = FieldName("repositories")
slug = FieldName("slug")
visibility = FieldName("visibility")
keywords = FieldName("keywords")
namespace = FieldName("namespace")
contentAll = FieldName("content_all")
# virtual score field
score: FieldName = FieldName("score")
score = FieldName("score")


class Analyzers:
"""A collection of analyzers."""

textIndex = Analyzer(
tokenizer=Tokenizers.uax29UrlEmail,
filters=[
Expand All @@ -58,6 +64,8 @@ class Analyzers:


class FieldTypes:
"""A collection of field types."""

id: FieldType = FieldType.id(TypeName("SearchId")).make_doc_value()
string: FieldType = FieldType.str(TypeName("SearchString")).make_doc_value()
text: FieldType = (
Expand Down
150 changes: 0 additions & 150 deletions components/renku_data_services/solr/main.py

This file was deleted.

Loading

0 comments on commit cd068fe

Please sign in to comment.