Skip to content

Commit

Permalink
[inferno-ml-server-types] Split Model and ModelVersion (#107)
Browse files Browse the repository at this point in the history
Splits the `Model` type into two parts:
- `Model`, which now just contains metadata that should not change
between versions (`name`, `user`, `permissions`)
- `ModelVersion`, which contains other version-specific metadata as well
as the actual model contents

Splitting the type like this helps prevent certain undesirable
scenarios. For example, all model versions previously had their own
`permissions`. This meant that it would be possible for a user to have
access to (or even own) one version of a model, but not others. That's
not what we want.

Also, previously the different versions of a model were only linked
together by their `name` field. This wasn't really structured into the
database in any way. Now it is possible to get all versions of a model
directly by a foreign key lookup.

I've integrated these changes on branches for the other Inferno ML
projects but this will need to be merged first.

The DB tables now look like this: 

```plantuml
,------------------------.
| Model                  |
|------------------------|
| id : serial            |
| --                     |
| * name : text          |
| * permissions : jsonb  |
| user : User (id)       |
|                        |
`------------------------'
          \|/
           o
           |           
           |           
           _
           -
,------------------------. 
| ModelVersion           | 
|------------------------| 
| id : serial            | 
| --                     | 
| * model : Model (id)   | 
| * card : jsonb         | 
| * contents : oid       | 
| * version : text       | 
|                        |
`------------------------' 

```
  • Loading branch information
ngua authored Mar 21, 2024
1 parent 2b4f431 commit c2bee8a
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 46 deletions.
3 changes: 3 additions & 0 deletions inferno-ml-server-types/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Revision History for inferno-ml-server-types
*Note*: we use https://pvp.haskell.org/ (MAJOR.MAJOR.MINOR.PATCH)

## 0.1.0
* Split `Model` and `ModelVersion`

## 0.0.1
* Initial pre-release
2 changes: 1 addition & 1 deletion inferno-ml-server-types/inferno-ml-server-types.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.4
name: inferno-ml-server-types
version: 0.0.1
version: 0.1.0
synopsis: Types for Inferno ML server
description: Types for Inferno ML server
homepage: https://github.com/plow-technologies/inferno.git#readme
Expand Down
159 changes: 114 additions & 45 deletions inferno-ml-server-types/src/Inferno/ML/Server/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ import System.Posix (EpochTime)
import Text.Read (readMaybe)
import URI.ByteString (Absolute, URIRef)
import URI.ByteString.Aeson ()
import Web.HttpApiData (FromHttpApiData (parseUrlPiece), ToHttpApiData (toUrlPiece))
import Web.HttpApiData
( FromHttpApiData (parseUrlPiece),
ToHttpApiData (toUrlPiece),
)

-- API type for `inferno-ml-server`
type InfernoMlServerAPI uid gid p s =
Expand Down Expand Up @@ -204,21 +207,18 @@ instance
<$> fmap wrappedTo (field @VCObjectHashRow)
<*> fmap getAeson field

-- | Row of the model table, parameterized by the user and group type as well
-- as the contents of the model (normally this will be an 'Oid')
data Model uid gid c = Model
{ id :: Maybe (Id (Model uid gid c)),
-- | Row of the model table, parameterized by the user and group type. This
-- table contains metadata for models that should not change between different
-- versions, e.g. model name and permissions. A second table, 'ModelVersion',
-- contains the specific versions of each model (and the actual model contents),
-- along with other metadata that may change between versions
data Model uid gid = Model
{ id :: Maybe (Id (Model uid gid)),
name :: Text,
-- | The actual contents of the model
contents :: c,
version :: Version,
-- | Stored as JSON in the DB
card :: ModelCard,
-- | Permissions for reading or updating the model, keyed by the group ID
-- type
--
-- NOTE
-- This is stored as a @jsonb@ rather than as @hstore@. It could
-- NOTE: This is stored as a @jsonb@ rather than as @hstore@. It could
-- currently be stored as an @hstore@, but later we might want to
-- use a more complex type that we could not easily convert to\/from
-- text (which is required to use @hstore@). So using @jsonb@ allows
Expand All @@ -230,71 +230,54 @@ data Model uid gid c = Model
}
deriving stock (Show, Eq, Generic)

instance NFData (Model uid gid c) where
instance NFData (Model uid gid) where
rnf = rwhnf

instance
( Typeable uid,
Typeable gid,
( Typeable gid,
FromField uid,
FromField gid,
FromJSONKey gid,
Ord gid
) =>
FromRow (Model uid gid Oid)
FromRow (Model uid gid)
where
-- NOTE
-- Order of fields must align exactly with DB schema
-- NOTE: Order of fields must align exactly with DB schema
fromRow =
Model
<$> field
<*> field
<*> field
<*> field
<*> field
<*> fmap getAeson field
<*> field

instance
( Typeable uid,
Typeable gid,
ToField uid,
( ToField uid,
ToField gid,
ToJSONKey gid
) =>
ToRow (Model uid gid Oid)
ToRow (Model uid gid)
where
-- NOTE
-- Order of fields must align exactly with DB schema
-- NOTE: Order of fields must align exactly with DB schema
toRow m =
[ toField Default,
m ^. the @"name" & toField,
m ^. the @"contents" & toField,
m ^. the @"version" & toField,
m ^. the @"card" & toField,
m ^. the @"permissions" & Aeson & toField,
m ^. the @"user" & toField
]

{- ORMOLU_DISABLE -}
instance
( FromJSON uid,
FromJSON gid,
FromJSONKey gid,
Ord gid
) =>
FromJSON (Model uid gid Oid)
FromJSON (Model uid gid)
where
parseJSON = withObject "Model" $ \o ->
Model
-- Note that for a model serialized as JSON, the `id` must be present
-- (this assumes that a serialized model always refers to one that exists
-- in the DB already)
<$> fmap Just (o .: "id")
-- If a new model is being created, its ID will not be present
<$> o .:? "id"
<*> (ensureNotNull =<< o .: "name")
<*> fmap (Oid . fromIntegral @Word64) (o .: "contents")
<*> o .: "version"
<*> o .: "card"
<*> o .: "permissions"
<*> o .:? "user"
where
Expand All @@ -307,21 +290,104 @@ instance

instance
( ToJSON uid,
ToJSON gid,
ToJSONKey gid
) =>
ToJSON (Model uid gid Oid)
ToJSON (Model uid gid)
where
toJSON m =
object
[ "id" .= view (the @"id") m,
"name" .= view (the @"name") m,
"contents" .= view (the @"contents" . to unOid) m,
"version" .= view (the @"version") m,
"card" .= view (the @"card") m,
"permissions" .= view (the @"permissions") m,
"user" .= view (the @"user") m
]

-- | Represents rows of the model version tables; each row is linked to its
-- 'Model' parent and also contains the actual contents of the model. This
-- is parameterized by the user and group types as well as the type of the
-- content, which will normally be an 'Oid' (Postgres large object). Other
-- model metadata is contained here as well, e.g. the model card, as this
-- might change between versions
data ModelVersion uid gid c = ModelVersion
{ id :: Maybe (Id (ModelVersion uid gid c)),
-- | Foreign key of the @model@ table, which contains invariant metadata
-- related to the model, i.e. name, permissions, user
model :: Id (Model uid gid),
card :: ModelCard,
-- | The actual contents of version of the model. Normally this will be
-- an 'Oid' pointing to the serialized bytes of the model imported into
-- the PSQL large object table
contents :: c,
version :: Version
}
deriving stock (Show, Eq, Generic)
deriving anyclass (NFData)

instance
( FromField uid,
FromField gid
) =>
FromRow (ModelVersion uid gid Oid)
where
-- NOTE: Order of fields must align exactly with DB schema. This instance
-- could just be `anyclass` derived but it's probably better to be as
-- explicit as possible
fromRow =
ModelVersion
<$> field
<*> field
<*> field
<*> field
<*> field

instance
( ToField uid,
ToField gid
) =>
ToRow (ModelVersion uid gid Oid)
where
-- NOTE: Order of fields must align exactly with DB schema
toRow mv =
[ toField Default,
mv ^. the @"model" & toField,
mv ^. the @"card" & Aeson & toField,
mv ^. the @"contents" & toField,
mv ^. the @"version" & toField
]

{- ORMOLU_DISABLE -}
instance
( FromJSON uid,
FromJSON gid
) =>
FromJSON (ModelVersion uid gid Oid)
where
parseJSON = withObject "ModelVersion" $ \o ->
ModelVersion
-- Note that for a model serialized as JSON, the `id` must be present
-- (this assumes that a model version serialized as JSON always refers
-- to one that exists in the DB already)
<$> fmap Just (o .: "id")
<*> o .: "model"
<*> o .: "card"
<*> fmap (Oid . fromIntegral @Word64) (o .: "contents")
<*> o .: "version"
{- ORMOLU_ENABLE -}

instance
( ToJSON uid,
ToJSON gid
) =>
ToJSON (ModelVersion uid gid Oid)
where
toJSON mv =
object
[ "id" .= view (the @"id") mv,
"model" .= view (the @"model") mv,
"contents" .= view (the @"contents" . to unOid) mv,
"version" .= view (the @"version") mv,
"card" .= view (the @"card") mv
]
where
unOid :: Oid -> Word32
unOid (Oid (CUInt x)) = x
Expand All @@ -333,6 +399,7 @@ data ModelPermissions
| -- | The model can be updated e.g. during training
WriteModel
deriving stock (Show, Eq, Generic)
deriving anyclass (NFData)

instance FromJSON ModelPermissions where
parseJSON = withText "ModelPermissions" $ \case
Expand Down Expand Up @@ -498,7 +565,9 @@ data InferenceParam uid gid p s = InferenceParam
-- For existing inference params, this is the foreign key for the specific
-- script in the 'InferenceScript' table
script :: s,
model :: Id (Model uid gid Oid),
-- | This needs to be linked to a specific version of a model rather
-- than the @model@ table itself
model :: Id (ModelVersion uid gid Oid),
inputs :: Vector (SingleOrMany p),
outputs :: Vector (SingleOrMany p),
user :: uid
Expand Down

0 comments on commit c2bee8a

Please sign in to comment.