Merge pull request #1 from shalb/hugging-face-model

Hugging face model
shalb · Nov 1, 2023 · 3e226d7 · 3e226d7
2 parents 618eff0 + 64d47bd
commit 3e226d7
Show file tree

Hide file tree

Showing 15 changed files with 786 additions and 0 deletions.
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -0,0 +1,30 @@
+# Packages every chart found one directory below the repository root and
+# pushes it to the OCI registry, using the pushed git tag as chart version.
+name: Release
+
+on:
+  push:
+    tags:
+      - '*'
+
+env:
+  REGISTRY_URL: registry-1.docker.io
+  REGISTRY_REPO: shalb/charts
+
+jobs:
+  release:
+    name: Release helm charts
+    runs-on: ubuntu-latest
+    container: alpine/helm:3.12.0
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set env
+      run: |
+        # Strip the "refs/<type>/" prefix so only the tag name remains.
+        echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> "$GITHUB_ENV"
+
+    - name: Build and push
+      # Secrets are handed over as environment variables instead of being
+      # interpolated into the script text, so special characters cannot
+      # break (or inject into) the shell commands below.
+      env:
+        DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+        DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+      run: |
+        printf '%s' "$DOCKERHUB_TOKEN" | helm registry login -u "$DOCKERHUB_USERNAME" --password-stdin "$REGISTRY_URL"
+        # -mindepth 2 skips a Chart.yaml placed directly in the repository root,
+        # which would otherwise yield "Chart.yaml" as a chart directory name.
+        for CHART_NAME in $(find . -mindepth 2 -maxdepth 2 -type f -name "Chart.yaml" | cut -d"/" -f2); do
+          helm package --version "$RELEASE_VERSION" "$CHART_NAME"
+          helm push "${CHART_NAME}-${RELEASE_VERSION}.tgz" "oci://${REGISTRY_URL}/${REGISTRY_REPO}"
+        done
diff --git a/huggingface-model/.helmignore b/huggingface-model/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/huggingface-model/Chart.yaml b/huggingface-model/Chart.yaml
@@ -0,0 +1,8 @@
+# Chart metadata (Helm 3 chart format).
+apiVersion: v2
+name: huggingface-model
+type: application
+
+# Overridden at release time by `helm package --version <tag>`.
+version: 0.0.1
+
+description: >-
+  Helm chart for deploy Hugging Face to kubernetes cluster.
+  See [Hugging Face models](https://huggingface.co/models)
+
diff --git a/huggingface-model/README.md b/huggingface-model/README.md
@@ -0,0 +1,60 @@
+# Helm chart for deploying Hugging Face models to a Kubernetes cluster
+
+See [Hugging Face models](https://huggingface.co/models)
+
+## Parameters
+
+### Model
+
+| Name                        | Description                                          | Value                                                 |
+| --------------------------- | ---------------------------------------------------- | ----------------------------------------------------- |
+| `model.organization`        | Model's organization on Hugging Face, required!      | `""`                                                  |
+| `model.name`                | Model's name on Hugging Face, required!              | `""`                                                  |
+| `init.s3.enabled`           | Turn on/off s3 data source. Default: disabled        | `false`                                               |
+| `init.s3.bucketURL`         | Full s3 URL including the path to the model's folder | `s3://k8s-model-zephyr/llm/deployment/segmind/SSD-1B` |
+| `huggingface.containerPort` | Deployment/StatefulSet ContainerPort, optional       | `8080`                                                |
+| `huggingface.args`          | Additional arg for text-generation-launcher optional | `[]`                                                  |
+
+### Global
+
+| Name                              | Description                                                                                      | Value                                           |
+| --------------------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------- |
+| `replicaCount`                    | Deployment/StatefulSet replicaCount                                                              | `1`                                             |
+| `kind`                            | Resource kind [allowed values: deployment/StatefulSet, optional]                                 | `deployment`                                    |
+| `image.repo`                      | Huggingface image repo                                                                           | `ghcr.io/huggingface/text-generation-inference` |
+| `image.tag`                       | Huggingface image version                                                                        | `latest`                                        |
+| `image.pullPolicy`                | Huggingface image pull policy                                                                    | `IfNotPresent`                                  |
+| `imagePullSecrets`                | May need if used private repo as a cache for image ghcr.io/huggingface/text-generation-inference | `[]`                                            |
+| `nameOverride`                    | String to partially override common.names.name                                                   | `""`                                            |
+| `fullnameOverride`                | String to fully override common.names.fullname                                                   | `""`                                            |
+| `persistence.accessModes`         | PVC accessModes                                                                                  | `["ReadWriteOnce"]`                             |
+| `persistence.storageClassName`    | Kubernetes storageClass name                                                                     | `gp2`                                           |
+| `persistence.storage`             | Volume size                                                                                      | `100Gi`                                         |
+| `service.port`                    | Service port, default 8080                                                                       | `8080`                                          |
+| `service.type`                    | Service type, default ClusterIP                                                                  | `ClusterIP`                                     |
+| `serviceAccount.create`           | Enable/disable service account, default enabled                                                  | `true`                                          |
+| `serviceAccount.role`             | Kubernetes role configuration, default nil                                                       | `{}`                                            |
+| `podAnnotations`                  | Annotations for model pods                                                                       | `{}`                                            |
+| `securityContext`                 | Set pod's Security Context fsGroup                                                               | `{}`                                            |
+| `extraEnvVars`                    | Array with extra environment variables to add to main pod                                        | `[]`                                            |
+| `ingresses.enabled`               | Enable/disable ingress(es) for model API, default disabled                                       | `false`                                         |
+| `ingresses.configs`               | List of ingresses configs                                                                        | `[]`                                            |
+| `livenessProbe`                   | Configure extra options for model liveness probe                                                 | `{}`                                            |
+| `readinessProbe`                  | Configure extra options for model readiness probe                                                | `{}`                                            |
+| `startupProbe`                    | Configure extra options for model startup probe                                                  | `{}`                                            |
+| `pdb.create`                      | Specifies whether a PodDisruptionBudget should be created                                        | `false`                                         |
+| `pdb.minAvailable`                | Min number of pods that must still be available after the eviction                               | `1`                                             |
+| `pdb.maxUnavailable`              | Max number of pods that can be unavailable after the eviction                                    | `""`                                            |
+| `resources.limits.nvidia.com/gpu` | The required option by text-generation-launcher                                                  | `1`                                             |
+| `resources.requests.cpu`          | The requested CPU minimal recommended value                                                      | `3`                                             |
+| `resources.requests.memory`       | The requested memory minimal recommended size                                                    | `10Gi`                                          |
+| `extraVolumes`                    | Optionally specify extra list of additional volumes for models' pods                             | `[]`                                            |
+| `extraVolumeMounts`               | Optionally specify extra list of additional volumeMounts for models' container                   | `[]`                                            |
+| `autoscaling.enabled`             | Enable Horizontal POD autoscaling for model                                                      | `true`                                          |
+| `autoscaling.minReplicas`         | Minimum number of model replicas                                                                 | `1`                                             |
+| `autoscaling.maxReplicas`         | Maximum number of model replicas                                                                 | `5`                                             |
+| `autoscaling.targetCPU`           | Target CPU utilization percentage                                                                | `50`                                            |
+| `autoscaling.targetMemory`        | Target Memory utilization percentage                                                             | `50`                                            |
+| `affinity`                        | Affinity for pod assignment                                                                      | `{}`                                            |
+| `nodeSelector`                    | Node labels for pod assignment                                                                   | `{}`                                            |
+| `tolerations`                     | Tolerations for pod assignment                                                                   | `[]`                                            |
diff --git a/huggingface-model/templates/_helpers.tpl b/huggingface-model/templates/_helpers.tpl
@@ -0,0 +1,83 @@
+{{/*
+Generate internal container port.
+Returns .Values.huggingface.containerPort when it is set, otherwise 8080.
+Every action trims its surrounding whitespace so the result is the bare port
+number; a stray leading newline would break inline uses such as
+`containerPort: {{ include "huggingface-model.containerPort" . }}`.
+*/}}
+{{- define "huggingface-model.containerPort" -}}
+{{- $hf := .Values.huggingface | default dict -}}
+{{- $hf.containerPort | default 8080 -}}
+{{- end -}}
+{{/*
+Return the Kubernetes version to target.
+Precedence: .Values.global.kubeVersion, then .Values.kubeVersion, then the
+version reported by the cluster (.Capabilities.KubeVersion.Version).
+*/}}
+{{- define "common.capabilities.kubeVersion" -}}
+{{- $fallback := default .Capabilities.KubeVersion.Version .Values.kubeVersion -}}
+{{- $global := .Values.global | default dict -}}
+{{- default $fallback $global.kubeVersion -}}
+{{- end -}}
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise the chart's own
+name, cut to 63 characters with any trailing "-" removed.
+*/}}
+{{- define "huggingface-model.name" -}}
+{{- $name := .Values.nameOverride | default .Chart.Name }}
+{{- $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified application name.
+- .Values.fullnameOverride wins when set.
+- Otherwise the release name is used alone if it already contains the chart
+  name (or nameOverride), else "<release>-<name>".
+The result is cut to 63 characters (some Kubernetes name fields are limited
+to this by the DNS naming spec) and a trailing "-" is removed.
+*/}}
+{{- define "huggingface-model.fullname" -}}
+{{- $full := .Values.fullnameOverride }}
+{{- if not $full }}
+{{- $chartName := .Values.nameOverride | default .Chart.Name }}
+{{- $full = .Release.Name }}
+{{- if not (contains $chartName .Release.Name) }}
+{{- $full = printf "%s-%s" .Release.Name $chartName }}
+{{- end }}
+{{- end }}
+{{- $full | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Value for the helm.sh/chart label: "<chart name>-<chart version>" with "+"
+replaced by "_", cut to 63 characters, trailing "-" removed.
+*/}}
+{{- define "huggingface-model.chart" -}}
+{{- $nameAndVersion := list .Chart.Name .Chart.Version | join "-" }}
+{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Labels applied to every resource of the chart: the chart label, the selector
+labels, the app version (only when the chart declares one) and the managing
+service.
+*/}}
+{{- define "huggingface-model.labels" -}}
+helm.sh/chart: {{ template "huggingface-model.chart" . }}
+{{ template "huggingface-model.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Labels used in selectors: chart name plus release name.
+*/}}
+{{- define "huggingface-model.selectorLabels" -}}
+app.kubernetes.io/name: {{ template "huggingface-model.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use.
+.Values.serviceAccount.name wins when set; otherwise the chart fullname is
+used if the chart creates the account, and "default" if it does not.
+*/}}
+{{- define "huggingface-model.serviceAccountName" -}}
+{{- $fallback := "default" }}
+{{- if .Values.serviceAccount.create }}
+{{- $fallback = include "huggingface-model.fullname" . }}
+{{- end }}
+{{- .Values.serviceAccount.name | default $fallback }}
+{{- end }}
diff --git a/huggingface-model/templates/application.yaml b/huggingface-model/templates/application.yaml
@@ -0,0 +1,125 @@
+{{- /*
+Model workload: a Deployment (default) or a StatefulSet, selected by .Values.kind.
+An init container downloads the model into the shared "model-storage" volume
+(from S3 when init.s3.enabled is set, otherwise from huggingface.co); the main
+container then runs text-generation-launcher.
+*/ -}}
+{{- $model := .Values.model | default dict -}}
+{{- if or (not $model.name) (not $model.organization) -}}
+{{- fail "model.name and model.organization are required but not set!" -}}
+{{- end -}}
+{{- /* Hub repository id; an explicit model.id is still honoured for backward compatibility. */ -}}
+{{- $modelId := $model.id | default (printf "%s/%s" $model.organization $model.name) -}}
+{{- /* Kubernetes kinds are case-sensitive, so the user value is normalised to the canonical spelling. */ -}}
+{{- $isStatefulSet := eq (.Values.kind | default "deployment" | toString | lower) "statefulset" -}}
+{{- $kind := ternary "StatefulSet" "Deployment" $isStatefulSet -}}
+{{- $hf := .Values.huggingface | default dict -}}
+{{- $fullname := include "huggingface-model.fullname" . -}}
+apiVersion: apps/v1
+kind: {{ $kind }}
+metadata:
+  annotations:
+    reloader.stakater.com/auto: "true"
+  name: {{ $fullname }}
+  labels:
+    {{- include "huggingface-model.labels" . | nindent 4 }}
+spec:
+  {{- /* Leave the replica count to the HPA when autoscaling is on, so upgrades do not reset it. */}}
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  {{- /* serviceName belongs to the StatefulSet spec, not to the pod spec. */}}
+  {{- if $isStatefulSet }}
+  serviceName: {{ $fullname }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "huggingface-model.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "huggingface-model.selectorLabels" . | nindent 8 }}
+        {{- if .Values.pdb.create }}
+        pdbEnabled: {{ $fullname }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.serviceAccount.create }}
+      serviceAccountName: {{ include "huggingface-model.serviceAccountName" . }}
+      {{- end }}
+      {{- /* Inside "with" the dot is the security context itself; ".Values" is not reachable there. */}}
+      {{- with .Values.podSecurityContext }}
+      securityContext:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      initContainers:
+        - name: init
+          image: alpine:3.18.4
+          imagePullPolicy: IfNotPresent
+          command: ["/bin/sh", "-c"]
+          {{- /* "sh -c" executes only its first argument, so the whole script is one block. */}}
+          args:
+            - |
+              set -eu
+              MODEL_DIR="/data/{{ $model.name }}"
+              TMP_DIR="${MODEL_DIR}.partial"
+              if [ -d "${MODEL_DIR}" ]; then
+                echo "Model {{ $modelId }} is already downloaded. Skipping init..."
+                exit 0
+              fi
+              # Download into a scratch directory and rename at the end, so an
+              # interrupted download is never mistaken for a complete model.
+              rm -rf "${TMP_DIR}"
+              {{- if .Values.init.s3.enabled }}
+              apk add --update aws-cli
+              aws s3 cp --recursive "{{ .Values.init.s3.bucketURL }}" "${TMP_DIR}"
+              {{- else }}
+              apk add --update git git-lfs
+              git lfs install
+              git clone --depth=1 "https://huggingface.co/{{ $modelId }}" "${TMP_DIR}"
+              {{- end }}
+              mv "${TMP_DIR}" "${MODEL_DIR}"
+          volumeMounts:
+            - name: model-storage
+              mountPath: /data
+          resources:
+            requests:
+              cpu: "3"
+      containers:
+        - name: model
+          image: "{{ .Values.image.repo }}:{{ .Values.image.tag }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }}
+          command: ["text-generation-launcher"]
+          {{- /* NOTE(review): the init container stores the model under /data/<name>, while the cache paths below point at /usr/src/<name> - confirm which location the launcher should read. */}}
+          args:
+            - "--model-id"
+            - {{ $modelId | quote }}
+            - "--huggingface-hub-cache"
+            - "/usr/src/{{ $model.name }}"
+            - "--weights-cache-override"
+            - "/usr/src/{{ $model.name }}"
+            {{- with $hf.args }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          ports:
+            - containerPort: {{ include "huggingface-model.containerPort" . | trim }}
+          env:
+            {{- /* Environment variable values must be strings, hence the quoting. */}}
+            - name: PORT
+              value: {{ include "huggingface-model.containerPort" . | trim | quote }}
+            - name: HUGGINGFACE_OFFLINE
+              value: "1"
+            {{- with .Values.extraEnvVars }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          volumeMounts:
+            - name: model-storage
+              mountPath: /data
+            {{- with .Values.extraVolumeMounts }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          {{- with .Values.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+        - name: model-storage
+          persistentVolumeClaim:
+            claimName: {{ $fullname }}
+        {{- with .Values.extraVolumes }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
diff --git a/huggingface-model/templates/auth-secret.yaml b/huggingface-model/templates/auth-secret.yaml
@@ -0,0 +1,9 @@
+{{- /*
+Basic-auth Secret, rendered only when authSecret.basicAuth is configured.
+Nested "with" blocks keep the template from failing with a nil-pointer error
+when authSecret is set but basicAuth is missing.
+NOTE(review): Secret "data" values must be base64-encoded and authHash is
+passed through as-is - confirm callers supply it already encoded.
+*/ -}}
+{{- with .Values.authSecret }}
+{{- with .basicAuth }}
+apiVersion: v1
+data:
+  auth: {{ required "authSecret.basicAuth.authHash is required" .authHash | quote }}
+kind: Secret
+metadata:
+  name: {{ required "authSecret.basicAuth.secretName is required" .secretName | quote }}
+{{- end }}
+{{- end }}
+
diff --git a/huggingface-model/templates/hpa.yaml b/huggingface-model/templates/hpa.yaml
@@ -0,0 +1,42 @@
+{{- /*
+HorizontalPodAutoscaler for the model workload, rendered when
+autoscaling.enabled is set. A memory and/or CPU utilization metric is added
+for each of autoscaling.targetMemory / autoscaling.targetCPU that is set.
+*/ -}}
+{{- if .Values.autoscaling.enabled }}
+{{- /* Kubernetes kinds are case-sensitive; scaleTargetRef must use the canonical spelling. */}}
+{{- $isStatefulSet := eq (.Values.kind | default "deployment" | toString | lower) "statefulset" }}
+{{- /* autoscaling/v2 exists from Kubernetes 1.23; older clusters get v2beta2, which shares the same "target" metric schema. */}}
+{{- $legacyApi := semverCompare "<1.23-0" (include "common.capabilities.kubeVersion" .) }}
+apiVersion: {{ ternary "autoscaling/v2beta2" "autoscaling/v2" $legacyApi }}
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "huggingface-model.fullname" . }}
+  labels:
+    {{- include "huggingface-model.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ ternary "StatefulSet" "Deployment" $isStatefulSet }}
+    name: {{ include "huggingface-model.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- with .Values.autoscaling.targetMemory }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+    {{- with .Values.autoscaling.targetCPU }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+{{- end }}