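# Downloads a Hugging Face model, converts it to GGUF with llama.cpp,
# quantizes it at one or all supported levels, and pushes the results
# (plus a generated model card) to the cortexso org on Hugging Face.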
name: Convert model to gguf with specified quant
on:
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source Hugging Face model ID to pull, e.g. meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        type: string
      source_model_size:
        description: "The model size, e.g. 8b"
        required: true
        type: string
      target_model_id:
        description: "Target Hugging Face model ID to push, e.g. llama3.1"
        required: true
        type: string
      quantization_level:
        description: "Quantization level (e.g. 'q4-km') or 'all' for all levels"
        required: true
        type: string
        default: 'all'
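
# Workflow inputs are mirrored into env vars so the shell and embedded
# Python steps can read them without repeating the inputs context.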
env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id }}
  QUANT_LEVEL: ${{ inputs.quantization_level }}
jobs:
  converter:
    runs-on: ubuntu-20-04-gguf
    timeout-minutes: 7200
    steps:
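      # Check out janhq/cortex.llamacpp (with submodules) rather than the
      # calling repo, so the bundled llama.cpp sources are available for the
      # conversion and quantization steps below.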
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive
          repository: janhq/cortex.llamacpp
      - name: Set up Python
        uses: actions/setup-python@v5 # v5.1.1
        with:
          python-version: '3.12'
          # architecture: 'x64'
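      # Cache pip artifacts keyed on the commit SHA; restore-keys falls back to
      # the most recent pip cache for this OS when there is no exact match.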
      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-
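      # llama.cpp's requirements cover the conversion script; hf-transfer can
      # speed up Hub transfers (when enabled via HF_HUB_ENABLE_HF_TRANSFER),
      # and openai is used below to draft the README overview.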
      - name: Install dependencies
        run: |
          pip3 install -r llama.cpp/requirements.txt
          pip3 install hf-transfer
          pip3 install openai
          git lfs install
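      # MODEL_NAME is the lowercased last path component of the source ID,
      # e.g. meta-llama/Meta-Llama-3.1-8B-Instruct -> meta-llama-3.1-8b-instruct.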
      - name: Extract MODEL_NAME
        run: |
          SOURCE_MODEL_ID="${{ env.SOURCE_MODEL_ID }}"
          MODEL_NAME="$(echo $SOURCE_MODEL_ID | rev | cut -d/ -f1 | rev)"
          echo $MODEL_NAME
          MODEL_NAME="$(echo $MODEL_NAME | tr '[:upper:]' '[:lower:]')"
          echo $MODEL_NAME
          echo "MODEL_NAME=$MODEL_NAME" >> $GITHUB_ENV
      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"
          echo "SOURCE_MODEL_SIZE: ${{ env.SOURCE_MODEL_SIZE }}"
          echo "TARGET_MODEL_ID: ${{ env.TARGET_MODEL_ID }}"
      # - name: Check file existence
      #   id: check_files
      #   uses: andstor/file-existence-action@v1
      #   with:
      #     files: "/mnt/models/${{ env.MODEL_NAME }}/hf"
      - name: Prepare folders
        # if: steps.check_files.outputs.files_exists == 'false'
        run: |
          mkdir -p /mnt/models/${{ env.MODEL_NAME }}/hf
          mkdir -p /mnt/models/.cache
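      # Retry the download (up to 5 attempts of 10 minutes each);
      # HF_HUB_ETAG_TIMEOUT=500 relaxes huggingface_hub's metadata timeout
      # for large repositories.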
      - name: Download Hugging Face model
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 10
          max_attempts: 5
          command: HF_HUB_ETAG_TIMEOUT=500 huggingface-cli download --repo-type model --local-dir /mnt/models/${{ env.MODEL_NAME }}/hf --cache-dir /mnt/models/.cache --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} ${{ env.SOURCE_MODEL_ID }}
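      # Generate the model card: gpt-4o-mini drafts a short overview, which is
      # embedded into a README template with variants, usage, and credits sections.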
      - name: Create README.md
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          python3 - << EOF
          import os
          from openai import OpenAI

          # Initialize the OpenAI client
          client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

          # Inputs
          source_model_id = os.environ['SOURCE_MODEL_ID']
          model_name = os.environ['MODEL_NAME']
          model_variant = os.environ['SOURCE_MODEL_SIZE']
          target_model_id = os.environ['TARGET_MODEL_ID']
          user_name = os.environ['USER_NAME']

          # Extract author from the source model ID
          author = source_model_id.split('/')[0]

          # Call the OpenAI API to generate the overview section
          completion = client.chat.completions.create(
              model="gpt-4o-mini",
              messages=[
                  {"role": "system", "content": "You are a helpful assistant."},
                  {
                      "role": "user",
                      "content": f"Write a concise overview for a machine learning model named '{target_model_id}' derived from '{source_model_id}', highlighting its purpose, use cases, and performance. You DO NOT generate title (# heading 1) and summary sections. For the overview section, make it concise and in a paragraph of 5 sentences"
                  }
              ]
          )
          overview = completion.choices[0].message.content.strip()

          # README.md template; links point at the target repo the files are pushed to
          readme_template = f"""\
          ---
          license: mit
          ---

          ## Overview

          {overview}

          ## Variants

          | No | Variant | Cortex CLI command |
          | --- | --- | --- |
          | 1 | [gguf](https://huggingface.co/{user_name}/{target_model_id}/tree/main) | cortex run {target_model_id} |

          ## Use it with Jan (UI)

          1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)
          2. Use in Jan model Hub:
              {user_name}/{target_model_id}

          ## Use it with Cortex (CLI)

          1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)
          2. Run the model with command:
              cortex run {target_model_id}

          ## Credits

          - **Author:** {author}
          - **Converter:** [Homebrew](https://www.homebrew.ltd/)
          - **Original License:** [License](https://huggingface.co/{source_model_id}#license)
          """

          # Write the README.md file
          with open('README.md', 'w') as f:
              f.write(readme_template)
          EOF
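      # Push the generated README.md to the target repo on the Hub.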
      - name: Upload README.md to HuggingFace Repository
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
        run: |
          python3 - << EOF
          import os
          from huggingface_hub import HfApi

          # Initialize the Hugging Face API client
          api = HfApi(token=os.environ['HF_TOKEN'])

          # Repository details
          repo_id = f"{os.environ['USER_NAME']}/{os.environ['TARGET_MODEL_ID']}"
          print("Uploading README.md to repository:", repo_id)

          # Upload README.md
          api.upload_file(
              path_or_fileobj="README.md",
              path_in_repo="README.md",
              repo_id=repo_id,
          )
          print("README.md uploaded successfully")
          EOF
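      # Build llama.cpp to produce the llama-quantize binary used in the
      # quantize step below.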
      - name: Build lib for quantize
        run: |
          cd llama.cpp
          cmake -B build
          cmake --build build --config Release
          cd ..
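      # convert_hf_to_gguf.py writes an unquantized intermediate GGUF
      # (model-origin.gguf); no --outtype is passed, so the script's default
      # output type is used.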
      - name: Convert to GGUF
        run: |
          mkdir -p /mnt/models/${{ env.MODEL_NAME }}/gguf
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          python3 llama.cpp/convert_hf_to_gguf.py "/mnt/models/${{ env.MODEL_NAME }}/hf" --outfile "/mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf"
          huggingface-cli logout
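      # Map the workflow's quant slugs to llama.cpp's type names (e.g.
      # q4-km -> Q4_K_M), then run llama-quantize once per requested level,
      # skipping any output file that already exists.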
      - name: Quantize the model
        run: |
          declare -A quant_map=(
            ["q2-k"]="Q2_K"
            ["q3-ks"]="Q3_K_S"
            ["q3-km"]="Q3_K_M"
            ["q3-kl"]="Q3_K_L"
            ["q4-ks"]="Q4_K_S"
            ["q4-km"]="Q4_K_M"
            ["q5-ks"]="Q5_K_S"
            ["q5-km"]="Q5_K_M"
            ["q6-k"]="Q6_K"
            ["q8-0"]="Q8_0"
          )
          if [ "${{ env.QUANT_LEVEL }}" = "all" ]; then
            quant_levels=("q2-k" "q3-ks" "q3-km" "q3-kl" "q4-ks" "q4-km" "q5-ks" "q5-km" "q6-k" "q8-0")
          else
            quant_levels=("${{ env.QUANT_LEVEL }}")
          fi
          for quant in "${quant_levels[@]}"; do
            mkdir -p /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/
            if [ ! -f /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf ]; then
              ./llama.cpp/build/bin/llama-quantize /mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf ${quant_map[${quant}]}
            fi
          done
          rm -rf /mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf
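      # Rename each output to <model_name>-<quant>.gguf, upload each quant
      # folder's contents to the root of <user_name>/<target_model_id>, then
      # clean up local artifacts and the build tree.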
      - name: Upload to Hugging Face
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
          declare -A quant_map=(
            ["q2-k"]="q2_k"
            ["q3-ks"]="q3_k_s"
            ["q3-km"]="q3_k_m"
            ["q3-kl"]="q3_k_l"
            ["q4-ks"]="q4_k_s"
            ["q4-km"]="q4_k_m"
            ["q5-ks"]="q5_k_s"
            ["q5-km"]="q5_k_m"
            ["q6-k"]="q6_k"
            ["q8-0"]="q8_0"
          )
          if [ "${{ env.QUANT_LEVEL }}" = "all" ]; then
            quant_levels=("q2-k" "q3-ks" "q3-km" "q3-kl" "q4-ks" "q4-km" "q5-ks" "q5-km" "q6-k" "q8-0")
          else
            quant_levels=("${{ env.QUANT_LEVEL }}")
          fi
          for quant in "${quant_levels[@]}"; do
            new_name="${{ env.MODEL_NAME }}-${quant_map[${quant}]}.gguf"
            mv "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf" "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/${new_name}"
            huggingface-cli upload "${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }}" "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/" .
          done
          rm -rf /mnt/models/${{ env.MODEL_NAME }}/gguf/*
          huggingface-cli logout
          rm -rf llama.cpp/build/