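# Downloads a Hugging Face model, converts it to GGUF with llama.cpp,
# quantizes it at one or all supported levels, and pushes the results
# (plus a generated model card) to the cortexso org on Hugging Face.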
name: Convert model to gguf with specified quant
on:
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source Hugging Face model ID to pull, e.g. meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        type: string
      source_model_size:
        description: "The model size, e.g. 8b"
        required: true
        type: string
      target_model_id:
        description: "Target Hugging Face model ID to push, e.g. llama3.1"
        required: true
        type: string
      quantization_level:
        description: "Quantization level (e.g. 'q4-km') or 'all' for all levels"
        required: true
        type: string
        default: 'all'
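
# Workflow inputs are mirrored into env vars so the shell and embedded
# Python steps can read them without repeating the inputs context.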
env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id }}
  QUANT_LEVEL: ${{ inputs.quantization_level }}
jobs:
  converter:
    runs-on: ubuntu-20-04-gguf
    timeout-minutes: 7200
    steps:
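      # Check out janhq/cortex.llamacpp (with submodules) rather than the
      # calling repo, so the bundled llama.cpp sources are available for the
      # conversion and quantization steps below.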
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive
          repository: janhq/cortex.llamacpp
      - name: Set up Python
        uses: actions/setup-python@v5 # v5.1.1
        with:
          python-version: '3.12'
          # architecture: 'x64'
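      # Cache pip artifacts keyed on the commit SHA; restore-keys falls back to
      # the most recent pip cache for this OS when there is no exact match.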
      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-
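      # llama.cpp's requirements cover the conversion script; hf-transfer can
      # speed up Hub transfers (when enabled via HF_HUB_ENABLE_HF_TRANSFER),
      # and openai is used below to draft the README overview.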
      - name: Install dependencies
        run: |
          pip3 install -r llama.cpp/requirements.txt
          pip3 install hf-transfer
          pip3 install openai
          git lfs install
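      # MODEL_NAME is the lowercased last path component of the source ID,
      # e.g. meta-llama/Meta-Llama-3.1-8B-Instruct -> meta-llama-3.1-8b-instruct.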
      - name: Extract MODEL_NAME
        run: |
          SOURCE_MODEL_ID="${{ env.SOURCE_MODEL_ID }}"
          MODEL_NAME="$(echo $SOURCE_MODEL_ID | rev | cut -d/ -f1 | rev)"
          echo $MODEL_NAME
          MODEL_NAME="$(echo $MODEL_NAME | tr '[:upper:]' '[:lower:]')"
          echo $MODEL_NAME
          echo "MODEL_NAME=$MODEL_NAME" >> $GITHUB_ENV
      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"
          echo "SOURCE_MODEL_SIZE: ${{ env.SOURCE_MODEL_SIZE }}"
          echo "TARGET_MODEL_ID: ${{ env.TARGET_MODEL_ID }}"
      # - name: Check file existence
      #   id: check_files
      #   uses: andstor/file-existence-action@v1
      #   with:
      #     files: "/mnt/models/${{ env.MODEL_NAME }}/hf"
      - name: Prepare folders
        # if: steps.check_files.outputs.files_exists == 'false'
        run: |
          mkdir -p /mnt/models/${{ env.MODEL_NAME }}/hf
          mkdir -p /mnt/models/.cache
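      # Retry the download (up to 5 attempts of 10 minutes each);
      # HF_HUB_ETAG_TIMEOUT=500 relaxes huggingface_hub's metadata timeout
      # for large repositories.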
      - name: Download Hugging Face model
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 10
          max_attempts: 5
          command: HF_HUB_ETAG_TIMEOUT=500 huggingface-cli download --repo-type model --local-dir /mnt/models/${{ env.MODEL_NAME }}/hf --cache-dir /mnt/models/.cache --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} ${{ env.SOURCE_MODEL_ID }}
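      # Generate the model card: gpt-4o-mini drafts a short overview, which is
      # embedded into a README template with variants, usage, and credits sections.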
      - name: Create README.md
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          python3 - << EOF
          import os
          from openai import OpenAI

          # Initialize the OpenAI client
          client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

          # Inputs
          source_model_id = os.environ['SOURCE_MODEL_ID']
          model_name = os.environ['MODEL_NAME']
          model_variant = os.environ['SOURCE_MODEL_SIZE']
          target_model_id = os.environ['TARGET_MODEL_ID']
          user_name = os.environ['USER_NAME']

          # Extract author from the source model ID
          author = source_model_id.split('/')[0]

          # Call the OpenAI API to generate the overview section
          completion = client.chat.completions.create(
              model="gpt-4o-mini",
              messages=[
                  {"role": "system", "content": "You are a helpful assistant."},
                  {
                      "role": "user",
                      "content": f"Write a concise overview for a machine learning model named '{target_model_id}' derived from '{source_model_id}', highlighting its purpose, use cases, and performance. You DO NOT generate title (# heading 1) and summary sections. For the overview section, make it concise and in a paragraph of 5 sentences"
                  }
              ]
          )
          overview = completion.choices[0].message.content.strip()

          # README.md template; links point at the target repo the files are pushed to
          readme_template = f"""\
          ---
          license: mit
          ---

          ## Overview

          {overview}

          ## Variants

          | No | Variant | Cortex CLI command |
          | --- | --- | --- |
          | 1 | [gguf](https://huggingface.co/{user_name}/{target_model_id}/tree/main) | cortex run {target_model_id} |

          ## Use it with Jan (UI)

          1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)
          2. Use in Jan model Hub:
              {user_name}/{target_model_id}

          ## Use it with Cortex (CLI)

          1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)
          2. Run the model with command:
              cortex run {target_model_id}

          ## Credits

          - **Author:** {author}
          - **Converter:** [Homebrew](https://www.homebrew.ltd/)
          - **Original License:** [License](https://huggingface.co/{source_model_id}#license)
          """

          # Write the README.md file
          with open('README.md', 'w') as f:
              f.write(readme_template)
          EOF
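      # Push the generated README.md to the target repo on the Hub.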
      - name: Upload README.md to HuggingFace Repository
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
        run: |
          python3 - << EOF
          import os
          from huggingface_hub import HfApi

          # Initialize the Hugging Face API client
          api = HfApi(token=os.environ['HF_TOKEN'])

          # Repository details
          repo_id = f"{os.environ['USER_NAME']}/{os.environ['TARGET_MODEL_ID']}"
          print("Uploading README.md to repository:", repo_id)

          # Upload README.md
          api.upload_file(
              path_or_fileobj="README.md",
              path_in_repo="README.md",
              repo_id=repo_id,
          )
          print("README.md uploaded successfully")
          EOF
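      # Build llama.cpp to produce the llama-quantize binary used in the
      # quantize step below.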
      - name: Build lib for quantize
        run: |
          cd llama.cpp
          cmake -B build
          cmake --build build --config Release
          cd ..
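      # convert_hf_to_gguf.py writes an unquantized intermediate GGUF
      # (model-origin.gguf); no --outtype is passed, so the script's default
      # output type is used.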
      - name: Convert to GGUF
        run: |
          mkdir -p /mnt/models/${{ env.MODEL_NAME }}/gguf
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          python3 llama.cpp/convert_hf_to_gguf.py "/mnt/models/${{ env.MODEL_NAME }}/hf" --outfile "/mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf"
          huggingface-cli logout
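      # Map the workflow's quant slugs to llama.cpp's type names (e.g.
      # q4-km -> Q4_K_M), then run llama-quantize once per requested level,
      # skipping any output file that already exists.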
      - name: Quantize the model
        run: |
          declare -A quant_map=(
            ["q2-k"]="Q2_K"
            ["q3-ks"]="Q3_K_S"
            ["q3-km"]="Q3_K_M"
            ["q3-kl"]="Q3_K_L"
            ["q4-ks"]="Q4_K_S"
            ["q4-km"]="Q4_K_M"
            ["q5-ks"]="Q5_K_S"
            ["q5-km"]="Q5_K_M"
            ["q6-k"]="Q6_K"
            ["q8-0"]="Q8_0"
          )
          if [ "${{ env.QUANT_LEVEL }}" = "all" ]; then
            quant_levels=("q2-k" "q3-ks" "q3-km" "q3-kl" "q4-ks" "q4-km" "q5-ks" "q5-km" "q6-k" "q8-0")
          else
            quant_levels=("${{ env.QUANT_LEVEL }}")
          fi
          for quant in "${quant_levels[@]}"; do
            mkdir -p /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/
            if [ ! -f /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf ]; then
              ./llama.cpp/build/bin/llama-quantize /mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf /mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf ${quant_map[${quant}]}
            fi
          done
          rm -rf /mnt/models/${{ env.MODEL_NAME }}/gguf/model-origin.gguf
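      # Rename each output to <model_name>-<quant>.gguf, upload each quant
      # folder's contents to the root of <user_name>/<target_model_id>, then
      # clean up local artifacts and the build tree.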
      - name: Upload to Hugging Face
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
          declare -A quant_map=(
            ["q2-k"]="q2_k"
            ["q3-ks"]="q3_k_s"
            ["q3-km"]="q3_k_m"
            ["q3-kl"]="q3_k_l"
            ["q4-ks"]="q4_k_s"
            ["q4-km"]="q4_k_m"
            ["q5-ks"]="q5_k_s"
            ["q5-km"]="q5_k_m"
            ["q6-k"]="q6_k"
            ["q8-0"]="q8_0"
          )
          if [ "${{ env.QUANT_LEVEL }}" = "all" ]; then
            quant_levels=("q2-k" "q3-ks" "q3-km" "q3-kl" "q4-ks" "q4-km" "q5-ks" "q5-km" "q6-k" "q8-0")
          else
            quant_levels=("${{ env.QUANT_LEVEL }}")
          fi
          for quant in "${quant_levels[@]}"; do
            new_name="${{ env.MODEL_NAME }}-${quant_map[${quant}]}.gguf"
            mv "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/model.gguf" "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/${new_name}"
            huggingface-cli upload "${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }}" "/mnt/models/${{ env.MODEL_NAME }}/gguf/${quant}/" .
          done
          rm -rf /mnt/models/${{ env.MODEL_NAME }}/gguf/*
          huggingface-cli logout
          rm -rf llama.cpp/build/