diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 170d2d735..371b0e566 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -577,7 +577,7 @@ def quantize_llm(args): ) model_config = TransformersModelConfig( pretrained=args.model, - dtype="float16", + dtype=args.dtype, use_chat_template=True, model_parallel=True, accelerator=accelerator,