-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Amit Raj <[email protected]>
- Loading branch information
1 parent
a22a719
commit 7e53ca0
Showing
19 changed files
with
1,319 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
<!DOCTYPE html> | ||
<html class="writer-html5" lang="en"> | ||
<head> | ||
<meta charset="utf-8" /> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | ||
<title>QEfficient.transformers.quantizers.auto — efficient-transformers main documentation</title> | ||
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=80d5e7a1" /> | ||
<link rel="stylesheet" type="text/css" href="../../../../_static/css/theme.css?v=19f00094" /> | ||
<link rel="stylesheet" type="text/css" href="../../../../_static/my_theme.css?v=547657ed" /> | ||
|
||
|
||
<!--[if lt IE 9]> | ||
<script src="../../../../_static/js/html5shiv.min.js"></script> | ||
<![endif]--> | ||
|
||
<script src="../../../../_static/jquery.js?v=5d32c60e"></script> | ||
<script src="../../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script> | ||
<script data-url_root="../../../../" id="documentation_options" src="../../../../_static/documentation_options.js?v=d01aebe5"></script> | ||
<script src="../../../../_static/doctools.js?v=888ff710"></script> | ||
<script src="../../../../_static/sphinx_highlight.js?v=4825356b"></script> | ||
<script src="../../../../_static/js/theme.js"></script> | ||
<link rel="index" title="Index" href="../../../../genindex.html" /> | ||
<link rel="search" title="Search" href="../../../../search.html" /> | ||
</head> | ||
|
||
<body class="wy-body-for-nav"> | ||
<div class="wy-grid-for-nav"> | ||
<nav data-toggle="wy-nav-shift" class="wy-nav-side"> | ||
<div class="wy-side-scroll"> | ||
<div class="wy-side-nav-search" > | ||
|
||
|
||
|
||
<a href="../../../../index.html" class="icon icon-home"> | ||
efficient-transformers | ||
</a> | ||
<div role="search"> | ||
<form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get"> | ||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> | ||
<input type="hidden" name="check_keywords" value="yes" /> | ||
<input type="hidden" name="area" value="default" /> | ||
</form> | ||
</div> | ||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu"> | ||
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/introduction.html">Introduction to the Qualcomm <code class="docutils literal notranslate"><span class="pre">efficient-transformers</span></code> library</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/validate.html">Validated Models</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/validate.html#models-coming-soon">Models Coming Soon</a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Installation</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/installation.html">Pre-requisites</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/installation.html#linux-installation">Linux Installation</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/installation.html#sanity-check">Sanity Check</a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Quick start</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/quick_start.html">Transformed models and QPC storage</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/quick_start.html#command-line-interface">Command Line Interface</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/quick_start.html#python-api">Python API</a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Command Line Interface Use (CLI)</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/cli_api.html"><code class="docutils literal notranslate"><span class="pre">QEfficient.cloud.infer</span></code></a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/cli_api.html#qefficient-cloud-execute"><code class="docutils literal notranslate"><span class="pre">QEfficient.cloud.execute</span></code></a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/cli_api.html#qefficient-cloud-compile"><code class="docutils literal notranslate"><span class="pre">QEfficient.cloud.compile</span></code></a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/cli_api.html#qefficient-cloud-export"><code class="docutils literal notranslate"><span class="pre">QEfficient.cloud.export</span></code></a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Python API</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/hl_api.html">High Level API</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/ll_api.html">Low Level API</a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/blogs.html">Train anywhere, Infer on Qualcomm Cloud AI 100</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/blogs.html#how-to-quadruple-llm-decoding-performance-with-speculative-decoding-spd-and-microscaling-mx-formats-on-qualcomm-cloud-ai-100">How to Quadruple LLM Decoding Performance with Speculative Decoding (SpD) and Microscaling (MX) Formats on Qualcomm® Cloud AI 100</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/blogs.html#power-efficient-acceleration-for-large-language-models-qualcomm-cloud-ai-sdk">Power-efficient acceleration for large language models – Qualcomm Cloud AI SDK</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/blogs.html#qualcomm-cloud-ai-100-accelerates-large-language-model-inference-by-2x-using-microscaling-mx-formats">Qualcomm Cloud AI 100 Accelerates Large Language Model Inference by ~2x Using Microscaling (Mx) Formats</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/blogs.html#qualcomm-cloud-ai-introduces-efficient-transformers-one-api-infinite-possibilities">Qualcomm Cloud AI Introduces Efficient Transformers: One API, Infinite Possibilities</a></li> | ||
</ul> | ||
<p class="caption" role="heading"><span class="caption-text">Reference</span></p> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/reference.html">Qualcomm Cloud AI home</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/reference.html#qualcomm-cloud-ai-sdk-download">Qualcomm Cloud AI SDK download</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/reference.html#qualcomm-cloud-ai-api-reference">Qualcomm Cloud AI API reference</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/reference.html#user-guide">User Guide</a></li> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../../source/reference.html#ocp-microscaling-formats-mx-specification">OCP Microscaling Formats (MX) Specification</a></li> | ||
</ul> | ||
|
||
</div> | ||
</div> | ||
</nav> | ||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" > | ||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i> | ||
<a href="../../../../index.html">efficient-transformers</a> | ||
</nav> | ||
|
||
<div class="wy-nav-content"> | ||
<div class="rst-content"> | ||
<div role="navigation" aria-label="Page navigation"> | ||
<ul class="wy-breadcrumbs"> | ||
<li><a href="../../../../index.html" class="icon icon-home" aria-label="Home"></a></li> | ||
<li class="breadcrumb-item"><a href="../../../index.html">Module code</a></li> | ||
<li class="breadcrumb-item active">QEfficient.transformers.quantizers.auto</li> | ||
<li class="wy-breadcrumbs-aside"> | ||
</li> | ||
</ul> | ||
<hr/> | ||
</div> | ||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> | ||
<div itemprop="articleBody"> | ||
|
||
<h1>Source code for QEfficient.transformers.quantizers.auto</h1><div class="highlight"><pre> | ||
<span></span><span class="c1"># -----------------------------------------------------------------------------</span> | ||
<span class="c1">#</span> | ||
<span class="c1"># Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.</span> | ||
<span class="c1"># SPDX-License-Identifier: BSD-3-Clause</span> | ||
<span class="c1">#</span> | ||
<span class="c1"># ----------------------------------------------------------------------------</span> | ||
|
||
<span class="kn">from</span> <span class="nn">transformers.quantizers.auto</span> <span class="kn">import</span> <span class="n">AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="p">,</span> <span class="n">AUTO_QUANTIZER_MAPPING</span> | ||
|
||
<span class="kn">from</span> <span class="nn">QEfficient.transformers.quantizers.quantizer_awq</span> <span class="kn">import</span> <span class="n">QEffAwqConfig</span><span class="p">,</span> <span class="n">QEffAwqQuantizer</span> | ||
<span class="kn">from</span> <span class="nn">QEfficient.transformers.quantizers.quantizer_gptq</span> <span class="kn">import</span> <span class="n">QEffGPTQConfig</span><span class="p">,</span> <span class="n">QEffGPTQQuantizer</span> | ||
|
||
<span class="n">QEFF_AUTO_QUANTIZER_MAPPING</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"awq"</span><span class="p">:</span> <span class="n">QEffAwqQuantizer</span><span class="p">,</span> <span class="s2">"gptq"</span><span class="p">:</span> <span class="n">QEffGPTQQuantizer</span><span class="p">}</span> | ||
<span class="n">QEFF_AUTO_QUANTIZATION_CONFIG_MAPPING</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"awq"</span><span class="p">:</span> <span class="n">QEffAwqConfig</span><span class="p">,</span> <span class="s2">"gptq"</span><span class="p">:</span> <span class="n">QEffGPTQConfig</span><span class="p">}</span> | ||
|
||
|
||
<span class="k">def</span> <span class="nf">with_replaced_quantizers</span><span class="p">(</span><span class="n">func</span><span class="p">):</span> | ||
<span class="k">def</span> <span class="nf">wrapper</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> | ||
<span class="n">transformers_replaced_quantization_config_mapping</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span> | ||
<span class="n">transformers_replaced_quantizer_mapping</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span> | ||
|
||
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">QEFF_AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> | ||
<span class="c1"># Replace quantization config</span> | ||
<span class="n">transformers_replaced_quantization_config_mapping</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
<span class="n">AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">QEFF_AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
|
||
<span class="c1"># Replace quantizer</span> | ||
<span class="n">transformers_replaced_quantizer_mapping</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">AUTO_QUANTIZER_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
<span class="n">AUTO_QUANTIZER_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">QEFF_AUTO_QUANTIZER_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
|
||
<span class="c1"># Call the function for loading quantized models here</span> | ||
<span class="n">out</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | ||
|
||
<span class="c1"># Put back quantization config and quantizer</span> | ||
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">QEFF_AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span> | ||
<span class="n">AUTO_QUANTIZATION_CONFIG_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">transformers_replaced_quantization_config_mapping</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
<span class="n">AUTO_QUANTIZER_MAPPING</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">transformers_replaced_quantizer_mapping</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> | ||
|
||
<span class="k">return</span> <span class="n">out</span> | ||
|
||
<span class="k">return</span> <span class="n">wrapper</span> | ||
</pre></div> | ||
|
||
</div> | ||
</div> | ||
<footer> | ||
|
||
<hr/> | ||
|
||
<div role="contentinfo"> | ||
<p>© Copyright 2024, Qualcomm.</p> | ||
</div> | ||
|
||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a | ||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> | ||
provided by <a href="https://readthedocs.org">Read the Docs</a>. | ||
|
||
|
||
</footer> | ||
</div> | ||
</div> | ||
</section> | ||
</div> | ||
<script> | ||
jQuery(function () { | ||
SphinxRtdTheme.Navigation.enable(true); | ||
}); | ||
</script> | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.