"use strict";(self.webpackChunkrspress_doc_template=self.webpackChunkrspress_doc_template||[]).push([["5817"],{25831:function(n,e,s){s.r(e);var i=s(85893),o=s(50065);function r(n){let e=Object.assign({h1:"h1",a:"a",h2:"h2",h3:"h3",p:"p",div:"div",pre:"pre",code:"code",span:"span",ul:"ul",li:"li",ol:"ol",strong:"strong"},(0,o.ah)(),n.components);return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.h1,{id:"qconfig-in-detail",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#qconfig-in-detail",children:"#"}),"QConfig in Detail"]}),"\n",(0,i.jsxs)(e.h2,{id:"definition-and-principle",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#definition-and-principle",children:"#"}),"Definition and Principle"]}),"\n",(0,i.jsxs)(e.h3,{id:"definition-of-qconfig",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#definition-of-qconfig",children:"#"}),"Definition of QConfig"]}),"\n",(0,i.jsx)(e.p,{children:"The qconfig refers to quantization configuration, which is a key set of parameters in the quantization process of deep learning models.\nThe quantization mode of the model is determined by qconfig, which needs to be set for the model before preparing the qat / calibration model."}),"\n",(0,i.jsxs)(e.div,{className:"rspress-directive warning",children:[(0,i.jsx)(e.div,{className:"rspress-directive-title",children:"Attention"}),(0,i.jsx)(e.div,{className:"rspress-directive-content",children:(0,i.jsx)(e.p,{children:"Due to historical reasons, there are different definitions and usages of qconfig in the Plugin. Earlier versions of qconfig will be deprecated in the near future, and we only recommend that you use the qconfig usage described in this document."})})]}),"\n",(0,i.jsx)(e.p,{children:"A qconfig object can set three keywords: input, weight, and output, representing the quantization configuration of the operator's input, weight, and output respectively. When preparing model, these configurations determine whether to insert FakeQuantize or FakeCast nodes at the corresponding positions. None means no nodes will be inserted."}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" torch"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" QConfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_cast "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeCast"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"observer_v2 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" MinMaxObserver"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"dtype "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" qint8"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QConfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    input"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeQuantize."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qscheme"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.per_channel_symmetric,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        ch_axis"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    output"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeCast."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.float16),"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# activation=xxx Earlier usage, same as the output keyword. Still compatible, but it's recommended to use the output keyword."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(e.h3,{id:"definition-of-fakequantize",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#definition-of-fakequantize",children:"#"}),"Definition of FakeQuantize"]}),"\n",(0,i.jsx)(e.p,{children:"FakeQuantize is a fake quantization node that performs quantization and dequantization operations on the input. Inserting fake quantization can simulate the errors caused by quantization in the forward pass of a floating-point model. The horizon_plugin_pytorch supports three types of fake quantization: FakeQuantize, PACTFakeQuantize, and _LearnableFakeQuantize. We recommend using the statistic-based FakeQuantize. The document won't introduce PACTFakeQuantize and _LearnableFakeQuantize. If required, please read the papers before using them."}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# statistic-based FakeQuantize"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# https://arxiv.org/pdf/1805.06085"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"pact_fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" PACTFakeQuantize"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# https://arxiv.org/pdf/1902.08153"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"_learnable_fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" _LearnableFakeQuantize"})]}),"\n"]})})}),"\n",(0,i.jsx)(e.p,{children:"You can call the with_args method of FakeQuantize to get a constructor and use it to construct qconfig as shown in the previous section. The parameters of with_args include parameters supported by FakeQuantize and observer, theoretically allowing configuration of all parameters declared in the init method of the FakeQuantize and observer classes. However, to avoid unnecessary details, we recommend you to configure the observer-related parameters only."}),"\n",(0,i.jsx)(e.p,{children:"Different observers have different parameters. Below are examples of constructing FakeQuantize with common used observers. For the specific usage of other observers, see the calibration section."}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" torch"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" QConfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"observer_v2 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" MinMaxObserver"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FixedScaleObserver"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" MSEObserver"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"dtype "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" qint8"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The __init__ method of MinMaxObserver includes many parameters. The with_args method can control these parameters."})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# We only recommend you to set a few parameters as in the fq_constructor_1 example."})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# def __init__("})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     self,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     averaging_constant: float = 0.01,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     ch_axis: int = -1,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     dtype: Union[torch.dtype, QuantDType] = qint8,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     qscheme: torch.qscheme = torch.per_tensor_symmetric,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     quant_min: int = None,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     quant_max: int = None,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     is_sync_quantize: bool = False,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"#     factory_kwargs: Dict = None,"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# ) -> None:"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fq_constructor_1 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,   "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Suitable for input/output/weight in qat and weight in calibration."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    averaging_constant"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0.01"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",   "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# When performing qat after calibration, the averaging_constant of input/output can be set to 0 to fix the scale."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Quantization type, set based on the support of the operator."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qscheme"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.per_channel_symmetric,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Only weight supports per-channel quantization."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ch_axis"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Specify the channel for per-channel quantization."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Similarly, you can check the __init__ method of FixedScaleObserver and MSEObserver to learn the configurable parameters."})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fq_constructor_2 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FixedScaleObserver,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Fixed scale, will not change in any conditions."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Quantization type, set based on the support of the operator."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    scale"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"INPUT_ABS_MAX "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"/"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"128"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# scale value, use maximum absolute value divided by the maximum quantization type value."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fq_constructor_3 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MSEObserver,   "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Suitable for input/output in calibration."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Quantization type, set based on the support of the operator."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QConfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"fq_constructor_x,"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ..."})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(e.h3,{id:"definition-of-fakecast",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#definition-of-fakecast",children:"#"}),"Definition of FakeCast"]}),"\n",(0,i.jsx)(e.p,{children:"FakeCast is a fake conversion node that converts the input to float32 data type. If the data type is float16, it also simulates the truncation error caused by converting value to float16. This node is mainly used to mark operators that require floating-point computation."}),"\n",(0,i.jsx)(e.p,{children:"The method of using FakeCast to construct qconfig is similar to FakeQuantize, but it only has one parameter."}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" torch"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" QConfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_cast "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeCast"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QConfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    input"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeCast."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.float16), "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# set based on the support of the operator."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ..."})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(e.h3,{id:"construct-qconfig",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#construct-qconfig",children:"#"}),"Construct QConfig"]}),"\n",(0,i.jsx)(e.p,{children:"There are two methods for you to choose from when constructing Qconfig:"}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Construct the QConfig object directly as introduced above. This method is flexible, allowing the configuration of any configurable parameter, but requires deep understanding of QConfig."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Use the get_qconfig interface. This interface is simpler and easier to use than directly constructing QConfig objects but less flexible, and cannot be used for advanced requirements."}),"\n"]}),"\n"]}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" torch"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" get_qconfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"observer_v2 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" MinMaxObserver"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" QConfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"fake_quantize "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" FakeQuantize"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"dtype "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" qint8"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# qconfig_1 / qconfig_2 / qconfig_3 / qconfig_4 are equivalent."})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig_1 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QConfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeQuantize."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        averaging_constant"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0.01"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qscheme"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.per_channel_symmetric,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        ch_axis"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    output"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeQuantize."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        averaging_constant"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qscheme"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.per_tensor_symmetric,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        ch_axis"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"=-"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"1"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig_2 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QConfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeQuantize."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qscheme"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"torch.per_channel_symmetric,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        ch_axis"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    output"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"FakeQuantize."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"with_args"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        averaging_constant"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig_3 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    observer"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"MinMaxObserver,   "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Input and output observer types, only supports MinMaxObserver and MSEObserver in horizon_plugin_pytorch.quantization.observer_v2, default is MinMaxObserver."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    in_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Input data type, set based on the support of the operator. None means the input keyword of QConfig is None, default is None."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Weight data type, set based on the support of the operator. None means the weight keyword of QConfig is None, default is qint8."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    out_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Output data type, set based on the support of the operator. None means the output keyword of QConfig is None, default is qint8."})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    fix_scale"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"True"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",   "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Whether to fix the input and output scales."})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig_4 "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(fix_scale"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"True"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})]}),"\n"]})})}),"\n",(0,i.jsxs)(e.h2,{id:"set-qconfig-via-qconfigsetter",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#set-qconfig-via-qconfigsetter",children:"#"}),"Set qconfig via QconfigSetter"]}),"\n",(0,i.jsx)(e.p,{children:"QconfigSetter automatically sets qconfig according to the specified rules based on the model's computation graph, and it is our most recommended method for setting qconfig. The use of QconfigSetter depends on the graph mode of the prepare process, with the usage as follows:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" prepare"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" PrepareMethod"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" get_qconfig"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qconfig_setter "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"*"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number"}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:"qat_model "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"prepare"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    model,"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    example_inputs"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"example_inputs,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The graph mode requires providing model inputs to obtain the computation graph"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qconfig_setter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QconfigSetter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        reference_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# qconfig used to provide the observer"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"<"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"Templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:">"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"],  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# User-configured templates"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        enable_optimize"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"True"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Enable all default optimizations"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        save_dir"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"./qconfig_setting"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:",  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Save path for qconfig configuration results (qconfig.pt file) and changelog"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ),"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    method"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"PrepareMethod.JIT_STRIP,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# QconfigSetter depends on the computation graph"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(e.h3,{id:"template-description",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#template-description",children:"#"}),"Template Description"]}),"\n",(0,i.jsx)(e.p,{children:"The templates you can configure are as follows:"}),"\n",(0,i.jsxs)(e.ol,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"ModuleNameTemplate"})," (required, needs to cover all quantized operators): Specify dtype configuration or quantization threshold through module name."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"ConvDtypeTemplate"})," (required): Specify the input and weight dtype of Conv-type operators."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"MatmulDtypeTemplate"})," (required): Specify the input dtype of Matmul operators."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"SensitivityTemplate"})," (optional): Configure the top-n operators to high precision according to sensitivity."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"LoadFromFileTemplate"}),": Load the qconfig.pt file, which is used to reproduce previous quantization configurations. ",(0,i.jsx)(e.strong,{children:"At this time, enable_optimize must be False; otherwise, the correctness of the configuration results cannot be guaranteed, and there may be CPU operators during deployment."})]}),"\n"]}),"\n"]}),"\n",(0,i.jsx)(e.p,{children:"These templates take effect in the order of configuration, and the configuration of the previous template can be overwritten by the subsequent one."}),"\n",(0,i.jsx)(e.p,{children:(0,i.jsxs)(e.strong,{children:["Detailed Explanation of ",(0,i.jsx)(e.code,{children:"ModuleNameTemplate"})]})}),"\n",(0,i.jsxs)(e.ol,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"The module name can be an operator name or a prefix. When different module names in a ModuleNameTemplate have an overriding relationship, the longer name has higher priority. For example:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    {"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'""'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint8,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Global qint8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"head"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint16,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Higher priority than the global configuration; head is finally configured as int16"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"head.conv0"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": torch.float16,  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Higher priority than the configuration of head; head.conv is finally configured to output float16"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    }"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:["The threshold of the operator can be specified (provided that there is a corresponding pseudo-quantization node in the operator). At this time, the calculation method of the quantization scale is ",(0,i.jsx)(e.code,{children:"scale = threshold / -qdtype.min"}),". For example:"]}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    {"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"quant"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"dtype"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint8, "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"threshold"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"1.0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"},  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The quantization scale of quant is 1.0/128"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    }"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"By default, dtype and threshold are configured on the output of the operator. You can configure the input or weight by specifying the key. When the operator has multiple inputs, None can be used as a placeholder. For example:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    {"})}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"conv0"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"input"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint8, "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"weight"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint16},"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"conv1"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"dtype"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"input"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint16}, "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"threshold"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"weight"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"1.0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"}},"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"matmul0"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"dtype"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"input"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": [qint16, "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"]}, "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"threshold"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": {"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"input"'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": ["}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"1.0"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:", "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"]}},  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Configure the first input of matmul0 as int16 with a fixed scale of 1.0/32768, and do not configure the second input"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    }"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.h3,{id:"scenario-examples",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#scenario-examples",children:"#"}),"Scenario Examples"]}),"\n",(0,i.jsxs)(e.ol,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"All int8:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QconfigSetter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    reference_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"({"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'""'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint8}),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# All operators are configured to output int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ConvDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8, weight_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The input and weight of conv are configured as int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"MatmulDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtypes"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Both inputs of matmul are configured as int8"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ],"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Feature int16, weight int8:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QconfigSetter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    reference_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"({"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'""'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint16}),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# All operators are configured to output int16"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ConvDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint16, weight_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The input of conv is configured as int16, and the weight is configured as int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"MatmulDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtypes"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint16),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Both inputs of matmul are configured as int16"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ],"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Gemm operators with double int8, other operators with fp16:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QconfigSetter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    reference_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"({"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'""'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": torch.float16}),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# All operators are configured to output fp16"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ConvDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8, weight_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The input and weight of conv are configured as int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"MatmulDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtypes"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Both inputs of matmul are configured as int8"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ],"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Gemm operators with double int8, other operators with int16, and high-sensitivity gemm configured as int16:"}),"\n",(0,i.jsx)(e.pre,{className:"code",children:(0,i.jsx)(e.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(e.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"QconfigSetter"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    reference_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"get_qconfig"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    templates"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ModuleNameTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"({"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-string-expression)"},children:'""'}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:": qint16}),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# All operators are configured to output int16"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"ConvDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8, weight_dtype"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# The input and weight of conv are configured as int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"MatmulDtypeTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(input_dtypes"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"qint8),  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# Both inputs of matmul are configured as int8"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-function)"},children:"SensitivityTemplate"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(  "}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-comment)"},children:"# If the highly sensitive feat or weight is configured as int8, modify it to int16"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"                sensitive_table"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"...,"})]}),"\n",(0,i.jsxs)(e.span,{className:"line line-number",children:[(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"                topk_or_ratio"}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"...,"})]}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"            ),"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    ],"})}),"\n",(0,i.jsx)(e.span,{className:"line line-number",children:(0,i.jsx)(e.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.h3,{id:"description-of-default-optimization-passes",children:[(0,i.jsx)(e.a,{className:"header-anchor","aria-hidden":"true",href:"#description-of-default-optimization-passes",children:"#"}),"Description of Default Optimization Passes"]}),"\n",(0,i.jsx)(e.p,{children:"In addition to the templates you can configure, QconfigSetter also integrates a series of optimization and legalization templates, which are explained in this section."}),"\n",(0,i.jsxs)(e.ol,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"CanonicalizeTemplate"}),": Legalize dtype configuration according to operator types. The current default rules are:"]}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Gemm-type operators do not support float inputs(include weight)."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Interpolation-type operators: There are different restrictions under different march."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Special operators such as DPP and RPP only support int8."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"General rules for other operators: The input dtype and output dtype of an operator cannot have both qint and float."}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"EqualizeInOutScaleTemplate"}),": For relu, concat, and stack operators, the scale should be counted after the operator; otherwise, there may be a loss in precision or performance. To this end:"]}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Configure the output dtype of the previous operator as float32."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"When exporting hbir for relu, concat, and stack operators, insert pseudo-quantization at the input, and reuse the output scale for the scale."}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"FuseConvAddTemplate"}),": The hardware supports the fusion of conv + add. To this end:"]}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Configure the output dtype of conv as float32."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Configure the corresponding input dtype of add as float32."}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"GridHighPrecisionTemplate"}),": According to experience, the grid calculation process of grid sample with qint8 is not precise enough, so the relevant operators are automatically configured to high precision."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"InternalQuantsTemplate"}),": In the scenario of segmented model deployment, QuantStub will be inserted at the segmentation points to record the dtype and scale here. The dtype configuration of such QuantStub must be consistent with the input."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"OutputHighPrecisionTemplate"}),": When a Gemm-type operator is used as the model output, configure it to output with high precision."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"PropagateTemplate"}),": For operators split into subgraphs for implementation, there are empirical configurations. For example, the small internal operators of LayerNorm and Softmax should use high precision."]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"SimpleIntPassTemplate"}),": For performance optimization, for computation graphs such as op0->op1->op2, if the following conditions are met at the same time, modify the output type of op1 to int:"]}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"op2 requires int input."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"op0 can output int."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"op1 currently outputs float16 and belongs to the following types:"}),"\n",(0,i.jsxs)(e.ul,{children:["\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"cat, stack."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"mul_scalar."}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsx)(e.p,{children:"Lookup table operators without precision risks (that is, operators that use lookup table implementation by default on fp16)."}),"\n"]}),"\n"]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(e.li,{children:["\n",(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.code,{children:"SimplifyTemplate"}),": Delete redundant quantization node configurations (modify the corresponding dtype to None)."]}),"\n"]}),"\n"]})]})}function t(){let n=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},{wrapper:e}=Object.assign({},(0,o.ah)(),n.components);return e?(0,i.jsx)(e,Object.assign({},n,{children:(0,i.jsx)(r,n)})):r(n)}e.default=t,t.__RSPRESS_PAGE_META={},t.__RSPRESS_PAGE_META["latest%2Fen%2Fguide%2Fplugin%2Fuser_guide%2Fqconfig.mdx"]={toc:[{id:"definition-and-principle",text:"Definition and Principle",depth:2},{id:"definition-of-qconfig",text:"Definition of QConfig",depth:3},{id:"definition-of-fakequantize",text:"Definition of FakeQuantize",depth:3},{id:"definition-of-fakecast",text:"Definition of FakeCast",depth:3},{id:"construct-qconfig",text:"Construct QConfig",depth:3},{id:"set-qconfig-via-qconfigsetter",text:"Set qconfig via QconfigSetter",depth:2},{id:"template-description",text:"Template Description",depth:3},{id:"scenario-examples",text:"Scenario Examples",depth:3},{id:"description-of-default-optimization-passes",text:"Description of Default Optimization Passes",depth:3}],title:"QConfig in Detail",frontmatter:{}}}}]);