"use strict";(self.webpackChunkrspress_doc_template=self.webpackChunkrspress_doc_template||[]).push([["7927"],{79418:function(e,s,n){n.r(s);var i=n(85893),a=n(50065);function r(e){let s=Object.assign({h1:"h1",a:"a",p:"p",h2:"h2",pre:"pre",code:"code",span:"span",div:"div",ol:"ol",li:"li",h3:"h3",math:"math",semantics:"semantics",mrow:"mrow",mi:"mi",msub:"msub",mo:"mo",mtext:"mtext",mn:"mn",annotation:"annotation"},(0,a.ah)(),e.components);return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(s.h1,{id:"quantized-awareness-training-guide",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#quantized-awareness-training-guide",children:"#"}),"Quantized Awareness Training Guide"]}),"\n",(0,i.jsx)(s.p,{children:"The quantized awareness training is performed by inserting some pseudo-quantized nodes into the model,\nso as to minimize the loss of accuracy when the model obtained through quantized awareness training is converted into a fixed-point model.\nThe quantized awareness training is no different from traditional model training in that one can start from scratch, build a pseudo-quantized model, and then train on that pseudo-quantized model.\nDue to the limitations of the deployed hardware platform, it is challenging to understand these limitations and build a pseudo-quantization model based on them.\nThe quantized awareness training tool reduces the challenges of developing quantized models by automatically inserting pseudo-quantization operators into the provided floating-point model based on the limitations of the deployment platform."}),"\n",(0,i.jsx)(s.p,{children:"The quantized awareness training is generally more difficult than the training of pure floating-point models due to the various restrictions imposed.\nThe goal of the quantized awareness training tool is to reduce the difficulty of quantized awareness training and to reduce the engineering difficulty of quantized model deployment."}),"\n",(0,i.jsxs)(s.h2,{id:"process-and-example",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#process-and-example",children:"#"}),"Process and Example"]}),"\n",(0,i.jsx)(s.p,{children:"Although our quantized awareness training tool does not mandate that you provide a pre-trained floating-point model at the outset,\nexperience has shown that starting quantized awareness training from a pre-trained high-precision floating-point model generally significantly reduces the difficulty of training."}),"\n",(0,i.jsx)(s.pre,{className:"code",children:(0,i.jsx)(s.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(s.code,{className:"language-python",meta:"",children:[(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# convert the model to QAT state"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"qat_model "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"prepare"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    float_model,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    example_input,"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qconfig_setter "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:" horizon.quantization.qconfig_template.default_qat_qconfig_setter,"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:")."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"to"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(device)"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# load the quantization parameters in the Calibration model"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"qat_model"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"load_state_dict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(calib_model."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"state_dict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"())"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# perform quantized awareness training"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# as a filetune process, quantized awareness training generally requires setting a small learning rate"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"optimizer "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" torch"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"optim"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"SGD"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qat_model."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"parameters"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(), lr"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"0.0001"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:", weight_decay"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"2e-4"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"for"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" nepoch "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"in"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"range"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(epoch_num):"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# note the method of controlling the training state of the QAT model here"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    qat_model"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"train"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"set_fake_quantize"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_model, FakeQuantState.QAT)"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"train_one_epoch"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qat_model,"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        nn."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"CrossEntropyLoss"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        optimizer,"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        train_data_loader,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        device,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# note the method of controlling the eval state of the QAT model here"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    qat_model"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"eval"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"set_fake_quantize"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_model, FakeQuantState.VALIDATION)"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# test qat model accuracy"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    top1"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" top5 "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"evaluate"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qat_model,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        eval_data_loader,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        device,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"print"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"QAT model: evaluation Acc@1 '}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:" Acc@5 "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"'}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"format"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"            top1.avg, top5.avg"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        )"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# test quantized model accuracy"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"qat_hbir_model "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"hbdk4"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"export"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qat_model. example_input"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"quantized_hbir_model "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" hbdk4"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"compiler"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"convert"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_hbir_model)"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"top1"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" top5 "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"evaluate"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    quantized_hbir_model,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    eval_data_loader,"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"print"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"Quantized model: evaluation Acc@1 '}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:" Acc@5 "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"'}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"format"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        top1.avg, top5.avg"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(s.div,{className:"rspress-directive warning",children:[(0,i.jsx)(s.div,{className:"rspress-directive-title",children:"Attention"}),(0,i.jsx)(s.div,{className:"rspress-directive-content",children:(0,i.jsx)(s.p,{children:"Due to the underlying limitations of the deployment platform, the QAT model cannot fully represent the final on-board accuracy,\nplease make sure to monitor the quantized model accuracy to ensure that the quantized model accuracy is normal, otherwise the model on-board dropout problem may occur."})})]}),"\n",(0,i.jsx)(s.p,{children:"As can be seen from the above sample code, there are two additional steps in quantized awareness training compared to traditional pure floating-point model training:"}),"\n",(0,i.jsxs)(s.ol,{children:["\n",(0,i.jsxs)(s.li,{children:["\n",(0,i.jsx)(s.p,{children:"prepare. The goal of this step is to transform the floating-point network and insert pseudo-quantized nodes."}),"\n"]}),"\n",(0,i.jsxs)(s.li,{children:["\n",(0,i.jsx)(s.p,{children:"Load the Calibration model parameters. A better initialization is obtained by loading the pseudo-quantization parameters obtained from Calibration."}),"\n",(0,i.jsx)(s.p,{children:"Modifications of state_dict should not only focus on the keys and values, but also on the _metadata. Below is an example of copying a state_dict:"}),"\n",(0,i.jsx)(s.pre,{className:"code",children:(0,i.jsx)(s.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(s.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"new_state_dict "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"OrderedDict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"for"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" k"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" v "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"in"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" state_dict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"items"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"():"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    new_state_dict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"["}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"k"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"]"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" v"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"if"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"hasattr"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(state_dict, "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"_metadata"'}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"):"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    new_state_dict"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"_metadata "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" copy"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"deepcopy"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(state_dict._metadata)"})]}),"\n"]})})}),"\n"]}),"\n"]}),"\n",(0,i.jsxs)(s.div,{className:"rspress-directive warning",children:[(0,i.jsx)(s.div,{className:"rspress-directive-title",children:"Attention"}),(0,i.jsx)(s.div,{className:"rspress-directive-content",children:(0,i.jsxs)(s.p,{children:["The compatibility of operators depends on the ",(0,i.jsx)(s.code,{children:"_version"})," variable of ",(0,i.jsx)(s.code,{children:"torch.nn.Module"}),", which is stored in ",(0,i.jsx)(s.code,{children:"state_dict._metadata"}),".\nPlease ensure that the ",(0,i.jsx)(s.code,{children:"_metadata"})," is preserved during the process of saving or loading ",(0,i.jsx)(s.code,{children:"state_dict"}),", as its absence may lead to compatibility issues."]})})]}),"\n",(0,i.jsx)(s.p,{children:"At this point, the construction of the pseudo-quantized model and the initialization of the parameters are completed,\nand then the regular training iterations and model parameter updates can be performed, and the quantized model accuracy can be monitored."}),"\n",(0,i.jsx)(s.p,{children:"To meet the requirements of segmented deployment or to align with float training strategies, it may be necessary to freeze certain parts of the model during training. You can refer to the following code to perform the freezing:"}),"\n",(0,i.jsx)(s.pre,{className:"code",children:(0,i.jsx)(s.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(s.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"quantization "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" freeze_qat_module"})]}),"\n",(0,i.jsx)(s.span,{className:"line line-number"}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# Model weights / quantization parameters will be fixed, and all operators will be set to eval mode."})}),"\n",(0,i.jsx)(s.span,{className:"line line-number",children:(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-comment)"},children:"# Ensure that the freeze_qat_module interface is called after invoking interfaces like train(), eval(), or set_fake_quantize(), which may change the model state."})}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"freeze_qat_module"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(model)"})]}),"\n"]})})}),"\n",(0,i.jsxs)(s.h2,{id:"pseudo-quantized-operator",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#pseudo-quantized-operator",children:"#"}),"Pseudo-quantized Operator"]}),"\n",(0,i.jsx)(s.p,{children:"The main difference between the quantized awareness training and the traditional floating-point model's training is the insertion of pseudo-quantization operators,\nand, as different quantized awareness training algorithms are also represented by pseudo-quantization operators, here we take a brief introduce the pseudo-quantization operators."}),"\n",(0,i.jsxs)(s.div,{className:"rspress-directive info",children:[(0,i.jsx)(s.div,{className:"rspress-directive-title",children:"Note"}),(0,i.jsx)(s.div,{className:"rspress-directive-content",children:(0,i.jsx)(s.p,{children:"Since the BPU only supports symmetric quantization, here we take the symmetric quantization as an example."})})]}),"\n",(0,i.jsxs)(s.h3,{id:"pseudo-quantization-process",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#pseudo-quantization-process",children:"#"}),"Pseudo-quantization Process"]}),"\n",(0,i.jsxs)(s.p,{children:["Take the int8 quantized awareness training as an example, in general,\nthe pseudo-quantization operator is computed as: ",(0,i.jsxs)(s.span,{className:"katex",children:[(0,i.jsx)(s.span,{className:"katex-mathml",children:(0,i.jsx)(s.math,{xmlns:"http://www.w3.org/1998/Math/MathML",children:(0,i.jsxs)(s.semantics,{children:[(0,i.jsxs)(s.mrow,{children:[(0,i.jsx)(s.mi,{children:"f"}),(0,i.jsx)(s.mi,{children:"a"}),(0,i.jsx)(s.mi,{children:"k"}),(0,i.jsxs)(s.msub,{children:[(0,i.jsx)(s.mi,{children:"e"}),(0,i.jsx)(s.mi,{children:"q"})]}),(0,i.jsx)(s.mi,{children:"u"}),(0,i.jsx)(s.mi,{children:"a"}),(0,i.jsx)(s.mi,{children:"n"}),(0,i.jsxs)(s.msub,{children:[(0,i.jsx)(s.mi,{children:"t"}),(0,i.jsx)(s.mi,{children:"x"})]}),(0,i.jsx)(s.mo,{children:"="}),(0,i.jsx)(s.mi,{children:"c"}),(0,i.jsx)(s.mi,{children:"l"}),(0,i.jsx)(s.mi,{children:"i"}),(0,i.jsx)(s.mi,{children:"p"}),(0,i.jsx)(s.mo,{stretchy:"false",children:"("}),(0,i.jsx)(s.mi,{children:"r"}),(0,i.jsx)(s.mi,{children:"o"}),(0,i.jsx)(s.mi,{children:"u"}),(0,i.jsx)(s.mi,{children:"n"}),(0,i.jsx)(s.mi,{children:"d"}),(0,i.jsx)(s.mo,{stretchy:"false",children:"("}),(0,i.jsx)(s.mi,{children:"x"}),(0,i.jsx)(s.mi,{mathvariant:"normal",children:"/"}),(0,i.jsx)(s.mi,{children:"s"}),(0,i.jsx)(s.mi,{children:"c"}),(0,i.jsx)(s.mi,{children:"a"}),(0,i.jsx)(s.mi,{children:"l"}),(0,i.jsx)(s.mi,{children:"e"}),(0,i.jsx)(s.mo,{stretchy:"false",children:")"}),(0,i.jsx)(s.mtext,{children:"\uFF0C"}),(0,i.jsx)(s.mo,{children:"\u2212"}),(0,i.jsx)(s.mn,{children:"128"}),(0,i.jsx)(s.mo,{separator:"true",children:","}),(0,i.jsx)(s.mn,{children:"127"}),(0,i.jsx)(s.mo,{stretchy:"false",children:")"}),(0,i.jsx)(s.mo,{children:"\u2217"}),(0,i.jsx)(s.mi,{children:"s"}),(0,i.jsx)(s.mi,{children:"c"}),(0,i.jsx)(s.mi,{children:"a"}),(0,i.jsx)(s.mi,{children:"l"}),(0,i.jsx)(s.mi,{children:"e"})]}),(0,i.jsx)(s.annotation,{encoding:"application/x-tex",children:"fake_quant_x = clip(round(x / scale)\uFF0C-128, 127) * scale"})]})})}),(0,i.jsxs)(s.span,{className:"katex-html","aria-hidden":"true",children:[(0,i.jsxs)(s.span,{className:"base",children:[(0,i.jsx)(s.span,{className:"strut",style:{height:"0.9805em",verticalAlign:"-0.2861em"}}),(0,i.jsx)(s.span,{className:"mord mathnormal",style:{marginRight:"0.10764em"},children:"f"}),(0,i.jsx)(s.span,{className:"mord mathnormal",style:{marginRight:"0.03148em"},children:"ak"}),(0,i.jsxs)(s.span,{className:"mord",children:[(0,i.jsx)(s.span,{className:"mord mathnormal",children:"e"}),(0,i.jsx)(s.span,{className:"msupsub",children:(0,i.jsxs)(s.span,{className:"vlist-t vlist-t2",children:[(0,i.jsxs)(s.span,{className:"vlist-r",children:[(0,i.jsx)(s.span,{className:"vlist",style:{height:"0.1514em"},children:(0,i.jsxs)(s.span,{style:{top:"-2.55em",marginLeft:"0em",marginRight:"0.05em"},children:[(0,i.jsx)(s.span,{className:"pstrut",style:{height:"2.7em"}}),(0,i.jsx)(s.span,{className:"sizing reset-size6 size3 mtight",children:(0,i.jsx)(s.span,{className:"mord mathnormal mtight",style:{marginRight:"0.03588em"},children:"q"})})]})}),(0,i.jsx)(s.span,{className:"vlist-s",children:"\u200B"})]}),(0,i.jsx)(s.span,{className:"vlist-r",children:(0,i.jsx)(s.span,{className:"vlist",style:{height:"0.2861em"},children:(0,i.jsx)(s.span,{})})})]})})]}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"u"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"an"}),(0,i.jsxs)(s.span,{className:"mord",children:[(0,i.jsx)(s.span,{className:"mord mathnormal",children:"t"}),(0,i.jsx)(s.span,{className:"msupsub",children:(0,i.jsxs)(s.span,{className:"vlist-t vlist-t2",children:[(0,i.jsxs)(s.span,{className:"vlist-r",children:[(0,i.jsx)(s.span,{className:"vlist",style:{height:"0.1514em"},children:(0,i.jsxs)(s.span,{style:{top:"-2.55em",marginLeft:"0em",marginRight:"0.05em"},children:[(0,i.jsx)(s.span,{className:"pstrut",style:{height:"2.7em"}}),(0,i.jsx)(s.span,{className:"sizing reset-size6 size3 mtight",children:(0,i.jsx)(s.span,{className:"mord mathnormal mtight",children:"x"})})]})}),(0,i.jsx)(s.span,{className:"vlist-s",children:"\u200B"})]}),(0,i.jsx)(s.span,{className:"vlist-r",children:(0,i.jsx)(s.span,{className:"vlist",style:{height:"0.15em"},children:(0,i.jsx)(s.span,{})})})]})})]}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2778em"}}),(0,i.jsx)(s.span,{className:"mrel",children:"="}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2778em"}})]}),(0,i.jsxs)(s.span,{className:"base",children:[(0,i.jsx)(s.span,{className:"strut",style:{height:"1em",verticalAlign:"-0.25em"}}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"c"}),(0,i.jsx)(s.span,{className:"mord mathnormal",style:{marginRight:"0.01968em"},children:"l"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"i"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"p"}),(0,i.jsx)(s.span,{className:"mopen",children:"("}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"ro"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"u"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"n"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"d"}),(0,i.jsx)(s.span,{className:"mopen",children:"("}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"x"}),(0,i.jsx)(s.span,{className:"mord",children:"/"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"sc"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"a"}),(0,i.jsx)(s.span,{className:"mord mathnormal",style:{marginRight:"0.01968em"},children:"l"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"e"}),(0,i.jsx)(s.span,{className:"mclose",children:")"}),(0,i.jsx)(s.span,{className:"mord cjk_fallback",children:"\uFF0C"}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2222em"}}),(0,i.jsx)(s.span,{className:"mbin",children:"\u2212"}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2222em"}})]}),(0,i.jsxs)(s.span,{className:"base",children:[(0,i.jsx)(s.span,{className:"strut",style:{height:"1em",verticalAlign:"-0.25em"}}),(0,i.jsx)(s.span,{className:"mord",children:"128"}),(0,i.jsx)(s.span,{className:"mpunct",children:","}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.1667em"}}),(0,i.jsx)(s.span,{className:"mord",children:"127"}),(0,i.jsx)(s.span,{className:"mclose",children:")"}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2222em"}}),(0,i.jsx)(s.span,{className:"mbin",children:"\u2217"}),(0,i.jsx)(s.span,{className:"mspace",style:{marginRight:"0.2222em"}})]}),(0,i.jsxs)(s.span,{className:"base",children:[(0,i.jsx)(s.span,{className:"strut",style:{height:"0.6944em"}}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"sc"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"a"}),(0,i.jsx)(s.span,{className:"mord mathnormal",style:{marginRight:"0.01968em"},children:"l"}),(0,i.jsx)(s.span,{className:"mord mathnormal",children:"e"})]})]})]}),"."]}),"\n",(0,i.jsx)(s.p,{children:"Similar to Conv2d, which optimizes the weight and bias parameters through training, the pseudo-quantization operator needs to be trained to optimize the scale parameter.\nHowever, the gradient of round as a step function is 0, which makes it impossible to train the pseudo-quantization operator by backpropagation of the gradient directly.\nTo solve this problem, there are usually two solutions: a statistical-based approach and a learning-based approach."}),"\n",(0,i.jsxs)(s.h3,{id:"statistical-based-approach",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#statistical-based-approach",children:"#"}),"Statistical-based Approach"]}),"\n",(0,i.jsx)(s.p,{children:"The goal of quantization is to uniformly map the floating point numbers in Tensor to the range [-128, 127] represented by int8 via the scale parameter.\nSince the mapping is uniform, it is easy to see how scale is calculated:"}),"\n",(0,i.jsx)(s.pre,{className:"code",children:(0,i.jsx)(s.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(s.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"def"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"compute_scale"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"("}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-parameter)"},children:"x"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:":"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" Tensor):"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    xmin"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" xmax "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" x"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"max"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" maxv "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" x"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"min"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"return"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"max"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(xmin."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"abs"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(), xmax."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"abs"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"())"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"/"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-constant)"},children:"128.0"})]}),"\n"]})})}),"\n",(0,i.jsxs)(s.p,{children:["Due to the uneven distribution of data in Tensor and the outlier problem, different methods for calculating xmin and xmax have been developed, you can refer to the relevant introductions of interfaces such as ",(0,i.jsx)(s.code,{children:"MinMaxObserver"})," in the ",(0,i.jsx)(s.a,{href:"/latest/en/guide/plugin/user_guide/calibration.html#observer_param",children:"Observer Parameters"})," section."]}),"\n",(0,i.jsxs)(s.p,{children:["Please refer to ",(0,i.jsx)(s.a,{href:"/latest/en/guide/plugin/user_guide/qconfig.html",children:"QConfig in Detail"})," for the usage in the tool."]}),"\n",(0,i.jsxs)(s.h3,{id:"learning-based-approach",children:[(0,i.jsx)(s.a,{className:"header-anchor","aria-hidden":"true",href:"#learning-based-approach",children:"#"}),"Learning-based Approach"]}),"\n",(0,i.jsx)(s.p,{children:"Although the gradient of round is 0, the researcher found experimentally that in this scenario,\nif the gradient is directly set to 1, the model can also be made to converge to the expected accuracy."}),"\n",(0,i.jsx)(s.pre,{className:"code",children:(0,i.jsx)(s.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(s.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"def"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"round_ste"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"("}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-parameter)"},children:"x"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:":"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" Tensor):"})]}),"\n",(0,i.jsxs)(s.span,{className:"line line-number",children:[(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"return"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" (x"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"round"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"-"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" x)"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-function)"},children:"detach"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-token-keyword)"},children:"+"}),(0,i.jsx)(s.span,{style:{color:"var(--shiki-color-text)"},children:" x"})]}),"\n"]})})}),"\n",(0,i.jsxs)(s.p,{children:["Please refer to ",(0,i.jsx)(s.a,{href:"/latest/en/guide/plugin/user_guide/qconfig.html#definition-of-fakequantize",children:"Definition of FakeQuantize"})," for the usage in the tool."]}),"\n",(0,i.jsxs)(s.p,{children:["If you are interested in learning more, you can refer to the paper ",(0,i.jsx)(s.a,{href:"https://arxiv.org/abs/1902.08153",target:"_blank",rel:"noopener noreferrer",children:"Learned Step Size Quantization"}),"."]})]})}function l(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},{wrapper:s}=Object.assign({},(0,a.ah)(),e.components);return s?(0,i.jsx)(s,Object.assign({},e,{children:(0,i.jsx)(r,e)})):r(e)}s.default=l,l.__RSPRESS_PAGE_META={},l.__RSPRESS_PAGE_META["latest%2Fen%2Fguide%2Fplugin%2Fuser_guide%2Fqat_guide.mdx"]={toc:[{id:"process-and-example",text:"Process and Example",depth:2},{id:"pseudo-quantized-operator",text:"Pseudo-quantized Operator",depth:2},{id:"pseudo-quantization-process",text:"Pseudo-quantization Process",depth:3},{id:"statistical-based-approach",text:"Statistical-based Approach",depth:3},{id:"learning-based-approach",text:"Learning-based Approach",depth:3}],title:"Quantized Awareness Training Guide",frontmatter:{}}}}]);