@@ -40,8 +40,9 @@ pip install -v .
 # install requirements
 cd examples/huggingface/pytorch/text-generation/quantization
 pip install -r requirements.txt
-pip install neural-compressor==2.5
-pip install transformers==4.35.2
+pip install neural-compressor==2.6
+pip install transformers==4.38.1
+
 pip install torch==2.3.0+cpu --index-url https://download.pytorch.org/whl/cpu
 pip install intel-extension-for-pytorch==2.3.0
 ```
@@ -221,10 +222,11 @@ python run_generation_sq.py \
     --calib_len 2048 \
     --fallback_add \
     --calib_shuffle False \
+    --calib_iters 512 \
     --tasks lambada_openai \
     --int8 --sq --accuracy \
     --batch_size 1 \
-    --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.8 , 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'mean'}}}"
+    --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.79, 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'mean'}}}"
 ```
 
 ### Weight-Only Quantization
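The updated `--recipes` string above asks neural-compressor to auto-tune SmoothQuant's alpha rather than fixing it, searching the range given by `auto_alpha_args` and reducing per-layer results with `shared_criterion` ('mean' here). A minimal sketch of the candidate grid that `alpha_min`/`alpha_max`/`alpha_step` describe; the `alpha_grid` helper is hypothetical, and the actual search lives inside neural-compressor:

```python
# Hypothetical helper showing the alpha candidates implied by auto_alpha_args;
# neural-compressor performs the real per-layer search and reduction.
import numpy as np

def alpha_grid(alpha_min: float, alpha_max: float, alpha_step: float) -> np.ndarray:
    n = int(round((alpha_max - alpha_min) / alpha_step)) + 1
    return np.round(alpha_min + alpha_step * np.arange(n), 10)

print(alpha_grid(0.79, 0.99, 0.01))  # 21 candidates: 0.79, 0.80, ..., 0.99
```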
@@ -276,11 +278,12 @@ python run_generation_sq.py \
     --trust_remote_code \
     --calib_len 1024 \
     --fallback_add \
+    --calib_iters 512 \
     --calib_padding \
     --tasks lambada_openai \
     --int8 --sq --accuracy \
     --batch_size 1 \
-    --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.75, 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'max'}}}"
+    --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.75, 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'max', 'n_samples': 64}}}"
 ```
 
 ### Weight-Only Quantization
@@ -544,7 +547,7 @@ python run_generation_sq.py \
     --tasks lambada_openai \
     --int8 --sq --accuracy \
     --batch_size 1 \
-    --alpha 0.65
+    --alpha 1.0
 ```
 
 ### Weight-Only Quantization
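For runs that pass a fixed `--alpha` instead of `'auto'`, the value is the exponent in SmoothQuant's per-channel scaling rule from the SmoothQuant paper, s_j = max|X_j|^alpha / max|W_j|^(1-alpha): `--alpha 1.0` derives the scales from activation statistics alone, migrating all of the quantization difficulty onto the weights. A runnable sketch with random stand-in tensors (the model and calibration data in the scripts above are real checkpoints, not these toys):

```python
# SmoothQuant scaling rule: activations are divided per channel by s,
# weights multiplied by s, so the layer output y = X @ W is unchanged.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(8, 4))   # activations; columns are input channels
W = rng.normal(size=(4, 4))   # weights for y = X @ W; rows are input channels

def smooth(X, W, alpha, eps=1e-5):
    # Per-channel scales: s_j = max|X_j|^alpha / max|W_j|^(1 - alpha)
    s = (np.maximum(np.abs(X).max(axis=0), eps) ** alpha
         / np.maximum(np.abs(W).max(axis=1), eps) ** (1 - alpha))
    return X / s, W * s[:, None]

Xs, Ws = smooth(X, W, alpha=1.0)  # alpha=1.0: difficulty moved fully to weights
assert np.allclose(X @ W, Xs @ Ws)
```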
@@ -650,8 +653,9 @@ python run_generation_sq.py \
     --trust_remote_code \
     --tasks lambada_openai \
     --int8 --sq --accuracy \
     --batch_size 1 \
-    --alpha 0.75
+    --recipes " {'smooth_quant':True,'smooth_quant_args':{'alpha':'auto','folding':False,'default_alpha':0.7,'auto_alpha_args':{'alpha_min':0.55,'alpha_max':0.8,'alpha_step':0.01,'shared_criterion':'mean','n_samples':64}}}" \
+    --calib_iters 512
 ```
 
 ### Weight-Only Quantization
@@ -702,8 +707,8 @@ python run_generation_sq.py \
     --trust_remote_code \
     --tasks lambada_openai \
     --int8 --sq --accuracy \
-    --batch_size 1 \
-    --alpha 0.9
+    --recipes " {'smooth_quant':True,'smooth_quant_args':{'alpha':'auto','folding':False,'default_alpha':0.85,'auto_alpha_args':{'alpha_min':0.79,'alpha_max':0.88,'alpha_step':0.01,'shared_criterion':'mean'}}}" \
+    --batch_size 1
 ```
 
 ### Weight-Only Quantization
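In every hunk above, the `--recipes` value is a single Python dict literal passed as one shell argument, which is why it is wrapped in double quotes and uses single quotes internally. A quick way to sanity-check such a string before launching a long quantization run; this is a standalone sketch, and how `run_generation_sq.py` itself parses the flag may differ:

```python
# Validate a --recipes string as a Python dict literal before a long run;
# ast.literal_eval accepts literals only, never arbitrary code.
import ast

recipes_arg = (
    " {'smooth_quant':True,'smooth_quant_args':{'alpha':'auto','folding':False,"
    "'default_alpha':0.85,'auto_alpha_args':{'alpha_min':0.79,'alpha_max':0.88,"
    "'alpha_step':0.01,'shared_criterion':'mean'}}}"
)
recipes = ast.literal_eval(recipes_arg)
print(recipes["smooth_quant_args"]["auto_alpha_args"])
# In neural-compressor 2.x, a dict of this shape is what
# PostTrainingQuantConfig(recipes=...) expects.
```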