Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 03875a1

Browse files
authored
update recipes (#1685)
Signed-off-by: Sun, Xuehao <[email protected]>
1 parent 6fadb18 commit 03875a1

File tree

1 file changed

+42
-28
lines changed

1 file changed

+42
-28
lines changed

examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ pip install -v .
4040
# install requirements
4141
cd examples/huggingface/pytorch/text-generation/quantization
4242
pip install -r requirements.txt
43-
pip install neural-compressor==2.6
43+
pip install neural-compressor==3.0
4444
pip install torch==2.3.0+cpu --index-url https://download.pytorch.org/whl/cpu
4545
# 4.38.1 is only limited by smoothquant
46-
pip install transformers==4.38.1
46+
pip install transformers==4.38.2 # 4.42.4 for mistralai/Mistral-7B-v0.1
4747
# ipex is only necessary for smoothquant
4848
pip install intel-extension-for-pytorch==2.3.0
4949
```
@@ -61,7 +61,7 @@ python run_generation_sq.py \
6161
--tasks lambada_openai \
6262
--sq \
6363
--accuracy \
64-
--eval_batch_size 56 \
64+
--eval_batch_size 1 \
6565
--alpha 0.85
6666
```
6767

@@ -115,7 +115,7 @@ python run_generation_sq.py \
115115
--tasks lambada_openai \
116116
--sq \
117117
--accuracy \
118-
--eval_batch_size 56 \
118+
--eval_batch_size 1 \
119119
--alpha 0.9
120120
```
121121

@@ -169,7 +169,7 @@ python run_generation_sq.py \
169169
--tasks lambada_openai \
170170
--sq \
171171
--accuracy \
172-
--eval_batch_size 56 \
172+
--eval_batch_size 1 \
173173
--alpha 0.5
174174
```
175175

@@ -222,13 +222,15 @@ python run_generation_sq.py \
222222
--tasks lambada_openai \
223223
--sq \
224224
--accuracy \
225-
--batch_size 1 \
225+
--eval_batch_size 1 \
226226
--init_alpha 0.8 \
227-
--alpha_min 0.8 \
227+
--alpha_min 0.79 \
228228
--alpha_max 0.99 \
229229
--alpha_step 0.01 \
230230
--shared_criterion mean \
231231
--seq_len 2048 \
232+
--shuffle \
233+
--n_samples 512 \
232234
--alpha auto
233235
```
234236

@@ -278,16 +280,18 @@ python run_generation_cpu_woq.py \
278280
python run_generation_sq.py \
279281
--model meta-llama/Llama-2-13b-hf \
280282
--output_dir ./saved_results \
281-
--seq_len 1024 \
282283
--tasks lambada_openai \
283284
--sq \
284285
--accuracy \
285-
--batch_size 1 \
286+
--eval_batch_size 1 \
287+
--seq_len 1024 \
286288
--init_alpha 0.8 \
287289
--alpha_min 0.75 \
288290
--alpha_max 0.99 \
289291
--alpha_step 0.01 \
290292
--shared_criterion max \
293+
--padding \
294+
--n_samples 512 \
291295
--alpha auto
292296
```
293297

@@ -340,8 +344,9 @@ python run_generation_sq.py \
340344
--tasks lambada_openai \
341345
--sq \
342346
--accuracy \
343-
--eval_batch_size 56 \
344-
--alpha 0.8
347+
--eval_batch_size 1 \
348+
--alpha 0.8 \
349+
--n_samples 512
345350
```
346351

347352
### Weight-Only Quantization
@@ -394,7 +399,7 @@ python run_generation_sq.py \
394399
--tasks lambada_openai \
395400
--sq \
396401
--accuracy \
397-
--eval_batch_size 56 \
402+
--eval_batch_size 1 \
398403
--alpha 0.9
399404
```
400405

@@ -447,7 +452,7 @@ python run_generation_sq.py \
447452
--trust_remote_code \
448453
--tasks lambada_openai \
449454
--sq --accuracy \
450-
--batch_size 1 \
455+
--eval_batch_size 1 \
451456
--alpha 0.95
452457
```
453458

@@ -500,7 +505,7 @@ python run_generation_sq.py \
500505
--tasks lambada_openai \
501506
--sq \
502507
--accuracy \
503-
--eval_batch_size 56 \
508+
--eval_batch_size 1 \
504509
--alpha 0.95
505510
```
506511

@@ -554,7 +559,7 @@ python run_generation_sq.py \
554559
--tasks lambada_openai \
555560
--sq \
556561
--accuracy \
557-
--eval_batch_size 56 \
562+
--eval_batch_size 1 \
558563
--alpha 0.65
559564
```
560565

@@ -607,7 +612,7 @@ python run_generation_sq.py \
607612
--trust_remote_code \
608613
--tasks lambada_openai \
609614
--sq --accuracy \
610-
--batch_size 1 \
615+
--eval_batch_size 1 \
611616
--alpha 0.5
612617
```
613618

@@ -662,8 +667,13 @@ python run_generation_sq.py \
662667
--tasks lambada_openai \
663668
--sq \
664669
--accuracy \
665-
--eval_batch_size 56 \
666-
--alpha 0.75
670+
--eval_batch_size 1 \
671+
--init_alpha 0.7 \
672+
--alpha_min 0.55 \
673+
--alpha_max 0.8 \
674+
--alpha_step 0.01 \
675+
--shared_criterion mean \
676+
--n_samples 512
667677
```
668678

669679
### Weight-Only Quantization
@@ -715,8 +725,12 @@ python run_generation_sq.py \
715725
--tasks lambada_openai \
716726
--sq \
717727
--accuracy \
718-
--eval_batch_size 56 \
719-
--alpha 0.9
728+
--eval_batch_size 1 \
729+
--init_alpha 0.85 \
730+
--alpha_min 0.79 \
731+
--alpha_max 0.88 \
732+
--alpha_step 0.01 \
733+
--shared_criterion mean
720734
```
721735

722736
### Weight-Only Quantization
@@ -768,7 +782,7 @@ python run_generation_sq.py \
768782
--tasks lambada_openai \
769783
--sq \
770784
--accuracy \
771-
--eval_batch_size 56 \
785+
--eval_batch_size 1 \
772786
--alpha 0.6
773787
```
774788

@@ -821,7 +835,7 @@ python run_generation_sq.py \
821835
--tasks lambada_openai \
822836
--sq \
823837
--accuracy \
824-
--eval_batch_size 56 \
838+
--eval_batch_size 1 \
825839
--alpha 0.7
826840
```
827841

@@ -874,7 +888,7 @@ python run_generation_sq.py \
874888
--tasks lambada_openai \
875889
--sq \
876890
--accuracy \
877-
--eval_batch_size 56 \
891+
--eval_batch_size 1 \
878892
--alpha 0.75
879893
```
880894

@@ -896,10 +910,10 @@ python run_generation_cpu_woq.py \
896910
--woq_algo GPTQ \
897911
--bits 4 \
898912
--weight_dtype int4 \
899-
--desc_act \
900-
--seq_len 2048 \
901-
--scheme sym \
902-
--group_size 32 \
913+
--scheme asym \
914+
--group_size 128 \
915+
--use_mse_search \
916+
--n_samples 128 \
903917
--accuracy
904918

905919
# int4 AutoRound
@@ -927,7 +941,7 @@ python run_generation_sq.py \
927941
--tasks lambada_openai \
928942
--sq \
929943
--accuracy \
930-
--eval_batch_size 56 \
944+
--eval_batch_size 1 \
931945
--alpha 0.75
932946
```
933947

0 commit comments

Comments (0)