
Commit b2d5768

Merge branch 'main' into hengguo/h2o
2 parents: 2706d5d + b00652d

8 files changed: +12, -8 lines

docs/publication.md

Lines changed: 3 additions & 1 deletion
@@ -1,7 +1,9 @@
 Full Publications/Events (50)
 ==========
 
-## 2024 (12)
+## 2024 (14)
+* Blog published on Intel information: [Solutions Provide Acceleration for the Latest Meta Llama 3.1 Models](https://mp.weixin.qq.com/s/Qk3mSWPW8qdIMswpEJxVoA)
+* Blog published on Intel Developer News: [Intel AI Solutions Boost LLMs: Unleashing the Power of Meta* Llama 3.1](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-support-meta-llama-3-1-launch.html)
 * Blog published on digit.in: [AI hallucination in LLM and beyond: Will it ever be fixed?](https://www.digit.in/features/general/ai-hallucination-in-llm-and-beyond-can-it-be-fixed-completely.html)
 * Blog published on Medium: [Accelerating Qwen2 Models with Intel Extension for Transformers](https://medium.com/intel-analytics-software/accelerating-qwen2-models-with-intel-extension-for-transformers-99403de82f68) (June 2024)
 * Blog published on Huggingface: [Building Cost-Efficient Enterprise RAG applications with Intel Gaudi 2 and Intel Xeon](https://huggingface.co/blog/cost-efficient-rag-applications-with-intel) (May 2024)

examples/huggingface/pytorch/code-generation/quantization/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -11,5 +11,5 @@ tiktoken #code_gen
 neural-compressor
 intel_extension_for_pytorch==2.3.0
 git+https://github.com/huggingface/optimum-intel.git@50d867c13b22c22eda451ddb67bddb8159670f85
-auto-round==0.2
+git+https://github.com/intel/auto-round.git@61cf9eef4a3ccb5a2d83a557deb709091a548581
 git+https://github.com/bigcode-project/bigcode-evaluation-harness@094c7cc197d13a53c19303865e2056f1c7488ac1
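For context on the requirement syntax: pip treats `git+https://…@<sha>` as a direct VCS reference and builds the package from exactly that commit, so pinning a full SHA keeps every environment on the same pre-release auto-round source. A small sketch (assuming a standard pip-managed environment) for confirming the installed distribution afterwards:

```python
from importlib.metadata import PackageNotFoundError, version

# VCS installs still register under the distribution name, so this reports
# whatever version string the pinned source tree declares.
try:
    print(version("auto-round"))
except PackageNotFoundError:
    print("auto-round is not installed")
```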

examples/huggingface/pytorch/text-generation/quantization/requirements_GPU.txt

Lines changed: 1 addition & 1 deletion
@@ -12,6 +12,6 @@ bitsandbytes #baichuan
 transformers_stream_generator
 tiktoken #qwen
 einops #qwen
-auto-round
+git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
 git+https://github.com/intel/neural-compressor.git
 lm-eval==0.4.3

examples/huggingface/pytorch/text-generation/quantization/requirements_cpu_woq.txt

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ transformers_stream_generator
 tiktoken #qwen
 einops #qwen
 git+https://github.com/intel/neural-speed.git
-auto-round==0.2
+git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
 git+https://github.com/intel/neural-compressor.git
 lm-eval==0.4.3
 huggingface_hub

intel_extension_for_transformers/transformers/llm/quantization/utils.py

Lines changed: 2 additions & 2 deletions
@@ -658,7 +658,7 @@ def convert_to_quantized_model(model, config, device="cpu"):
             lr=config.lr,
             minmax_lr=config.minmax_lr,
             seqlen=config.seq_len,
-            n_samples=config.n_samples,
+            nsamples=config.n_samples,
             iters=config.iters,
             scale_dtype=config.scale_dtype,
         )
@@ -672,7 +672,7 @@ def convert_to_quantized_model(model, config, device="cpu"):
             dataset_name="NeelNanda/pile-10k",
             seed=42,
             bs=config.batch_size,
-            n_samples=config.n_samples)
+            nsamples=config.n_samples)
         run_fn = run_fn_for_autoround
         run_args = (dataloader,)
         model = prepare(model=model, quant_config=quant_config)
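Both hunks track what is evidently the same keyword rename in the pinned auto-round revisions: the calibration-sample count is now passed as `nsamples` rather than `n_samples`. A minimal, self-contained sketch (with a hypothetical stand-in for the tuning entry point and placeholder values, not the project's defaults) of why the call sites must change:

```python
# Hypothetical stand-in for the auto-round tuning entry point; the values
# below are placeholders for illustration only.
def tune(*, seqlen, nsamples, iters):
    print(f"calibrating on {nsamples} samples of length {seqlen} for {iters} iterations")

tune(seqlen=512, nsamples=128, iters=200)     # matches the renamed keyword
# tune(seqlen=512, n_samples=128, iters=200)  # TypeError: unexpected keyword argument
```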

tests/CI/test_quantization.py

Lines changed: 1 addition & 1 deletion
@@ -432,7 +432,7 @@ def test_quantization_for_llm(self):
         woq_model.eval()
         output = woq_model(dummy_input)
         if CpuInfo().bf16:
-            self.assertTrue(isclose(float(output[0][0][0][0]), 0.1513671875, rel_tol=1e-04))
+            self.assertTrue(isclose(float(output[0][0][0][0]), 0.150390625, rel_tol=1e-04))
 
     def test_export(self):
         # test model with model_id
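The bf16 golden value changes alongside the new auto-round pin, presumably because the pinned commit quantizes slightly differently. The old and new constants differ by roughly 0.65%, far outside the test's 1e-4 relative tolerance, so the update is substantive rather than cosmetic; a quick check:

```python
from math import isclose

# The two golden values are not within the test's tolerance of each other,
# so the expected output genuinely changed with the new auto-round pin.
print(isclose(0.1513671875, 0.150390625, rel_tol=1e-04))  # False
```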

tests/CI/test_weight_only.py

Lines changed: 2 additions & 0 deletions
@@ -208,6 +208,7 @@ def test_auto_model_saving_loading(self):
             module_list.append(name)
         self.assertTrue(len(module_list) > 0)
 
+    @unittest.skip("need bug fix.")
     def test_nf4_training(self):
         quantization_config = RtnConfig(bits=4, weight_dtype="nf4", scale_dtype="fp32")
         model = AutoModelForCausalLM.from_pretrained(
@@ -251,6 +252,7 @@ def test_nf4_training(self):
             module.unmerge()
         model.merge_and_unload()
 
+    @unittest.skip("need bug fix.")
     def test_int8_training(self):
         model = AutoModelForCausalLM.from_pretrained(
             llama_model_path, load_in_8bit=True, use_neural_speed=False)
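`unittest.skip` leaves both training tests in the suite but reports them as skipped instead of executing them, so the coverage gap stays visible in test output. A self-contained sketch of the pattern:

```python
import unittest

class ExampleSuite(unittest.TestCase):
    @unittest.skip("need bug fix.")  # reported as skipped; the body never runs
    def test_known_broken(self):
        self.fail("unreachable while the decorator is in place")

    def test_still_runs(self):
        self.assertTrue(True)

if __name__ == "__main__":
    unittest.main()
```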

tests/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,13 +1,13 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 accelerate
 auto-gptq
-auto-round==0.2
 bitsandbytes
 datasets==2.16.1
 einops
 evaluate
 gguf
 git+https://github.com/huggingface/optimum-intel.git@50d867c13b22c22eda451ddb67bddb8159670f85
+git+https://github.com/intel/auto-round.git@61cf9eef4a3ccb5a2d83a557deb709091a548581
 git+https://github.com/intel/neural-compressor.git
 git+https://github.com/intel/neural-speed.git
 intel-extension-for-pytorch==2.3.0
