Skip to content

Commit d6983f6

Browse files
[NPUW] Fix serialization (#31496)
The bug was introduced in #31294.
1 parent 52e99ef commit d6983f6

File tree

3 files changed

+4
-2
lines changed

3 files changed

+4
-2
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1284,6 +1284,7 @@ void ov::npuw::LLMCompiledModel::serialize(std::ostream& stream, const ov::npuw:
1284 1284
write(model_stream, m_kvcache_desc.num_stored_tokens);
1285 1285
write(model_stream, m_kvcache_desc.dim);
1286 1286
write(model_stream, m_kvcache_desc.v_tensors_transposed);
1287+
write(model_stream, m_prefill_chunk_size);
1287 1288

1288 1289
// Write config
1289 1290
write(model_stream, m_cfg);
@@ -1474,6 +1475,7 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserial
1474 1475
read(model_stream, compiled->m_kvcache_desc.num_stored_tokens);
1475 1476
read(model_stream, compiled->m_kvcache_desc.dim);
1476 1477
read(model_stream, compiled->m_kvcache_desc.v_tensors_transposed);
1478+
read(model_stream, compiled->m_prefill_chunk_size);
1477 1479

1478 1480
// Deserialize config
1479 1481
read(model_stream, compiled->m_cfg);

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ class LLMCompiledModel : public ov::npuw::ICompiledModel {
68 68
ov::npuw::s11n::BF16Cache m_bf16_consts;
69 69

70 70
KVCacheDesc m_kvcache_desc;
71-
uint64_t m_prefill_chunk_size;
71+
uint64_t m_prefill_chunk_size = 0;
72 72
std::shared_ptr<ov::npuw::CompiledModel> m_kvcache_compiled;
73 73
std::shared_ptr<ov::npuw::CompiledModel> m_prefill_compiled;
74 74
// This model is optional, so can be null.

src/plugins/intel_npu/src/plugin/npuw/serialization.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ const constexpr ov::npuw::s11n::IndicatorType NPUW_COMPILED_MODEL_INDICATOR =
34 34
const constexpr ov::npuw::s11n::IndicatorType NPUW_LLM_COMPILED_MODEL_INDICATOR =
35 35
{char{0x4c}, char{0x4c}, char{0x4d}, char{0x43}, char{0x4d}, char{0x4f}};
36 36

37-
const constexpr char* NPUW_SERIALIZATION_VERSION = "0.5";
37+
const constexpr char* NPUW_SERIALIZATION_VERSION = "0.6";
38 38

39 39
// Forward declaration
40 40
namespace intel_npu {

0 commit comments

Comments
 (0)