intel · bkowalskiINTEL · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/neural_compressor/jax/algorithms/dynamic.py b/neural_compressor/jax/algorithms/dynamic.py
@@ -21,10 +21,6 @@
 from neural_compressor.common.base_config import BaseConfig
 from neural_compressor.common.utils import DYNAMIC_QUANT
 from neural_compressor.jax.quantization.layers_dynamic import dynamic_quant_mapping
-from neural_compressor.jax.quantization.saving import (
-    WRAPPER_MAPPING,
-    KerasQuantizedModelBackboneWrapper,
-)
 from neural_compressor.jax.utils import register_algo
 from neural_compressor.jax.utils.utility import dtype_mapping, iterate_over_layers
 
@@ -48,36 +44,24 @@ def dynamic_quantize(
         **kwargs (Any): Additional keyword arguments (unused).
 
     Returns:
-        keras.Model: The quantized model wrapped for inference.
+        keras.Model: The model with quantized layers.
     """
-    for _, value in configs_mapping.items():
-        config = value
-        break
-    weight_dtype = dtype_mapping[config.weight_dtype]
-    activation_dtype = dtype_mapping[config.activation_dtype]
+    # Build set of layer paths that this algorithm should process
+    layer_configs = {op_path: cfg for (op_path, _op_type), cfg in configs_mapping.items() if cfg.name == DYNAMIC_QUANT}
 
-    # TODO serialization/deserialisation doesn't work for Gemma3CausalLM model
-    # Need to further investigation.
-    # Instead of copying model we can mark model parameter as mutable.
-    # config = serialization_lib.serialize_keras_object(model)
-    # qmodel = serialization_lib.deserialize_keras_object(
-    #     config, custom_objects={model.__class__.__name__: model.__class__}
-    # )
-    # qmodel.set_weights(model.get_weights())
     qmodel = model
-    operations = [
-        lambda layer: dynamic_quant_mapping[layer.__class__].prepare(
-            layer, weight_dtype, activation_dtype, quant_config.const_scale, quant_config.const_weight
-        ),
-        lambda layer: layer.add_variables(),
-        lambda layer: layer.post_quantization_cleanup(),
-    ]
-    iterate_over_layers(qmodel, operations, filter_function=lambda c: c in dynamic_quant_mapping)
 
-    if hasattr(qmodel, "backbone"):
-        qmodel._tracker.unlock()
-        qmodel.backbone = KerasQuantizedModelBackboneWrapper(qmodel.backbone, quant_config)
-        qmodel._tracker.lock()
+    for layer in qmodel._flatten_layers():
+        layer_id = layer.path if layer.path else layer.name
+        if layer_id not in layer_configs:
+            continue
+        config = layer_configs[layer_id]
+        weight_dtype = dtype_mapping[config.weight_dtype]
+        activation_dtype = dtype_mapping[config.activation_dtype]
+        dynamic_quant_mapping[layer.__class__].prepare(
+            layer, weight_dtype, activation_dtype, config.const_scale, config.const_weight
+        )
+        layer.add_variables()
+        layer.post_quantization_cleanup()
 
-    wrapper_cls = WRAPPER_MAPPING[qmodel.__class__]
-    return wrapper_cls(qmodel, quant_config)
+    return qmodel
diff --git a/neural_compressor/jax/algorithms/static.py b/neural_compressor/jax/algorithms/static.py
@@ -17,18 +17,12 @@
 from typing import Any, Callable, Optional, OrderedDict, Union
 
 import keras
-from keras_hub.src.models.causal_lm import CausalLM
 
 from neural_compressor.common.base_config import BaseConfig
 from neural_compressor.common.utils import STATIC_QUANT
 from neural_compressor.jax.quantization.layers_static import static_quant_mapping
-from neural_compressor.jax.quantization.saving import (
-    WRAPPER_MAPPING,
-    KerasQuantizedModelBackboneWrapper,
-)
 from neural_compressor.jax.utils import register_algo
 from neural_compressor.jax.utils.utility import (
-    causal_lm_make_replace_generate_function,
     dtype_mapping,
     iterate_over_layers,
 )
@@ -51,34 +45,25 @@ def static_quantize(
         calib_function (Optional[Callable]): Calibration function used to collect activation statistics.
 
     Returns:
-        keras.Model: The quantized model wrapped for inference.
+        keras.Model: The model with quantized layers.
     """
-    for _, value in configs_mapping.items():
-        config = value
-        break
-    weight_dtype = dtype_mapping[config.weight_dtype]
-    activation_dtype = dtype_mapping[config.activation_dtype]
+    # Build set of layer paths that this algorithm should process
+    layer_configs = {op_path: cfg for (op_path, _op_type), cfg in configs_mapping.items() if cfg.name == STATIC_QUANT}
 
-    # TODO serialization/deserialization doesn't work for Gemma3CausalLM model
-    # Need to further investigation.
-    # Instead of copying model we can mark model parameter as mutable.
-    # config = serialization_lib.serialize_keras_object(model)
-    # qmodel = serialization_lib.deserialize_keras_object(
-    #     config, custom_objects={model.__class__.__name__: model.__class__}
-    # )
-    # qmodel.set_weights(model.get_weights())
     qmodel = model
 
-    if isinstance(qmodel, CausalLM):
-        causal_lm_make_replace_generate_function(qmodel)
+    for layer in qmodel._flatten_layers():
+        layer_id = layer.path if layer.path else layer.name
+        if layer_id not in layer_configs:
+            continue
+        config = layer_configs[layer_id]
+        weight_dtype = dtype_mapping[config.weight_dtype]
+        activation_dtype = dtype_mapping[config.activation_dtype]
+        static_quant_mapping[layer.__class__].prepare(
+            layer, weight_dtype, activation_dtype, config.const_scale, config.const_weight
+        )
+        layer.add_observers()
 
-    operations = [
-        lambda layer: static_quant_mapping[layer.__class__].prepare(
-            layer, weight_dtype, activation_dtype, quant_config.const_scale, quant_config.const_weight
-        ),
-        lambda layer: layer.add_observers(),
-    ]
-    iterate_over_layers(qmodel, operations, filter_function=lambda c: c in static_quant_mapping)
     calib_function(qmodel)
 
     operations = [
@@ -88,14 +73,4 @@ def static_quantize(
     ]
     iterate_over_layers(qmodel, operations, filter_function=lambda c: c in static_quant_mapping.values())
 
-    if isinstance(qmodel, CausalLM):
-        causal_lm_make_replace_generate_function(qmodel, revert=True)
-
-    if hasattr(qmodel, "backbone"):
-        qmodel._tracker.unlock()
-        qmodel.backbone = KerasQuantizedModelBackboneWrapper(qmodel.backbone, quant_config)
-        qmodel._tracker.lock()
-
-    wrapper_cls = WRAPPER_MAPPING[qmodel.__class__]
-
-    return wrapper_cls(qmodel, quant_config)
+    return qmodel