diff --git a/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py b/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
index 02a6c08ca..324a3fdf0 100644
--- a/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
+++ b/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
@@ -53,9 +53,10 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         try:
             schedule, max_cut_size, cuts = max_cut_astar.solve(estimate=estimate, iter_limit=astar_n_iter,
                                                                time_limit=None if it == 0 else 300)
-        except TimeoutError:
+        except TimeoutError:  # pragma: no cover
+            # TODO: add test for this.
             if last_result[0] is None:
-                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")  # pragma: no cover
+                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")
             else:
                 Logger.warning(f"Max-cut solver stopped on timeout in iteration {it}.")
             return last_result
diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
index 429b49740..e4767b19b 100644
--- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
+++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
@@ -170,7 +170,6 @@ def compute_resource_utilization(self,
             w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs)
 
         if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
-            Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
             a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
 
         ru = ResourceUtilization()
diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
index c61dbf6a1..fb63f963c 100644
--- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
+++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
@@ -104,6 +104,12 @@ def requires_mixed_precision(in_model: Any,
     Returns:
         A boolean indicating if mixed precision is needed.
     """
+    # Any target resource utilization other than weights will always require MP calculation.
+    if target_resource_utilization.activation_restricted() or \
+            target_resource_utilization.total_mem_restricted() or \
+            target_resource_utilization.bops_restricted():
+        return True
+
     core_config = _create_core_config_for_ru(core_config)
 
     transformed_graph = graph_preparation_runner(in_model,
diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py
index d9812ff25..69b658a12 100644
--- a/model_compression_toolkit/core/runner.py
+++ b/model_compression_toolkit/core/runner.py
@@ -90,9 +90,11 @@ def core_runner(in_model: Any,
 
     # Checking whether to run mixed precision quantization
     if target_resource_utilization is not None and target_resource_utilization.is_any_restricted():
-        if core_config.mixed_precision_config is None:
+        if core_config.mixed_precision_config is None:  # pragma: no cover
            Logger.critical("Provided an initialized target_resource_utilization, that means that mixed precision quantization is "
                            "enabled, but the provided MixedPrecisionQuantizationConfig is None.")
+        if target_resource_utilization.activation_restricted() or target_resource_utilization.total_mem_restricted():
+            Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
         # Determine whether to use mixed precision or single precision based on target_resource_utilization.
         if requires_mixed_precision(in_model,
                                     target_resource_utilization,
diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/manual_bit_selection.py b/tests/keras_tests/feature_networks_tests/feature_networks/manual_bit_selection.py
index 7ac361a64..43f030d3d 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/manual_bit_selection.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/manual_bit_selection.py
@@ -186,15 +186,29 @@ def get_tpc(self):
         qco_16 = QuantizationConfigOptions(base_config=base_cfg_16,
                                            quantization_configurations=quantization_configurations)
 
+        add_qco = get_config_options_by_operators_set(tpc, OperatorSetNames.ADD)
+        base_cfg_8 = [l for l in add_qco.quantization_configurations if l.activation_n_bits == 8][0]
+        add_qco_8 = QuantizationConfigOptions(base_config=base_cfg_8, quantization_configurations=[base_cfg_8])
+
+        sub_qco = get_config_options_by_operators_set(tpc, OperatorSetNames.SUB)
+        base_cfg_8 = [l for l in sub_qco.quantization_configurations if l.activation_n_bits == 8][0]
+        sub_qco_8 = QuantizationConfigOptions(base_config=base_cfg_8, quantization_configurations=[base_cfg_8])
+
         tpc = generate_custom_test_tpc(
             name="custom_16_bit_tpc",
             base_cfg=tpc.default_qco.base_config,
             base_tpc=tpc,
             operator_sets_dict={
                 OperatorSetNames.MUL: qco_16,
+                OperatorSetNames.ADD: add_qco_8,
+                OperatorSetNames.SUB: sub_qco_8,
             })
         return tpc
 
     def get_resource_utilization(self):
         return mct.core.ResourceUtilization(activation_memory=6000)
+
+    def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
+        self.unit_test.assertTrue(len(quantization_info.mixed_precision_cfg) > 0, "Expected mixed-precision in test.")
+        super().compare(quantized_model, float_model, input_x=input_x, quantization_info=quantization_info)
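A minimal sketch of the behavioral change above, assuming only the MCT names that appear in this diff (mct.core.ResourceUtilization and its activation_restricted/total_mem_restricted/bops_restricted predicates); the helper below is illustrative, not library code. After this change, any activation, total-memory, or BOPs restriction makes requires_mixed_precision return True immediately, so only weights-only targets still run the full graph-based estimate.

    import model_compression_toolkit as mct

    def needs_mp_sketch(target_ru) -> bool:
        # Mirrors the early exit added to requires_mixed_precision():
        # any non-weights restriction forces the mixed-precision path.
        if target_ru.activation_restricted() or \
                target_ru.total_mem_restricted() or \
                target_ru.bops_restricted():
            return True
        # Weights-only targets would fall through to the (omitted here)
        # graph-based comparison against the target utilization.
        return False

    # An activation-restricted target, like the one in the test above,
    # short-circuits to True without building the transformed graph:
    assert needs_mp_sketch(mct.core.ResourceUtilization(activation_memory=6000))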