AmosLewis · December 15, 2023 00:49 · AmosLewis · Dec 15, 2023
diff --git a/chatglm_fail_1214.txt b/chatglm_fail_1214.txt
 (shark.venv) ➜  SHARK git:(main) ✗ python nan/chatglm.py
 shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag
 [DEBUG] generating mlir on device
 /nodclouddata/chi/src/SHARK/nan/chatglm.py:103: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  input_ids = torch.tensor(input_ids)
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
  torch.utils._pytree._register_pytree_node(
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
  torch.utils._pytree._register_pytree_node(
 Loading checkpoint shards: 100%|████████████████████████████████████████████████| 7/7 [00:59<00:00,  8.48s/it]
 Model before quantization:  ChatGLMForConditionalGeneration(
  (transformer): ChatGLMModel(
    (embedding): Embedding(
      (word_embeddings): Embedding(65024, 4096)
    )
    (rotary_pos_emb): RotaryEmbedding()
    (encoder): GLMTransformer(
      (layers): ModuleList(
        (0-27): 28 x GLMBlock(
          (input_layernorm): RMSNorm()
          (self_attention): SelfAttention(
            (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)
            (core_attention): CoreAttention(
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (dense): Linear(in_features=4096, out_features=4096, bias=False)
          )
          (post_attention_layernorm): RMSNorm()
          (mlp): MLP(
            (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)
            (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)
          )
        )
      )
      (final_layernorm): RMSNorm()
    )
    (output_layer): Linear(in_features=4096, out_features=65024, bias=False)
  )
 )
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/immutable_collections.py:114: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
  _register_pytree_node(immutable_dict, _immutable_dict_flatten, _immutable_dict_unflatten)
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/immutable_collections.py:115: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
  _register_pytree_node(immutable_list, _immutable_list_flatten, _immutable_list_unflatten)
 Applying weight quantization..
 Weight quantization applied.
 Model after quantization:  ChatGLMForConditionalGeneration(
  (transformer): ChatGLMModel(
    (embedding): Embedding(
      (word_embeddings): Embedding(65024, 4096)
    )
    (rotary_pos_emb): RotaryEmbedding()
    (encoder): GLMTransformer(
      (layers): ModuleList(
        (0-27): 28 x GLMBlock(
          (input_layernorm): RMSNorm()
          (self_attention): SelfAttention(
            (query_key_value): QuantLinear(
              in_features=4096, out_features=4608, bias=True
              (input_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (output_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (weight_quant): WeightQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
                (tensor_quant): RescalingIntQuant(
                  (int_quant): IntQuant(
                    (float_to_int_impl): RoundSte()
                    (tensor_clamp_impl): TensorClampSte()
                    (delay_wrapper): DelayWrapper(
                      (delay_impl): _NoDelay()
                    )
                  )
                  (scaling_impl): ExpandReshapeScalingWrapper(
                    (wrapped_scaling_impl): ParameterFromStatsFromParameterScaling(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): AbsMinMax(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (stats_scaling_impl): _StatsScaling(
                        (affine_rescaling): Identity()
                        (restrict_clamp_scaling): _RestrictClampValue(
                          (clamp_min_ste): ScalarClampMinSte()
                          (restrict_value_impl): FloatRestrictValue()
                        )
                        (restrict_scaling_pre): Identity()
                      )
                      (restrict_inplace_preprocess): Identity()
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (int_scaling_impl): IntScaling()
                  (zero_point_impl): ExpandReshapeZeroPointWrapper(
                    (wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): NegativeMinOrZero(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (scale_shift_zero_point): _ScaleShiftZeroPoint(
                        (int_quant): IntQuant(
                          (float_to_int_impl): RoundSte()
                          (tensor_clamp_impl): TensorClampSte()
                          (delay_wrapper): DelayWrapper(
                            (delay_impl): _NoDelay()
                          )
                        )
                      )
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (msb_clamp_bit_width_impl): BitWidthConst(
                    (bit_width): StatelessBuffer()
                  )
                )
              )
              (bias_quant): BiasQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
            )
            (core_attention): CoreAttention(
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (dense): QuantLinear(
              in_features=4096, out_features=4096, bias=False
              (input_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (output_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (weight_quant): WeightQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
                (tensor_quant): RescalingIntQuant(
                  (int_quant): IntQuant(
                    (float_to_int_impl): RoundSte()
                    (tensor_clamp_impl): TensorClampSte()
                    (delay_wrapper): DelayWrapper(
                      (delay_impl): _NoDelay()
                    )
                  )
                  (scaling_impl): ExpandReshapeScalingWrapper(
                    (wrapped_scaling_impl): ParameterFromStatsFromParameterScaling(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): AbsMinMax(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (stats_scaling_impl): _StatsScaling(
                        (affine_rescaling): Identity()
                        (restrict_clamp_scaling): _RestrictClampValue(
                          (clamp_min_ste): ScalarClampMinSte()
                          (restrict_value_impl): FloatRestrictValue()
                        )
                        (restrict_scaling_pre): Identity()
                      )
                      (restrict_inplace_preprocess): Identity()
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (int_scaling_impl): IntScaling()
                  (zero_point_impl): ExpandReshapeZeroPointWrapper(
                    (wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): NegativeMinOrZero(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (scale_shift_zero_point): _ScaleShiftZeroPoint(
                        (int_quant): IntQuant(
                          (float_to_int_impl): RoundSte()
                          (tensor_clamp_impl): TensorClampSte()
                          (delay_wrapper): DelayWrapper(
                            (delay_impl): _NoDelay()
                          )
                        )
                      )
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (msb_clamp_bit_width_impl): BitWidthConst(
                    (bit_width): StatelessBuffer()
                  )
                )
              )
              (bias_quant): BiasQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
            )
          )
          (post_attention_layernorm): RMSNorm()
          (mlp): MLP(
            (dense_h_to_4h): QuantLinear(
              in_features=4096, out_features=27392, bias=False
              (input_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (output_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (weight_quant): WeightQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
                (tensor_quant): RescalingIntQuant(
                  (int_quant): IntQuant(
                    (float_to_int_impl): RoundSte()
                    (tensor_clamp_impl): TensorClampSte()
                    (delay_wrapper): DelayWrapper(
                      (delay_impl): _NoDelay()
                    )
                  )
                  (scaling_impl): ExpandReshapeScalingWrapper(
                    (wrapped_scaling_impl): ParameterFromStatsFromParameterScaling(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): AbsMinMax(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (stats_scaling_impl): _StatsScaling(
                        (affine_rescaling): Identity()
                        (restrict_clamp_scaling): _RestrictClampValue(
                          (clamp_min_ste): ScalarClampMinSte()
                          (restrict_value_impl): FloatRestrictValue()
                        )
                        (restrict_scaling_pre): Identity()
                      )
                      (restrict_inplace_preprocess): Identity()
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (int_scaling_impl): IntScaling()
                  (zero_point_impl): ExpandReshapeZeroPointWrapper(
                    (wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): NegativeMinOrZero(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (scale_shift_zero_point): _ScaleShiftZeroPoint(
                        (int_quant): IntQuant(
                          (float_to_int_impl): RoundSte()
                          (tensor_clamp_impl): TensorClampSte()
                          (delay_wrapper): DelayWrapper(
                            (delay_impl): _NoDelay()
                          )
                        )
                      )
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (msb_clamp_bit_width_impl): BitWidthConst(
                    (bit_width): StatelessBuffer()
                  )
                )
              )
              (bias_quant): BiasQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
            )
            (dense_4h_to_h): QuantLinear(
              in_features=13696, out_features=4096, bias=False
              (input_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (output_quant): ActQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
              (weight_quant): WeightQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
                (tensor_quant): RescalingIntQuant(
                  (int_quant): IntQuant(
                    (float_to_int_impl): RoundSte()
                    (tensor_clamp_impl): TensorClampSte()
                    (delay_wrapper): DelayWrapper(
                      (delay_impl): _NoDelay()
                    )
                  )
                  (scaling_impl): ExpandReshapeScalingWrapper(
                    (wrapped_scaling_impl): ParameterFromStatsFromParameterScaling(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): AbsMinMax(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (stats_scaling_impl): _StatsScaling(
                        (affine_rescaling): Identity()
                        (restrict_clamp_scaling): _RestrictClampValue(
                          (clamp_min_ste): ScalarClampMinSte()
                          (restrict_value_impl): FloatRestrictValue()
                        )
                        (restrict_scaling_pre): Identity()
                      )
                      (restrict_inplace_preprocess): Identity()
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (int_scaling_impl): IntScaling()
                  (zero_point_impl): ExpandReshapeZeroPointWrapper(
                    (wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint(
                      (parameter_list_stats): _ParameterListStats(
                        (first_tracked_param): _ViewParameterWrapper(
                          (view_shape_impl): OverSubChannelBlockView(
                            (permute_impl): Identity()
                          )
                        )
                        (stats): _Stats(
                          (stats_impl): NegativeMinOrZero(
                            (zero): StatelessBuffer()
                          )
                        )
                      )
                      (scale_shift_zero_point): _ScaleShiftZeroPoint(
                        (int_quant): IntQuant(
                          (float_to_int_impl): RoundSte()
                          (tensor_clamp_impl): TensorClampSte()
                          (delay_wrapper): DelayWrapper(
                            (delay_impl): _NoDelay()
                          )
                        )
                      )
                    )
                    (slice_tensor): SliceTensor()
                  )
                  (msb_clamp_bit_width_impl): BitWidthConst(
                    (bit_width): StatelessBuffer()
                  )
                )
              )
              (bias_quant): BiasQuantProxyFromInjector(
                (_zero_hw_sentinel): StatelessBuffer()
              )
            )
          )
        )
      )
      (final_layernorm): RMSNorm()
    )
    (output_layer): QuantLinear(
      in_features=4096, out_features=65024, bias=False
      (input_quant): ActQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
      )
      (output_quant): ActQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
      )
      (weight_quant): WeightQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
        (tensor_quant): RescalingIntQuant(
          (int_quant): IntQuant(
            (float_to_int_impl): RoundSte()
            (tensor_clamp_impl): TensorClampSte()
            (delay_wrapper): DelayWrapper(
              (delay_impl): _NoDelay()
            )
          )
          (scaling_impl): ExpandReshapeScalingWrapper(
            (wrapped_scaling_impl): ParameterFromStatsFromParameterScaling(
              (parameter_list_stats): _ParameterListStats(
                (first_tracked_param): _ViewParameterWrapper(
                  (view_shape_impl): OverSubChannelBlockView(
                    (permute_impl): Identity()
                  )
                )
                (stats): _Stats(
                  (stats_impl): AbsMinMax(
                    (zero): StatelessBuffer()
                  )
                )
              )
              (stats_scaling_impl): _StatsScaling(
                (affine_rescaling): Identity()
                (restrict_clamp_scaling): _RestrictClampValue(
                  (clamp_min_ste): ScalarClampMinSte()
                  (restrict_value_impl): FloatRestrictValue()
                )
                (restrict_scaling_pre): Identity()
              )
              (restrict_inplace_preprocess): Identity()
            )
            (slice_tensor): SliceTensor()
          )
          (int_scaling_impl): IntScaling()
          (zero_point_impl): ExpandReshapeZeroPointWrapper(
            (wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint(
              (parameter_list_stats): _ParameterListStats(
                (first_tracked_param): _ViewParameterWrapper(
                  (view_shape_impl): OverSubChannelBlockView(
                    (permute_impl): Identity()
                  )
                )
                (stats): _Stats(
                  (stats_impl): NegativeMinOrZero(
                    (zero): StatelessBuffer()
                  )
                )
              )
              (scale_shift_zero_point): _ScaleShiftZeroPoint(
                (int_quant): IntQuant(
                  (float_to_int_impl): RoundSte()
                  (tensor_clamp_impl): TensorClampSte()
                  (delay_wrapper): DelayWrapper(
                    (delay_impl): _NoDelay()
                  )
                )
              )
            )
            (slice_tensor): SliceTensor()
          )
          (msb_clamp_bit_width_impl): BitWidthConst(
            (bit_width): StatelessBuffer()
          )
        )
      )
      (bias_quant): BiasQuantProxyFromInjector(
        (_zero_hw_sentinel): StatelessBuffer()
      )
    )
  )
 )
 [DEBUG] generating torchscript graph
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/experimental/proxy_tensor.py:97: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
  pytree._register_pytree_node(torch.Size, lambda x: (list(x), None), lambda xs, _: tuple(xs))
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/torch/utils/_pytree.py:254: UserWarning: <class 'torch.Size'> is already registered as pytree node. Overwriting the previous registration.
  warnings.warn(
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/torch/_tensor.py:1394: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at ../c10/core/TensorImpl.h:1908.)
  return super().rename(names)
 /nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/node.py:335: UserWarning: Trying to prepend a node to itself. This behavior has no effect on the graph.
  warnings.warn(
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 found an upcasting block let's upcast it.
 [DEBUG] Compiling torchscript graph
 [DEBUG] Lowering Torch -> Linalg
 [DEBUG] Successfully Generated mlir on device
 [DEBUG] converting to bytecode
 Saved falcon mlir at  chatglm-6b-int4.mlir
 Compiling for device : cpu-task
 Configuring for device:cpu-task
 Target triple found:x86_64-linux-gnu
 Traceback (most recent call last):
  File "/nodclouddata/chi/src/SHARK/nan/chatglm.py", line 170, in <module>
    path = shark_module.save_module(
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/nodclouddata/chi/src/SHARK/shark/shark_inference.py", line 213, in save_module
    return export_iree_module_to_vmfb(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/nodclouddata/chi/src/SHARK/shark/iree_utils/compile_utils.py", line 554, in export_iree_module_to_vmfb
    flatbuffer_blob = compile_module_to_flatbuffer(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/nodclouddata/chi/src/SHARK/shark/iree_utils/compile_utils.py", line 338, in compile_module_to_flatbuffer
    flatbuffer_blob = ireec.compile_file(
                      ^^^^^^^^^^^^^^^^^^^
  File "/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/core.py", line 255, in compile_file
    result = invoke_immediate(cl)
             ^^^^^^^^^^^^^^^^^^^^
  File "/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/binaries.py", line 198, in invoke_immediate
    raise CompilerToolError(process)
 iree.compiler.tools.binaries.CompilerToolError: Error invoking IREE compiler tool iree-compile
 Error code: -11
 Diagnostics:
 Please report issues to https://github.com/openxla/iree/issues and include the crash backtrace.
 Stack dump:
 0.	Program arguments: /nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-embedded-linker-path=/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true
 #0 0x00007f7094204cbd llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:11
 #1 0x00007f70942051ab PrintStackTraceSignalHandler(void*) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
 #2 0x00007f7094203236 llvm::sys::RunSignalHandlers() /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:105:5
 #3 0x00007f7094205935 SignalHandler(int) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #4 0x00007f7088e38420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420)
 #5 0x00007f7094036874 llvm::detail::PunnedPointer<mlir::Type>::asInt() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:41:5
 #6 0x00007f70940367d5 llvm::detail::PunnedPointer<mlir::Type>::operator long() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:45:41
 #7 0x00007f7094036775 llvm::PointerIntPair<mlir::Type, 3u, mlir::detail::ValueImpl::Kind, llvm::PointerLikeTypeTraits<mlir::Type>, llvm::PointerIntPairInfo<mlir::Type, 3u, llvm::PointerLikeTypeTraits<mlir::Type>>>::getPointer() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:94:58
 #8 0x00007f7094036449 mlir::detail::ValueImpl::getType() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:63:45
 #9 0x00007f709406bec8 mlir::Value::getType() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:125:39
 #10 0x00007f7096798990 mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::QuantizedMatmulRewriter::precondition() /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:330:61
 #11 0x00007f70967982de mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::reassociateDequantMatmul(mlir::RewriterBase&, mlir::linalg::GenericOp, mlir::linalg::GenericOp, int) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:767:18
 #12 0x00007f7096797aaa mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FuseDequantizationMatmulPass::runOnOperation() /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:843:18
 #13 0x00007f709460147b mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:0:17
 #14 0x00007f7094601415 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5
 #15 0x00007f70941342c9 llvm::function_ref<void ()>::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5
 #16 0x00007f7094604385 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
 #17 0x00007f70945fcc33 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:509:17
 #18 0x00007f70945fd1b4 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:569:16
 #19 0x00007f70946026c8 mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0::operator()(mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo&) const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:789:36
 #20 0x00007f7094602349 mlir::LogicalResult mlir::failableParallelForEach<__gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&>(mlir::MLIRContext*, __gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, __gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Threading.h:46:18
 #21 0x00007f70945fe46b mlir::LogicalResult mlir::failableParallelForEach<std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>&, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&>(mlir::MLIRContext*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>&, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Threading.h:92:10
 #22 0x00007f70945fdd79 mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:799:14
 #23 0x00007f70945fd8a7 mlir::detail::OpToOpPassAdaptor::runOnOperation(bool) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:690:5
 #24 0x00007f7094601466 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:501:11
 #25 0x00007f7094601415 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5
 #26 0x00007f70941342c9 llvm::function_ref<void ()>::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5
 #27 0x00007f7094604385 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
 #28 0x00007f70945fcc33 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:509:17
 #29 0x00007f70945fd1b4 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:569:16
 #30 0x00007f70945febf9 mlir::PassManager::runPasses(mlir::Operation*, mlir::AnalysisManager) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:880:10
 #31 0x00007f70945feb22 mlir::PassManager::run(mlir::Operation*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:860:60
 #32 0x00007f709408cacf mlir::iree_compiler::embed::(anonymous namespace)::Invocation::runPipeline(iree_compiler_pipeline_t) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:958:27
 #33 0x00007f709408c3a3 ireeCompilerInvocationPipeline /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:1388:3
 #34 0x00007f7094580b40 mlir::iree_compiler::runIreecMain(int, char**)::$_0::operator()(iree_compiler_source_t*) const /nodclouddata/chi/src/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:247:11
 #35 0x00007f7094580087 mlir::iree_compiler::runIreecMain(int, char**) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:348:9
 #36 0x00007f70940cca5b ireeCompilerRunMain /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/IREECompileToolEntryPoint.cpp:12:3
 #37 0x000055cff52b97f2 main /nodclouddata/chi/src/iree/compiler/bindings/python/IREECompileTool.c:9:35
 #38 0x00007f7088c56083 __libc_start_main /build/glibc-BHL3KM/glibc-2.31/csu/../csu/libc-start.c:342:3
 #39 0x000055cff52b970e _start (/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile+0x170e)


 Invoked with:
 iree-compile /nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-embedded-linker-path=/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true

 Need more information? Set IREE_SAVE_TEMPS=/some/dir in your environment to save all artifacts and reproducers.