Last active
December 15, 2023 00:49
-
-
Save AmosLewis/7f9d6644a3dc0bb4acad4af98273ade5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(shark.venv) ➜ SHARK git:(main) ✗ python nan/chatglm.py | |
shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag | |
[DEBUG] generating mlir on device | |
/nodclouddata/chi/src/SHARK/nan/chatglm.py:103: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). | |
input_ids = torch.tensor(input_ids) | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
torch.utils._pytree._register_pytree_node( | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
torch.utils._pytree._register_pytree_node( | |
Loading checkpoint shards: 100%|████████████████████████████████████████████████| 7/7 [00:59<00:00, 8.48s/it] | |
Model before quantization: ChatGLMForConditionalGeneration( | |
(transformer): ChatGLMModel( | |
(embedding): Embedding( | |
(word_embeddings): Embedding(65024, 4096) | |
) | |
(rotary_pos_emb): RotaryEmbedding() | |
(encoder): GLMTransformer( | |
(layers): ModuleList( | |
(0-27): 28 x GLMBlock( | |
(input_layernorm): RMSNorm() | |
(self_attention): SelfAttention( | |
(query_key_value): Linear(in_features=4096, out_features=4608, bias=True) | |
(core_attention): CoreAttention( | |
(attention_dropout): Dropout(p=0.0, inplace=False) | |
) | |
(dense): Linear(in_features=4096, out_features=4096, bias=False) | |
) | |
(post_attention_layernorm): RMSNorm() | |
(mlp): MLP( | |
(dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False) | |
(dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False) | |
) | |
) | |
) | |
(final_layernorm): RMSNorm() | |
) | |
(output_layer): Linear(in_features=4096, out_features=65024, bias=False) | |
) | |
) | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/immutable_collections.py:114: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_register_pytree_node(immutable_dict, _immutable_dict_flatten, _immutable_dict_unflatten) | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/immutable_collections.py:115: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_register_pytree_node(immutable_list, _immutable_list_flatten, _immutable_list_unflatten) | |
Applying weight quantization.. | |
Weight quantization applied. | |
Model after quantization: ChatGLMForConditionalGeneration( | |
(transformer): ChatGLMModel( | |
(embedding): Embedding( | |
(word_embeddings): Embedding(65024, 4096) | |
) | |
(rotary_pos_emb): RotaryEmbedding() | |
(encoder): GLMTransformer( | |
(layers): ModuleList( | |
(0-27): 28 x GLMBlock( | |
(input_layernorm): RMSNorm() | |
(self_attention): SelfAttention( | |
(query_key_value): QuantLinear( | |
in_features=4096, out_features=4608, bias=True | |
(input_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(output_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(weight_quant): WeightQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
(tensor_quant): RescalingIntQuant( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
(scaling_impl): ExpandReshapeScalingWrapper( | |
(wrapped_scaling_impl): ParameterFromStatsFromParameterScaling( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): AbsMinMax( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(stats_scaling_impl): _StatsScaling( | |
(affine_rescaling): Identity() | |
(restrict_clamp_scaling): _RestrictClampValue( | |
(clamp_min_ste): ScalarClampMinSte() | |
(restrict_value_impl): FloatRestrictValue() | |
) | |
(restrict_scaling_pre): Identity() | |
) | |
(restrict_inplace_preprocess): Identity() | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(int_scaling_impl): IntScaling() | |
(zero_point_impl): ExpandReshapeZeroPointWrapper( | |
(wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): NegativeMinOrZero( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(scale_shift_zero_point): _ScaleShiftZeroPoint( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
) | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(msb_clamp_bit_width_impl): BitWidthConst( | |
(bit_width): StatelessBuffer() | |
) | |
) | |
) | |
(bias_quant): BiasQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
) | |
(core_attention): CoreAttention( | |
(attention_dropout): Dropout(p=0.0, inplace=False) | |
) | |
(dense): QuantLinear( | |
in_features=4096, out_features=4096, bias=False | |
(input_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(output_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(weight_quant): WeightQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
(tensor_quant): RescalingIntQuant( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
(scaling_impl): ExpandReshapeScalingWrapper( | |
(wrapped_scaling_impl): ParameterFromStatsFromParameterScaling( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): AbsMinMax( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(stats_scaling_impl): _StatsScaling( | |
(affine_rescaling): Identity() | |
(restrict_clamp_scaling): _RestrictClampValue( | |
(clamp_min_ste): ScalarClampMinSte() | |
(restrict_value_impl): FloatRestrictValue() | |
) | |
(restrict_scaling_pre): Identity() | |
) | |
(restrict_inplace_preprocess): Identity() | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(int_scaling_impl): IntScaling() | |
(zero_point_impl): ExpandReshapeZeroPointWrapper( | |
(wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): NegativeMinOrZero( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(scale_shift_zero_point): _ScaleShiftZeroPoint( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
) | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(msb_clamp_bit_width_impl): BitWidthConst( | |
(bit_width): StatelessBuffer() | |
) | |
) | |
) | |
(bias_quant): BiasQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
) | |
) | |
(post_attention_layernorm): RMSNorm() | |
(mlp): MLP( | |
(dense_h_to_4h): QuantLinear( | |
in_features=4096, out_features=27392, bias=False | |
(input_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(output_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(weight_quant): WeightQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
(tensor_quant): RescalingIntQuant( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
(scaling_impl): ExpandReshapeScalingWrapper( | |
(wrapped_scaling_impl): ParameterFromStatsFromParameterScaling( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): AbsMinMax( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(stats_scaling_impl): _StatsScaling( | |
(affine_rescaling): Identity() | |
(restrict_clamp_scaling): _RestrictClampValue( | |
(clamp_min_ste): ScalarClampMinSte() | |
(restrict_value_impl): FloatRestrictValue() | |
) | |
(restrict_scaling_pre): Identity() | |
) | |
(restrict_inplace_preprocess): Identity() | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(int_scaling_impl): IntScaling() | |
(zero_point_impl): ExpandReshapeZeroPointWrapper( | |
(wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): NegativeMinOrZero( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(scale_shift_zero_point): _ScaleShiftZeroPoint( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
) | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(msb_clamp_bit_width_impl): BitWidthConst( | |
(bit_width): StatelessBuffer() | |
) | |
) | |
) | |
(bias_quant): BiasQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
) | |
(dense_4h_to_h): QuantLinear( | |
in_features=13696, out_features=4096, bias=False | |
(input_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(output_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(weight_quant): WeightQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
(tensor_quant): RescalingIntQuant( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
(scaling_impl): ExpandReshapeScalingWrapper( | |
(wrapped_scaling_impl): ParameterFromStatsFromParameterScaling( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): AbsMinMax( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(stats_scaling_impl): _StatsScaling( | |
(affine_rescaling): Identity() | |
(restrict_clamp_scaling): _RestrictClampValue( | |
(clamp_min_ste): ScalarClampMinSte() | |
(restrict_value_impl): FloatRestrictValue() | |
) | |
(restrict_scaling_pre): Identity() | |
) | |
(restrict_inplace_preprocess): Identity() | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(int_scaling_impl): IntScaling() | |
(zero_point_impl): ExpandReshapeZeroPointWrapper( | |
(wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): NegativeMinOrZero( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(scale_shift_zero_point): _ScaleShiftZeroPoint( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
) | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(msb_clamp_bit_width_impl): BitWidthConst( | |
(bit_width): StatelessBuffer() | |
) | |
) | |
) | |
(bias_quant): BiasQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
) | |
) | |
) | |
) | |
(final_layernorm): RMSNorm() | |
) | |
(output_layer): QuantLinear( | |
in_features=4096, out_features=65024, bias=False | |
(input_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(output_quant): ActQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
(weight_quant): WeightQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
(tensor_quant): RescalingIntQuant( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
(scaling_impl): ExpandReshapeScalingWrapper( | |
(wrapped_scaling_impl): ParameterFromStatsFromParameterScaling( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): AbsMinMax( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(stats_scaling_impl): _StatsScaling( | |
(affine_rescaling): Identity() | |
(restrict_clamp_scaling): _RestrictClampValue( | |
(clamp_min_ste): ScalarClampMinSte() | |
(restrict_value_impl): FloatRestrictValue() | |
) | |
(restrict_scaling_pre): Identity() | |
) | |
(restrict_inplace_preprocess): Identity() | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(int_scaling_impl): IntScaling() | |
(zero_point_impl): ExpandReshapeZeroPointWrapper( | |
(wrapped_zero_point_impl): ParameterFromStatsFromParameterZeroPoint( | |
(parameter_list_stats): _ParameterListStats( | |
(first_tracked_param): _ViewParameterWrapper( | |
(view_shape_impl): OverSubChannelBlockView( | |
(permute_impl): Identity() | |
) | |
) | |
(stats): _Stats( | |
(stats_impl): NegativeMinOrZero( | |
(zero): StatelessBuffer() | |
) | |
) | |
) | |
(scale_shift_zero_point): _ScaleShiftZeroPoint( | |
(int_quant): IntQuant( | |
(float_to_int_impl): RoundSte() | |
(tensor_clamp_impl): TensorClampSte() | |
(delay_wrapper): DelayWrapper( | |
(delay_impl): _NoDelay() | |
) | |
) | |
) | |
) | |
(slice_tensor): SliceTensor() | |
) | |
(msb_clamp_bit_width_impl): BitWidthConst( | |
(bit_width): StatelessBuffer() | |
) | |
) | |
) | |
(bias_quant): BiasQuantProxyFromInjector( | |
(_zero_hw_sentinel): StatelessBuffer() | |
) | |
) | |
) | |
) | |
[DEBUG] generating torchscript graph | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/experimental/proxy_tensor.py:97: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
pytree._register_pytree_node(torch.Size, lambda x: (list(x), None), lambda xs, _: tuple(xs)) | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/torch/utils/_pytree.py:254: UserWarning: <class 'torch.Size'> is already registered as pytree node. Overwriting the previous registration. | |
warnings.warn( | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/torch/_tensor.py:1394: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at ../c10/core/TensorImpl.h:1908.) | |
return super().rename(names) | |
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/brevitas/backport/fx/node.py:335: UserWarning: Trying to prepend a node to itself. This behavior has no effect on the graph. | |
warnings.warn( | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
found an upcasting block let's upcast it. | |
[DEBUG] Compiling torchscript graph | |
[DEBUG] Lowering Torch -> Linalg | |
[DEBUG] Successfully Generated mlir on device | |
[DEBUG] converting to bytecode | |
Saved falcon mlir at chatglm-6b-int4.mlir | |
Compiling for device : cpu-task | |
Configuring for device:cpu-task | |
Target triple found:x86_64-linux-gnu | |
Traceback (most recent call last): | |
File "/nodclouddata/chi/src/SHARK/nan/chatglm.py", line 170, in <module> | |
path = shark_module.save_module( | |
^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/nodclouddata/chi/src/SHARK/shark/shark_inference.py", line 213, in save_module | |
return export_iree_module_to_vmfb( | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/nodclouddata/chi/src/SHARK/shark/iree_utils/compile_utils.py", line 554, in export_iree_module_to_vmfb | |
flatbuffer_blob = compile_module_to_flatbuffer( | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/nodclouddata/chi/src/SHARK/shark/iree_utils/compile_utils.py", line 338, in compile_module_to_flatbuffer | |
flatbuffer_blob = ireec.compile_file( | |
^^^^^^^^^^^^^^^^^^^ | |
File "/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/core.py", line 255, in compile_file | |
result = invoke_immediate(cl) | |
^^^^^^^^^^^^^^^^^^^^ | |
File "/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/binaries.py", line 198, in invoke_immediate | |
raise CompilerToolError(process) | |
iree.compiler.tools.binaries.CompilerToolError: Error invoking IREE compiler tool iree-compile | |
Error code: -11 | |
Diagnostics: | |
Please report issues to https://github.com/openxla/iree/issues and include the crash backtrace. | |
Stack dump: | |
0. Program arguments: /nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-embedded-linker-path=/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true | |
#0 0x00007f7094204cbd llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:11 | |
#1 0x00007f70942051ab PrintStackTraceSignalHandler(void*) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1 | |
#2 0x00007f7094203236 llvm::sys::RunSignalHandlers() /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:105:5 | |
#3 0x00007f7094205935 SignalHandler(int) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1 | |
#4 0x00007f7088e38420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420) | |
#5 0x00007f7094036874 llvm::detail::PunnedPointer<mlir::Type>::asInt() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:41:5 | |
#6 0x00007f70940367d5 llvm::detail::PunnedPointer<mlir::Type>::operator long() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:45:41 | |
#7 0x00007f7094036775 llvm::PointerIntPair<mlir::Type, 3u, mlir::detail::ValueImpl::Kind, llvm::PointerLikeTypeTraits<mlir::Type>, llvm::PointerIntPairInfo<mlir::Type, 3u, llvm::PointerLikeTypeTraits<mlir::Type>>>::getPointer() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/PointerIntPair.h:94:58 | |
#8 0x00007f7094036449 mlir::detail::ValueImpl::getType() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:63:45 | |
#9 0x00007f709406bec8 mlir::Value::getType() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Value.h:125:39 | |
#10 0x00007f7096798990 mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::QuantizedMatmulRewriter::precondition() /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:330:61 | |
#11 0x00007f70967982de mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::reassociateDequantMatmul(mlir::RewriterBase&, mlir::linalg::GenericOp, mlir::linalg::GenericOp, int) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:767:18 | |
#12 0x00007f7096797aaa mlir::iree_compiler::GlobalOptimization::(anonymous namespace)::FuseDequantizationMatmulPass::runOnOperation() /nodclouddata/chi/src/iree/compiler/src/iree/compiler/GlobalOptimization/FuseDequantizationMatmul.cpp:843:18 | |
#13 0x00007f709460147b mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:0:17 | |
#14 0x00007f7094601415 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5 | |
#15 0x00007f70941342c9 llvm::function_ref<void ()>::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5 | |
#16 0x00007f7094604385 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3 | |
#17 0x00007f70945fcc33 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:509:17 | |
#18 0x00007f70945fd1b4 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:569:16 | |
#19 0x00007f70946026c8 mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0::operator()(mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo&) const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:789:36 | |
#20 0x00007f7094602349 mlir::LogicalResult mlir::failableParallelForEach<__gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&>(mlir::MLIRContext*, __gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, __gnu_cxx::__normal_iterator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>>, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Threading.h:46:18 | |
#21 0x00007f70945fe46b mlir::LogicalResult mlir::failableParallelForEach<std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>&, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&>(mlir::MLIRContext*, std::vector<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo, std::allocator<mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::OpPMInfo>>&, mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool)::$_0&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/Threading.h:92:10 | |
#22 0x00007f70945fdd79 mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:799:14 | |
#23 0x00007f70945fd8a7 mlir::detail::OpToOpPassAdaptor::runOnOperation(bool) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:690:5 | |
#24 0x00007f7094601466 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:501:11 | |
#25 0x00007f7094601415 void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::$_1>(long) /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:5 | |
#26 0x00007f70941342c9 llvm::function_ref<void ()>::operator()() const /nodclouddata/chi/src/iree/third_party/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:5 | |
#27 0x00007f7094604385 void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3 | |
#28 0x00007f70945fcc33 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:509:17 | |
#29 0x00007f70945fd1b4 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:569:16 | |
#30 0x00007f70945febf9 mlir::PassManager::runPasses(mlir::Operation*, mlir::AnalysisManager) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:880:10 | |
#31 0x00007f70945feb22 mlir::PassManager::run(mlir::Operation*) /nodclouddata/chi/src/iree/third_party/llvm-project/mlir/lib/Pass/Pass.cpp:860:60 | |
#32 0x00007f709408cacf mlir::iree_compiler::embed::(anonymous namespace)::Invocation::runPipeline(iree_compiler_pipeline_t) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:958:27 | |
#33 0x00007f709408c3a3 ireeCompilerInvocationPipeline /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp:1388:3 | |
#34 0x00007f7094580b40 mlir::iree_compiler::runIreecMain(int, char**)::$_0::operator()(iree_compiler_source_t*) const /nodclouddata/chi/src/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:247:11 | |
#35 0x00007f7094580087 mlir::iree_compiler::runIreecMain(int, char**) /nodclouddata/chi/src/iree/compiler/src/iree/compiler/Tools/iree_compile_lib.cc:348:9 | |
#36 0x00007f70940cca5b ireeCompilerRunMain /nodclouddata/chi/src/iree/compiler/src/iree/compiler/API/Internal/IREECompileToolEntryPoint.cpp:12:3 | |
#37 0x000055cff52b97f2 main /nodclouddata/chi/src/iree/compiler/bindings/python/IREECompileTool.c:9:35 | |
#38 0x00007f7088c56083 __libc_start_main /build/glibc-BHL3KM/glibc-2.31/csu/../csu/libc-start.c:342:3 | |
#39 0x000055cff52b970e _start (/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile+0x170e) | |
Invoked with: | |
iree-compile /nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile chatglm-6b-int4.mlir --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-embedded-linker-path=/nodclouddata/chi/src/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host --iree-llvmcpu-target-triple=x86_64-linux-gnu --iree-llvmcpu-enable-ukernels --iree-llvmcpu-stack-allocation-limit=256000 --iree-global-opt-enable-quantized-matmul-reassociation --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=false --verify=true | |
Need more information? Set IREE_SAVE_TEMPS=/some/dir in your environment to save all artifacts and reproducers. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python lib version
IREE VERSION USED