30B error
python src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py --input_dir ~/chameleon/data/ --output_dir ~/chameleon/hf_converted/30B --model_size 30B
Fetching all parameters from the checkpoint at ~/chameleon/data/models/30b.
Loading the checkpoint in a Chameleon model...
****************************************************************************************************
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:30<00:00, 2.02s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Traceback (most recent call last):
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py", line 417, in <module>
    main()
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py", line 408, in main
    write_model(
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py", line 360, in write_model
    out = model.generate(**inputs, max_new_tokens=40, do_sample=False)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/generation/utils.py", line 1909, in generate
    result = self._sample(
             ^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/generation/utils.py", line 2646, in _sample
    outputs = self(
              ^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
    output = module._old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/modeling_chameleon.py", line 1609, in forward
    outputs = self.model(
              ^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/modeling_chameleon.py", line 1403, in forward
    layer_outputs = decoder_layer(
                    ^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/micromamba/envs/chameleon_hf/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/storage/home/jacobkahn/chameleon/transformers/src/transformers/models/chameleon/modeling_chameleon.py", line 806, in forward
    hidden_states = residual + hidden_states
                    ~~~~~~~~~^~~~~~~~~~~~~~~
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!
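
The failing line is the residual add at modeling_chameleon.py:806: under accelerate's multi-GPU dispatch (note hooks.py in the stack), `residual` and `hidden_states` landed on cuda:0 and cuda:1. Below is a minimal sketch, not part of the original gist, of one way to re-run the post-conversion generation check with the whole model pinned to a single device so no decoder layer's residual connection can straddle two GPUs. It assumes the conversion itself completed and wrote weights to --output_dir above, that the model fits on one device (30B in bfloat16 needs roughly 60 GB, so "cpu" or a smaller --model_size may be the practical choice), and uses the upstream transformers Chameleon classes.

# Sketch only: sanity-check the converted checkpoint on a single device.
# Paths and the prompt are illustrative assumptions.
import os
import torch
from transformers import ChameleonForConditionalGeneration, ChameleonProcessor

model_path = os.path.expanduser("~/chameleon/hf_converted/30B")  # --output_dir above

# device_map={"": 0} places every module on cuda:0 (swap in "cpu" for a slow
# but memory-safe smoke test), avoiding the cross-device residual add.
model = ChameleonForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)
processor = ChameleonProcessor.from_pretrained(model_path)

inputs = processor(text="A quick smoke test:", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=40, do_sample=False)
print(processor.batch_decode(out, skip_special_tokens=True)[0])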