# wget https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b/prefill_args_bs4_128_stride_32/cs_f16.npy
# wget https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b/prefill_args_bs4_128_stride_32/seq_block_ids.npy
# wget https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b/prefill_args_bs4_128_stride_32/seq_lens.npy
# wget https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b/prefill_args_bs4_128_stride_32/tokens.npy
import numpy as np
import torch
prefills = ['cs_f16','seq_block_ids','seq_lens','tokens']
for prefill in prefills:
    prefill_npy = prefill + '.npy'
    # Load the .npy file
    numpy_array = np.load(prefill_npy)
    print("data size: ", prefill, numpy_array.size)
    print("data type: ", prefill, numpy_array.dtype)
    # Output across the four files:
    # data size: cs_f16 268435456
    # data type: cs_f16 float16
    # data size: seq_block_ids 16
    # data type: seq_block_ids int64
    # data size: seq_lens 4
    # data type: seq_lens int64
    # data size: tokens 512
    # data type: tokens int64
    # Convert the NumPy array to a PyTorch tensor
    tensor = torch.from_numpy(numpy_array)
    # Convert the tensor to bfloat16
    tensor_bfloat16 = tensor.to(torch.bfloat16)
    bf16_prefill_bin = 'bf16_' + prefill + '.bin'
    # Save the tensor to a .bin file (torch.save writes PyTorch's
    # pickle-based serialization, not raw bytes)
    with open(bf16_prefill_bin, 'wb') as f:
        torch.save(tensor_bfloat16, f)
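
# A minimal round-trip check (a sketch, not part of the original script):
# torch.save writes PyTorch's pickle-based serialization, so torch.load is
# the matching reader for these .bin files.
for prefill in prefills:
    bf16_prefill_bin = 'bf16_' + prefill + '.bin'
    loaded = torch.load(bf16_prefill_bin)
    # Every file should come back as bfloat16 regardless of its source dtype
    assert loaded.dtype == torch.bfloat16
    print("reloaded: ", prefill, loaded.dtype, loaded.numel())
# Two caveats: bfloat16 has only 8 significand bits, so int64 values above
# 256 (e.g. large token IDs) are rounded by the conversion. And if the
# downstream consumer expects raw little-endian bytes rather than a pickled
# tensor (an assumption about that tool), NumPy has no bfloat16 dtype; the
# raw bits can be written via tensor_bfloat16.view(torch.int16).numpy().tobytes().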