Skip to content

Instantly share code, notes, and snippets.

@sinkingsugar
Created January 13, 2025 01:49
Show Gist options
  • Save sinkingsugar/eaebae52cbf61c05c3668646915b26e0 to your computer and use it in GitHub Desktop.
Save sinkingsugar/eaebae52cbf61c05c3668646915b26e0 to your computer and use it in GitHub Desktop.
HF Models Download, Git LFS sucks
# pip install huggingface_hub tqdm
from huggingface_hub import snapshot_download, hf_hub_download
import concurrent.futures
from tqdm import tqdm
import os
# --- Configuration ---------------------------------------------------------
MODEL_ID = "meta-llama/Llama-3.1-405B-Instruct-FP8"  # Hub repository to fetch
OUTPUT_DIR = "./llama_model"  # local directory that receives every file
TOTAL_SHARDS = 109  # number of model-XXXXX-of-YYYYY.safetensors weight shards
MAX_PARALLEL = 4  # Adjust based on your bandwidth/preferences

print("📚 Downloading support files...")
# Get all support files first. The large weight shards are excluded here
# (ignore_patterns) because they are fetched one by one later in the script.
snapshot_download(
    repo_id=MODEL_ID,
    allow_patterns=[
        "config.json",
        "generation_config.json",
        "tokenizer*",
        # Not matched by "tokenizer*"; fetched when the repository ships it.
        "special_tokens_map.json",
        # Index of the sharded checkpoint (model.safetensors.index.json).
        # Loaders typically use it to map each tensor to its shard file, so
        # without it the downloaded directory is incomplete.
        "*.safetensors.index.json",
        "*.md",
        "*.py",
    ],
    ignore_patterns=["model-*.safetensors"],
    local_dir=OUTPUT_DIR,
)
def download_shard(shard_num):
    """Download a single weight shard of MODEL_ID into OUTPUT_DIR.

    Args:
        shard_num: 1-based index of the shard. It is zero-padded to five
            digits to build ``model-XXXXX-of-YYYYY.safetensors``, where
            ``YYYYY`` is derived from TOTAL_SHARDS.

    Returns:
        Whatever ``hf_hub_download`` returns on success. On any failure the
        exception is not raised; instead a string of the form
        ``"Error downloading shard <n>: <message>"`` is returned so that the
        caller can report the failure and keep processing other shards.
    """
    # Derive the "-of-NNNNN" suffix from TOTAL_SHARDS instead of repeating the
    # literal shard count, so the two can never drift apart.
    filename = f"model-{shard_num:05d}-of-{TOTAL_SHARDS:05d}.safetensors"
    try:
        # NOTE: ``resume_download=True`` is intentionally not passed. The
        # argument is deprecated in huggingface_hub (interrupted downloads are
        # resumed automatically), and if a release rejects it the broad
        # handler below would report that as a failure for every shard.
        return hf_hub_download(
            repo_id=MODEL_ID,
            filename=filename,
            local_dir=OUTPUT_DIR,
        )
    except Exception as e:
        # Deliberately best-effort: one failed shard must not abort the
        # remaining downloads; the error is passed back as a string.
        return f"Error downloading shard {shard_num}: {e}"
print(f"🚀 Downloading {TOTAL_SHARDS} model shards with {MAX_PARALLEL} parallel downloads...")
# The progress bar advances only for shards that downloaded successfully, so
# a bar that stops short of TOTAL_SHARDS indicates failures.
with tqdm(total=TOTAL_SHARDS, desc="Downloading shards") as pbar:
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_PARALLEL) as executor:
        # Create future tasks for all shards (future -> shard number)
        futures = {
            executor.submit(download_shard, shard): shard
            for shard in range(1, TOTAL_SHARDS + 1)
        }
        # Process completed downloads in completion order
        for future in concurrent.futures.as_completed(futures):
            shard_num = futures[future]
            try:
                result = future.result()
            except Exception as e:
                # tqdm.write prints above the live bar instead of through it.
                tqdm.write(f"❌ Error processing shard {shard_num}: {e}")
                continue
            # download_shard reports failures as a string with this prefix;
            # matching the full prefix avoids misclassifying a local path
            # that merely begins with "Error".
            if isinstance(result, str) and result.startswith("Error downloading shard"):
                tqdm.write(f"❌ {result}")
            else:
                pbar.update(1)
# Verify all files are downloaded
print("\n🔍 Verifying downloads...")
# Shard names are derived from TOTAL_SHARDS rather than a repeated literal.
expected_files = {
    f"model-{i:05d}-of-{TOTAL_SHARDS:05d}.safetensors"
    for i in range(1, TOTAL_SHARDS + 1)
}
# Only the top level of OUTPUT_DIR is inspected for shard files.
actual_files = {f for f in os.listdir(OUTPUT_DIR) if f.endswith(".safetensors")}
missing_files = expected_files - actual_files
if missing_files:
    print(f"❌ Missing {len(missing_files)} files:")
    for f in sorted(missing_files):
        print(f" - {f}")
else:
    print("✅ All files downloaded successfully!")

# Print total size. Walk the tree so that files inside subdirectories are
# counted and directory entries themselves are not passed to getsize().
total_size = sum(
    os.path.getsize(os.path.join(root, name))
    for root, _dirs, names in os.walk(OUTPUT_DIR)
    for name in names
)
print(f"\n📦 Total download size: {total_size / (1024**3):.2f} GB")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment