Last active
August 15, 2024 14:42
-
-
Save tin2tin/2ee71b898be894b6cd4cd17605d54f7b to your computer and use it in GitHub Desktop.
Download a GPT4All model file, compute its MD5 checksum, and produce the matching metadata JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from tqdm import tqdm | |
from pathlib import Path | |
import hashlib | |
import json | |
def download_file_with_progress(url, save_path, chunk_size=8192, timeout=30):
    """
    Downloads a file from a URL with a progress bar.

    The data is streamed into a temporary ``<filename>.part`` file next to the
    destination and only moved to ``save_path`` once the transfer has finished, so
    an interrupted or failed download never leaves a truncated file under the
    final name.

    Args:
    - url (str): The URL of the file to download.
    - save_path (Path): The local path where the file should be saved, including the filename.
    - chunk_size (int): Number of bytes requested per streamed chunk.
    - timeout (float or tuple): Timeout in seconds handed to ``requests.get``.

    Raises:
    - requests.HTTPError: If the server answers with an error status code.
    - OSError: If the destination cannot be created or written.
    """
    # Ensure the directory exists
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Download under a temporary name; it is renamed only after a complete transfer
    partial_path = save_path.with_name(save_path.name + ".part")

    try:
        # The context manager releases the connection even if the loop below fails
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # A missing content-length header means the size is unknown (None), not zero
            total_size = int(response.headers.get('content-length', 0)) or None

            with open(partial_path, "wb") as file, tqdm(
                total=total_size,
                unit='B',
                unit_scale=True,
                desc=str(save_path),
                initial=0,
                ascii=True,
            ) as pbar:
                # Write the response content to the file in chunks
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
                        pbar.update(len(chunk))

        # Transfer finished: move the file to its final name
        partial_path.replace(save_path)
    except BaseException:
        # Also covers Ctrl+C: drop the incomplete file, then let the error propagate
        try:
            partial_path.unlink()
        except FileNotFoundError:
            pass
        raise

    print(f"File downloaded successfully and saved to {save_path}")
def calculate_md5(file_path):
    """
    Computes the MD5 hex digest of a file's contents.

    The file is read in 4096-byte blocks so that it never has to be held in
    memory as a whole.

    Args:
    - file_path (Path): Location of the file to hash.

    Returns:
    - str: Hexadecimal MD5 digest of the file's bytes.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            # An empty read signals end of file
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
def create_metadata_json(save_path, md5sum, filesize, url, overrides=None):
    """
    Creates a metadata JSON file with the given details.

    The JSON is written next to the model file, using the same name with the last
    suffix replaced by ``.json``. All descriptive fields (RAM requirement, parameter
    count, quantisation, type, description, prompt templates) default to values
    written for a Llama 3.1 8B q8_0 model; pass ``overrides`` to describe a
    different model.

    Args:
    - save_path (Path): The local path of the downloaded file.
    - md5sum (str): The MD5 checksum of the file.
    - filesize (int): The size of the file in bytes.
    - url (str): The URL the file was downloaded from.
    - overrides (dict, optional): Entries that replace or extend the default fields.
    """
    data = {
        "order": "a",
        "md5sum": md5sum,
        "name": save_path.name,
        "filename": save_path.name,
        "filesize": str(filesize),
        "requires": "3.1.1",
        "ramrequired": "8",
        "parameters": "8 billion",
        "quant": "q8_0",
        "type": "LLaMA3",
        "description": "<ul><li>Fast responses</li><li>Chat based model</li><li>Large context size of 128k</li><li>Accepts agentic system prompts in Llama 3.1 format</li><li>Trained by Meta</li><li>License: <a href=\"https://llama.meta.com/llama3_1/license/\">Meta Llama 3.1 Community License</a></li></ul>",
        "url": url,
        "promptTemplate": "user\n\n%1assistant\n\n%2",
        "systemPrompt": "system\nCutting Knowledge Date: December 2023\n\nYou are a helpful assistant."
    }
    # Caller-supplied values win over the built-in defaults
    if overrides:
        data.update(overrides)

    # Save the JSON to a file
    metadata_path = save_path.with_suffix('.json')
    # Explicit encoding keeps the output independent of the platform default
    with open(metadata_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Metadata JSON created successfully at {metadata_path}")
# Script body: fetch one model file, then write its metadata JSON beside it
url = "https://huggingface.co/mradermacher/Danish-Mistral-Nemo-Base-GGUF/resolve/main/Danish-Mistral-Nemo-Base.Q8_0.gguf"
save_path = Path.home().joinpath('.cache', 'gpt4all', 'Danish-Mistral-Nemo-Base.Q8_0.gguf')

# Step 1: stream the file to ~/.cache/gpt4all
download_file_with_progress(url, save_path)

# Step 2: measure the downloaded file (size on disk and MD5 checksum)
filesize = save_path.stat().st_size
md5sum = calculate_md5(save_path)

# Step 3: write the metadata JSON
create_metadata_json(save_path, md5sum, filesize, url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment