SteampunkEngi's Shitty Scraper Script for CivitAI
import requests
import os.path
import re
import time
# SteampunkEngi's Shitty Scraper Script for CivitAI. Source: https://gist.github.com/SteampunkEngi/a61c73545c27e9afc4b73316af7274dc
# This script automatically scrapes all models of a given type: for each model it downloads the model file, all of its metadata, and a thumbnail picture.
# Written for Python 3.10; other versions are untested.
# Looking for manual downloads? See https://github.com/axsddlr/civitai_downloader
# Change "Checkpoint" in the URL below to 'Checkpoint', 'TextualInversion', 'Hypernetwork', 'AestheticGradient' or 'LORA'. Case sensitive.
# If the script crashes, check the console for the last page that finished downloading and set page= in the URL below to the next page number.
api_model_mostdownloaded_url = "https://civitai.com/api/v1/models?types=Checkpoint&sort=Most%20Downloaded&page=1"
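# For example, to scrape LORAs instead (same endpoint, just a different types= value from the list above):
# api_model_mostdownloaded_url = "https://civitai.com/api/v1/models?types=LORA&sort=Most%20Downloaded&page=1"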
# Change this to where you want your stuff to be saved
model_save_path = "C:\\CIVITAI-Local\\media\\Checkpoint\\"
####
#### That's it, you're done. Nothing else to edit below. Run this file now.
####
####
# Don't rename this:
api_url = "https://civitai.com/api/v1/models/"
# Ask for another link once the previous one has finished downloading?
manualDownload = False
repeatManualDownloads = True
automaticModelDownload = True
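# How the flags above are used in main() below:
# manualDownload = True  -> you are prompted for model links one at a time (repeatManualDownloads keeps prompting forever).
# manualDownload = False -> the script walks api_model_mostdownloaded_url page by page for as long as automaticModelDownload is True.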
def getrequestURL(modelID):
    completeURL = api_url + str(modelID)
    return completeURL
def getID():
    modelLink = input("gib link: ")
    # Get model ID based on link
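    # e.g. a link of the form https://civitai.com/models/1234/some-model-name splits so that
    # element 4 of the result is the numeric model ID ("1234" is just an illustrative value)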
    modelID = modelLink.split("/")[4]
    return modelID
def downloadModelWithMetadata(requestURL, downloadDirectory):
    # Request the model's metadata from the API
    response = requests.get(requestURL)
    while (response.status_code == 500 or response.status_code == 525):
        print("Request returned " + str(response.status_code) + ". Trying again in 5 seconds.")
        time.sleep(5)
        response = requests.get(requestURL)
    metadatajson = response.json()
    # Get download URL from returned JSON
    modelDownloadUrl = metadatajson["modelVersions"][0]["files"][0]["downloadUrl"]
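    # Note: this takes the first listed model version and its first file; the assumption here is that
    # this is the entry you want (typically the newest version's primary file).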
    downloadRequest = requests.get(modelDownloadUrl, allow_redirects=True)
    while (downloadRequest.status_code == 500 or downloadRequest.status_code == 525):
        print("Download returned " + str(downloadRequest.status_code) + ". Trying again in 5 seconds.")
        time.sleep(5)
        downloadRequest = requests.get(modelDownloadUrl, allow_redirects=True)
    # Set directory to save to by combining the save path with the file name
    name_of_model_file = metadatajson["modelVersions"][0]["files"][0]["name"]
    name_of_model_file = name_of_model_file.replace("<", "").replace(">", "")
    # Strip the .safetensors / .pt extension so the base name can be reused for the sidecar files below
    name_of_model_file_without_extension = name_of_model_file.split(".safetensors")[0].split(".pt")[0]
    # Download metadata as a .metainfo file (you can open this with a text editor)
    combined_metadata_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extension + ".metainfo")
    if not os.path.exists(combined_metadata_path):
        with open(combined_metadata_path, "w", encoding="utf-8") as metainfo_file:
            metainfo_file.write(str(metadatajson))
    else:
        print("! Skipping metadata download for: " + name_of_model_file + ". Reason: File already exists.")
    # Download model
    combined_model_path = os.path.join(downloadDirectory, name_of_model_file)
    if not os.path.exists(combined_model_path):
        with open(combined_model_path, 'wb') as content_file:
            content_file.write(downloadRequest.content)
    else:
        print("! Skipping model download for: " + name_of_model_file + ". Reason: File already exists.")
    # Download preview picture
    combined_image_path = os.path.join(
        downloadDirectory, name_of_model_file_without_extension + ".preview.png")
    image_download_url = metadatajson["modelVersions"][0]["images"][0]["url"]
    pictureRequest = requests.get(image_download_url, allow_redirects=True)
    if not os.path.exists(combined_image_path):
        with open(combined_image_path, 'wb') as content_file:
            content_file.write(pictureRequest.content)
    else:
        print("! Skipping image download for: " + name_of_model_file + ". Reason: File already exists.")
    print("Downloading done.")
def manualModelDownloading():
    # Ask for a model link, build the API request URL from it, and download into the base save path
    requestURL = getrequestURL(getID())
    downloadModelWithMetadata(requestURL, model_save_path)
def downloadModelPage(metadatajson):
    for Model in metadatajson["items"]:
        ModelName = Model["name"]
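        # Replace every character that is not a word character or a dash with an underscore
        # so the model name is safe to use as a folder name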
        saveFolder = model_save_path + re.sub(r'[^\w\d-]', '_', ModelName)
        if not os.path.exists(saveFolder):
            os.makedirs(saveFolder)
        downloadModelWithMetadata(getrequestURL(Model["id"]), saveFolder)
        print("Done downloading: " + ModelName)
    print("--- Done downloading page: " + str(metadatajson["metadata"]["currentPage"]) + " ---")
    return
def downloadModels(api_Model_url):
    # Walk the model listing from most downloaded to least downloaded, one page at a time
    # Get the first page
    response = requests.get(api_Model_url)
    while (response.status_code == 500 or response.status_code == 525):
        print("Request returned " + str(response.status_code) + ". Trying again in 5 seconds.")
        time.sleep(5)
        response = requests.get(api_Model_url)
    metadatajson = response.json()
    downloadModelPage(metadatajson)
    while (automaticModelDownload):
        newApiModelUrl = metadatajson["metadata"].get("nextPage")
        if not newApiModelUrl:
            # No next page in the listing metadata: everything has been downloaded
            break
        response = requests.get(newApiModelUrl)
        metadatajson = response.json()
        downloadModelPage(metadatajson)
def main():
    if (manualDownload):
        # Run at least once
        manualModelDownloading()
        while repeatManualDownloads:
            manualModelDownloading()
    else:
        downloadModels(api_model_mostdownloaded_url)


main()
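The same "retry while the response is HTTP 500 or 525" loop appears three times in the script. A minimal sketch of how it could be pulled into a single helper (not part of the original script; retrying_get is just a name made up for this example):

def retrying_get(url, **kwargs):
    # Repeat the GET request while CivitAI answers with a 500 or 525 status code
    response = requests.get(url, **kwargs)
    while response.status_code in (500, 525):
        print("Request returned " + str(response.status_code) + ". Trying again in 5 seconds.")
        time.sleep(5)
        response = requests.get(url, **kwargs)
    return response

# Usage would then be, for example:
# response = retrying_get(requestURL)
# downloadRequest = retrying_get(modelDownloadUrl, allow_redirects=True)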