Last active
December 19, 2024 01:12
-
-
Save IGLOU-EU/d6b30237010aec797bf4919a7b186d86 to your computer and use it in GitHub Desktop.
✨ A simple script to backup your Midjourney archives by downloading JSON data and images. Easy to set up with clear instructions, it ensures your creative work is safely stored. Contributions and feedback are welcome!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
## LICENCE ## | |
# Copyright (C) 2024 Kara Adrien | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <https://www.gnu.org/licenses/>. | |
## DOCUMENTATION ## | |
# This script automates the backup of your Midjourney archives by downloading | |
# JSON data and associated images. | |
# | |
# It should work on macOS and Windows using a Linux-like terminal (e.g. WSL) | |
# but it hasn't been tested on these platforms. | |
# | |
# NOTE: The script can take a significant amount of time to complete, depending on the number of images to download. | |
# | |
# REQUIREMENTS: | |
# - Bash shell | |
# - curl command-line tool | |
# - jq for processing JSON data | |
# - awk for generating random sleep intervals | |
# | |
# SPEC: | |
# - Includes a random sleep interval to avoid being blocked by the server. | |
# - Supports resuming by skipping already downloaded images. | |
# - Handles different job types, including upscaler jobs. | |
# | |
# USAGE INSTRUCTIONS: | |
# 1. Download the script to your local machine. | |
# 2. Set your configuration (Look at the next section) | |
# 3. Set the root directory for backups in DIR_ROOT if needed. | |
# 4. Run the script: ./backup.sh | |
# | |
## SET YOUR CONFIGURATION ##
# Go to https://www.midjourney.com/profile-settings
# and copy the "Midjourney ID" into USER_ID.
USER_ID=''
# To retrieve cookies:
# Note: The process should be similar for Chromium-based browsers.
#
# 1. Log in to Midjourney and navigate to the archives page: https://www.midjourney.com/archive
# 2. Right-click on the page and select "Inspect" to open Developer Tools.
# 3. Go to the "Storage" tab.
# 4. Find and copy the values of "__Host-Midjourney.AuthUserTokenV3_i"
#    and "__Host-Midjourney.AuthUserTokenV3_r".
# 5. Paste these values into the USER_COOKIES_AuthUserTokenV3_i and
#    USER_COOKIES_AuthUserTokenV3_r variables respectively.
USER_COOKIES_AuthUserTokenV3_i=''
USER_COOKIES_AuthUserTokenV3_r=''
# Set the root directory for the backup.
DIR_ROOT="."
# Directory for the downloaded JSON archive pages, under the root.
DIR_JSON="${DIR_ROOT}/prompt"
# Directory for the downloaded images, under the root.
DIR_IMAGES="${DIR_ROOT}/images"
## THIS IS THE SOFTWARE, DON'T TOUCH IT ##
# Abort on the first unhandled command failure.
# NOTE(review): consider 'set -euo pipefail' for stricter failure handling —
# verify the jq pipelines below behave under pipefail before changing.
set -e
# Build the archive-API URL for the configured user.
# $1 - pagination cursor; pass an empty string for the first page.
# Prints the full URL on stdout.
function build_url() {
    local next="$1"
    local suffix=""
    # Only append the cursor parameter when a cursor was supplied.
    [[ -n $next ]] && suffix="&cursor=${next}"
    echo "https://www.midjourney.com/api/pg/thomas-jobs?user_id=${USER_ID}&page_size=1000${suffix}"
}
# Download one page of the user's job archive (JSON) to a local file.
# $1 - output file path
# $2 - archive API URL (from build_url)
# Exits the whole script on failure: without this page the pagination
# cursor cannot be read, so continuing would be pointless.
function dl_dataset() {
    local out="$1"
    local url="$2"

    # --fail makes curl return non-zero on HTTP errors (4xx/5xx); without it
    # an expired-cookie error page would be saved as if it were valid JSON
    # and only blow up later in jq. (dl_image already uses --fail.)
    if ! curl --fail "$url" --compressed \
        -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0' \
        -H 'Accept: */*' \
        -H 'Accept-Language: fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3' \
        -H 'Accept-Encoding: gzip, deflate' \
        -H 'Referer: https://www.midjourney.com/archive' \
        -H 'Content-Type: application/json' \
        -H 'X-CSRF-Protection: 1' \
        -H 'DNT: 1' \
        -H 'Sec-GPC: 1' \
        -H 'Alt-Used: www.midjourney.com' \
        -H 'Connection: keep-alive' \
        -H "Cookie: __Host-Midjourney.AuthUserTokenV3_i=${USER_COOKIES_AuthUserTokenV3_i}; __Host-Midjourney.AuthUserTokenV3_r=${USER_COOKIES_AuthUserTokenV3_r}" \
        -H 'Sec-Fetch-Dest: empty' \
        -H 'Sec-Fetch-Mode: no-cors' \
        -H 'Sec-Fetch-Site: same-origin' \
        -H 'Priority: u=4' \
        -H 'Pragma: no-cache' \
        -H 'Cache-Control: no-cache' \
        -H 'TE: trailers' \
        -o "$out"; then
        echo "❌ Failed to download dataset from $url" >&2
        exit 1
    fi
}
# Fetch a single image, skipping it when already present (resume support).
# $1 - destination file path
# $2 - image URL
# A failed transfer is reported and the partial file removed, but the
# function returns normally so the overall backup keeps going (best-effort).
function dl_image() {
    local dest="$1"
    local src="$2"

    # Resume support: never re-download an existing file.
    if [ -e "$dest" ]; then
        echo "=> Already downloaded $dest"
        return
    fi

    echo "=> Download $src"
    # Random 0.1-3.0 s pause to avoid being rate-limited by the server.
    sleep "$(awk -v min=0.1 -v max=3.0 'BEGIN{srand(); print min+rand()*(max-min)}')"

    local headers=(
        -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0'
        -H 'Referer: https://www.midjourney.com/archive'
        -H 'X-CSRF-Protection: 1'
        -H 'DNT: 1'
        -H 'Sec-GPC: 1'
        -H 'Alt-Used: www.midjourney.com'
        -H 'Connection: keep-alive'
        -H 'Sec-Fetch-Dest: empty'
        -H 'Sec-Fetch-Mode: no-cors'
        -H 'Sec-Fetch-Site: same-origin'
        -H 'Priority: u=4'
        -H 'Pragma: no-cache'
        -H 'Cache-Control: no-cache'
        -H 'TE: trailers'
    )
    if ! curl -s --fail "$src" "${headers[@]}" -o "$dest"; then
        echo "❌ Failed to download image from $src" >&2
        # Drop whatever partial file curl may have left behind.
        rm -f "$dest"
    fi
}
# Walk every job entry in one JSON archive page and download its images.
# $1 - path to a JSON dataset file produced by dl_dataset.
# For each job: optionally fetch the parent grid image, skip "virtual"
# job types, then fetch the job's batch of images in parallel with a
# wait barrier per job.
function dl_images() {
    local file="$1"
    # NB: the while body runs in a pipeline subshell, so nothing assigned
    # inside it survives the loop (nothing is needed afterwards).
    jq -c '.data[]' "$file" | while read -r item; do
        local item_id
        item_id="$(echo "$item" | jq -r '.id')"
        echo "[$item_id]"
        local job_type
        job_type="$(echo "$item" | jq -r '.job_type')"
        local enqueue_time
        enqueue_time="$(echo "$item" | jq -r '.enqueue_time')"
        local unix_time
        # Epoch seconds used to prefix image filenames (chronological sort).
        # NOTE(review): 'date -d' is GNU-specific; macOS/BSD needs 'gdate'
        # or 'date -j -f' despite the header claiming macOS support — confirm.
        unix_time=$(date -d "$enqueue_time" +%s)
        local batch_size
        batch_size="$(echo "$item" | jq -r '.batch_size')"
        local parent_id parent_grid
        parent_id="$(echo "$item" | jq -r '.parent_id')"
        parent_grid="$(echo "$item" | jq -r '.parent_grid')"
        # Jobs derived from a grid (e.g. upscales) reference a parent image;
        # jq prints the literal string "null" for missing fields, hence the
        # explicit "null" checks alongside the emptiness checks.
        if [ -n "$parent_id" ] && [ "$parent_id" != "null" ] && [ -n "$parent_grid" ] && [ "$parent_grid" != "null" ]; then
            image_url="https://cdn.midjourney.com/${parent_id}/0_${parent_grid}.png"
            output_image="${DIR_IMAGES}/parent_${parent_id}_${parent_grid}.png"
            echo "==> [Parent]> "
            dl_image "$output_image" "$image_url"
            echo "<== <[Parent] "
        fi
        # Virtual jobs have no downloadable image of their own.
        case $job_type in
        *_virtual_*)
            echo "=> Skip virtual job $item_id - $job_type"
            continue
            ;;
        esac
        # Fan out one download per image in the batch, then wait for all of
        # them before moving to the next job.
        # NOTE(review): assumes batch_size is a numeric field — if jq yields
        # "null" the arithmetic treats it as 0 and the seq produces nothing.
        for batch in $(seq 0 "$((batch_size - 1))"); do
            image_url="https://cdn.midjourney.com/${item_id}/0_${batch}.png"
            output_image="${DIR_IMAGES}/${unix_time}_${item_id}_${batch}.png"
            dl_image "$output_image" "$image_url" &
        done
        wait
    done
}
# Preflight: verify every external tool this script relies on is installed.
required_commands=("jq" "magick" "curl" "awk")
missing=()
for cmd in "${required_commands[@]}"; do
    command -v "$cmd" &>/dev/null || missing+=("$cmd")
done
if ((${#missing[@]})); then
    echo "❌ The following required commands are not installed:" >&2
    printf ' - %s\n' "${missing[@]}"
    echo "Please install them and try again." >&2
    exit 1
fi

# The user must have filled in the configuration section above.
if [[ -z $USER_ID || -z $USER_COOKIES_AuthUserTokenV3_i || -z $USER_COOKIES_AuthUserTokenV3_r ]]; then
    echo "😱 Hoho, you forgot to configure something important!" >&2
    echo "⚠️ Read the instruction at the top of this script." >&2
    exit 1
fi

# Make sure both output directories exist before downloading anything.
if ! mkdir -p "$DIR_JSON" "$DIR_IMAGES"; then
    echo "❌ Error: Failed to create directories." >&2
    exit 1
fi
# Page through the archive API until the server stops returning a cursor.
# Each page is saved as its own JSON file, then its images are downloaded.
nb=1
cursor="first" # sentinel so the loop body runs at least once
while [ "$cursor" != "" ]; do
    if [[ $cursor == "first" ]]; then
        cursor="" # first request carries no cursor parameter
    fi
    url="$(build_url "$cursor")"
    file="${DIR_JSON}/user_${USER_ID}-ID${nb}.json"
    dl_dataset "$file" "$url"
    # '.cursor // empty' yields "" on the last page, which ends the loop.
    if ! cursor="$(jq -r '.cursor // empty' "$file")"; then
        # Fixed garbled user-facing message ("Outch ... that bad ... latter").
        echo "❌ Error: Ouch, there is a problem with the JSON file, that's bad — try again later." >&2
        break
    fi
    dl_images "$file"
    # Plain assignment instead of ((nb++)): arithmetic commands return a
    # non-zero status when the expression evaluates to 0, which would trip
    # 'set -e' (harmless here since nb starts at 1, but fragile).
    nb=$((nb + 1))
done
# Post-pass: detect and delete images that did not download completely,
# so a re-run of the script fetches them again.
echo "Checking if there is corrupted images"
nb=0
while read -r img; do
    # When find matches nothing, the <<< here-string still supplies one
    # empty line; skip it instead of running magick/rm on "".
    [ -n "$img" ] || continue
    if ! magick "$img" null: 2>/dev/null; then
        echo "=> Removal of a likely corrupted image $(basename "$img")"
        rm "$img"
        # Was ((nb++)): with nb=0 that expression evaluates to 0, the
        # arithmetic command returns status 1, and 'set -e' killed the
        # script on the FIRST corrupted image. Plain assignment is safe.
        nb=$((nb + 1))
    fi
done <<<"$(find "$DIR_IMAGES" -type f)"
cat <<EOF
🎉 You have successfully downloaded $(find "$DIR_IMAGES" -type f | wc -l) images! 📸
🗂️ For a total size of $(du -sh "$DIR_IMAGES" | cut -f1) 💾
💥 And $nb images failed the integrity check 💣
🔄 You can run the script as many times as you want to retry.
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment