Skip to content

Instantly share code, notes, and snippets.

@dfischer
Forked from IGLOU-EU/midjourney_backup.sh
Created December 19, 2024 01:12
Show Gist options
  • Save dfischer/f18f23ab984571fdbd9915bc4cfd36aa to your computer and use it in GitHub Desktop.
Save dfischer/f18f23ab984571fdbd9915bc4cfd36aa to your computer and use it in GitHub Desktop.
✨ A simple script to backup your Midjourney archives by downloading JSON data and images. Easy to set up with clear instructions, it ensures your creative work is safely stored. Contributions and feedback are welcome!
#!/bin/bash
## LICENCE ##
# Copyright (C) 2024 Kara Adrien
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
## DOCUMENTATION ##
# This script automates the backup of your Midjourney archives by downloading
# JSON data and associated images.
#
# It should work on macOS and Windows using a Linux-like terminal (e.g. WSL)
# but it hasn't been tested on these platforms.
#
# NOTE: The script can take a significant amount of time to complete, depending on the number of images to download.
#
# REQUIREMENTS:
# - Bash shell
# - curl command-line tool
# - jq for processing JSON data
# - awk for generating random sleep intervals
# - ImageMagick ("magick") for the post-download image integrity check
#
# SPEC:
# - Includes a random sleep interval to avoid being blocked by the server.
# - Supports resuming by skipping already downloaded images.
# - Handles different job types, including upscaler jobs.
#
# USAGE INSTRUCTIONS:
# 1. Download the script to your local machine.
# 2. Set your configuration (Look at the next section)
# 3. Set the root directory for backups in DIR_ROOT if needed.
# 4. Run the script: ./backup.sh
#
## SET YOUR CONFIGURATION ##
# Go to https://www.midjourney.com/profile-settings
# And copy the "Midjourney ID" into USER_ID
USER_ID=''
# To retrieve cookies:
# Note: The process should be similar for Chromium-based browsers.
#
# 1. Log in to Midjourney and navigate to the archives page: https://www.midjourney.com/archive
# 2. Right-click on the page and select "Inspect" to open Developer Tools.
# 3. Go to the "Storage" tab.
# 4. Find and copy the values of "__Host-Midjourney.AuthUserTokenV3_i"
# and "__Host-Midjourney.AuthUserTokenV3_r".
# 5. Paste these values into the USER_COOKIES_AuthUserTokenV3_i and
# USER_COOKIES_AuthUserTokenV3_r variables respectively.
USER_COOKIES_AuthUserTokenV3_i=''
USER_COOKIES_AuthUserTokenV3_r=''
# Set the root directory for the backup
DIR_ROOT="."
# Define the directory for JSON files within the root directory
DIR_JSON="${DIR_ROOT}/prompt"
# Define the directory for image files within the root directory
DIR_IMAGES="${DIR_ROOT}/images"
## THIS IS THE SOFTWARE, DON'T TOUCH IT ##
# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and on a failure in any stage of a pipeline (pipefail) instead of silently
# continuing with partial data.
set -euo pipefail
# Build the archive-listing API URL for the configured USER_ID.
# Arguments: $1 - pagination cursor from the previous response (empty for page 1)
# Outputs:   the full request URL on stdout
function build_url() {
    local cursor_part=""
    if [[ -n "${1-}" ]]; then
        cursor_part="&cursor=$1"
    fi
    printf '%s\n' "https://www.midjourney.com/api/pg/thomas-jobs?user_id=${USER_ID}&page_size=1000${cursor_part}"
}
# Fetch one page of the archive job listing into a JSON file.
# Globals:   USER_COOKIES_AuthUserTokenV3_i / _r (auth cookies, read)
# Arguments: $1 - output file path, $2 - API URL (from build_url)
# Exits the whole script on download failure: without the dataset there is
# nothing further to do.
function dl_dataset() {
    local target="$1"
    local api_url="$2"
    # Browser-like header set; the Cookie header carries the Midjourney session.
    local -a req_headers=(
        -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0'
        -H 'Accept: */*'
        -H 'Accept-Language: fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3'
        -H 'Accept-Encoding: gzip, deflate'
        -H 'Referer: https://www.midjourney.com/archive'
        -H 'Content-Type: application/json'
        -H 'X-CSRF-Protection: 1'
        -H 'DNT: 1'
        -H 'Sec-GPC: 1'
        -H 'Alt-Used: www.midjourney.com'
        -H 'Connection: keep-alive'
        -H "Cookie: __Host-Midjourney.AuthUserTokenV3_i=${USER_COOKIES_AuthUserTokenV3_i}; __Host-Midjourney.AuthUserTokenV3_r=${USER_COOKIES_AuthUserTokenV3_r}"
        -H 'Sec-Fetch-Dest: empty'
        -H 'Sec-Fetch-Mode: no-cors'
        -H 'Sec-Fetch-Site: same-origin'
        -H 'Priority: u=4'
        -H 'Pragma: no-cache'
        -H 'Cache-Control: no-cache'
        -H 'TE: trailers'
    )
    if ! curl "$api_url" --compressed "${req_headers[@]}" -o "$target"; then
        echo "❌ Failed to download dataset from $api_url" >&2
        exit 1
    fi
}
# Download a single image, skipping files that already exist (resume support).
# Arguments: $1 - output file path, $2 - image URL
# A failed download is reported but NOT fatal; the partial file is removed so
# a later run will retry it.
function dl_image() {
    local target="$1"
    local source_url="$2"
    # Resume: never re-fetch a file that is already on disk.
    if [[ -e "$target" ]]; then
        echo "=> Already downloaded $target"
        return
    fi
    echo "=> Download $source_url"
    # Random pause between 0.1s and 3.0s to avoid hammering the CDN.
    local pause
    pause="$(awk -v min=0.1 -v max=3.0 'BEGIN{srand(); print min+rand()*(max-min)}')"
    sleep "$pause"
    local -a req_headers=(
        -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0'
        -H 'Referer: https://www.midjourney.com/archive'
        -H 'X-CSRF-Protection: 1'
        -H 'DNT: 1'
        -H 'Sec-GPC: 1'
        -H 'Alt-Used: www.midjourney.com'
        -H 'Connection: keep-alive'
        -H 'Sec-Fetch-Dest: empty'
        -H 'Sec-Fetch-Mode: no-cors'
        -H 'Sec-Fetch-Site: same-origin'
        -H 'Priority: u=4'
        -H 'Pragma: no-cache'
        -H 'Cache-Control: no-cache'
        -H 'TE: trailers'
    )
    if ! curl -s --fail "$source_url" "${req_headers[@]}" -o "$target"; then
        echo "❌ Failed to download image from $source_url" >&2
        # Drop any partial file so the next run retries this image.
        rm -f "$target"
    fi
}
# Download every image referenced by one dataset JSON file.
# Globals:   DIR_IMAGES (destination directory, read)
# Arguments: $1 - path to a dataset JSON file with a .data[] array of jobs
# NOTE: the "jq | while" pipeline runs the loop body in a subshell, so
# variables assigned inside are not visible after the loop (none are needed).
function dl_images() {
local file="$1"
jq -c '.data[]' "$file" | while read -r item; do
# One jq invocation per field: simple but spawns several processes per job.
# NOTE(review): could be collapsed into a single jq '@tsv' extraction.
local item_id
item_id="$(echo "$item" | jq -r '.id')"
echo "[$item_id]"
local job_type
job_type="$(echo "$item" | jq -r '.job_type')"
local enqueue_time
enqueue_time="$(echo "$item" | jq -r '.enqueue_time')"
local unix_time
# Used to prefix filenames so images sort chronologically.
# NOTE(review): 'date -d' is GNU syntax; macOS/BSD date needs '-j -f' — TODO
# confirm on the non-Linux platforms the header mentions.
unix_time=$(date -d "$enqueue_time" +%s)
local batch_size
batch_size="$(echo "$item" | jq -r '.batch_size')"
local parent_id parent_grid
parent_id="$(echo "$item" | jq -r '.parent_id')"
parent_grid="$(echo "$item" | jq -r '.parent_grid')"
# Jobs derived from a grid (e.g. upscales) reference a parent image; fetch it
# when both fields exist. jq prints the literal string "null" for missing
# keys, hence the double check.
if [ -n "$parent_id" ] && [ "$parent_id" != "null" ] && [ -n "$parent_grid" ] && [ "$parent_grid" != "null" ]; then
image_url="https://cdn.midjourney.com/${parent_id}/0_${parent_grid}.png"
output_image="${DIR_IMAGES}/parent_${parent_id}_${parent_grid}.png"
echo "==> [Parent]> "
dl_image "$output_image" "$image_url"
echo "<== <[Parent] "
fi
# Virtual jobs have no downloadable images of their own; skip to next item.
case $job_type in
*_virtual_*)
echo "=> Skip virtual job $item_id - $job_type"
continue
;;
esac
# Fetch all images of this job's batch concurrently, then barrier on 'wait'.
# NOTE(review): a failure inside a backgrounded dl_image does not propagate
# to the script's exit status — intentional best-effort behavior.
for batch in $(seq 0 "$((batch_size - 1))"); do
image_url="https://cdn.midjourney.com/${item_id}/0_${batch}.png"
output_image="${DIR_IMAGES}/${unix_time}_${item_id}_${batch}.png"
dl_image "$output_image" "$image_url" &
done
wait
done
}
# --- Preflight checks -------------------------------------------------------
# Every external tool this script shells out to must be on PATH.
required_commands=("jq" "magick" "curl" "awk")
missing=()
for cmd in "${required_commands[@]}"; do
    command -v "$cmd" &>/dev/null || missing+=("$cmd")
done
if ((${#missing[@]} > 0)); then
    echo "❌ The following required commands are not installed:" >&2
    for cmd in "${missing[@]}"; do
        echo " - $cmd"
    done
    echo "Please install them and try again." >&2
    exit 1
fi
# The configuration section at the top must have been filled in.
if [[ -z "$USER_ID" || -z "$USER_COOKIES_AuthUserTokenV3_i" || -z "$USER_COOKIES_AuthUserTokenV3_r" ]]; then
    echo "😱 Hoho, you forgot to configure something important!" >&2
    echo "⚠️ Read the instruction at the top of this script." >&2
    exit 1
fi
# Create the output directories up front.
mkdir -p "$DIR_JSON" "$DIR_IMAGES" || {
    echo "❌ Error: Failed to create directories." >&2
    exit 1
}
# --- Main download loop -----------------------------------------------------
# Paginate through the archive API: each response contains up to 1000 jobs and
# a 'cursor' pointing at the next page; an absent cursor means the last page.
nb=1
cursor=""
while :; do
    url="$(build_url "$cursor")"
    file="${DIR_JSON}/user_${USER_ID}-ID${nb}.json"
    dl_dataset "$file" "$url"
    # '// empty' maps a missing/null cursor to an empty string.
    if ! cursor="$(jq -r '.cursor // empty' "$file")"; then
        echo "❌ Error: Ouch, there is an error with the JSON file, that's bad, try again later." >&2
        break
    fi
    dl_images "$file"
    # Plain arithmetic assignment: '((nb++))' would return status 1 if nb were
    # ever 0 and abort under 'set -e'.
    nb=$((nb + 1))
    # No cursor in the response means we just processed the final page.
    [ -n "$cursor" ] || break
done
# --- Integrity pass ---------------------------------------------------------
# Re-decode every image with ImageMagick; delete files that fail so a later
# run re-downloads them.
echo "Checking if there is corrupted images"
nb=0
# NUL-delimited find loop: safe with any filename and, unlike the previous
# here-string form, iterates zero times when the directory is empty (the old
# '<<<"$(find …)"' fed one empty line, making 'magick ""' and 'rm ""' fail
# and abort the script under 'set -e').
while IFS= read -r -d '' img; do
    if ! magick "$img" null: 2>/dev/null; then
        echo "=> Removal of a likely corrupted image $(basename "$img")"
        rm -f -- "$img"
        # Plain arithmetic assignment: '((nb++))' returns status 1 when nb is
        # 0, which aborted the script on the first corrupted image under
        # 'set -e'.
        nb=$((nb + 1))
    fi
done < <(find "$DIR_IMAGES" -type f -print0)
cat <<EOF
🎉 You have successfully downloaded $(find "$DIR_IMAGES" -type f | wc -l) images! 📸
🗂️ For a total size of $(du -sh "$DIR_IMAGES" | cut -f1) 💾
💥 And $nb images failed the integrity check 💣
🔄 You can run the script as many times as you want to retry.
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment