Last active
May 5, 2024 02:52
-
-
Save lg/5a0e82742936659c5c36cd96f45c505b to your computer and use it in GitHub Desktop.
Download channels from Discord and save as a webarchive for offline reading
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Make sure to have a .env with `DISCORD_TOKEN=your_token_here` in the same directory as this script.
# Get your token as per: https://github.com/Tyrrrz/DiscordChatExporter/blob/master/.docs/Token-and-IDs.md
# Tool wrappers: each function forwards its arguments to the matching docker image.

# DiscordChatExporter CLI; reads the token from .env and writes into ./out.
dce() {
  docker run -it --rm \
    --env-file .env \
    --volume "$(pwd)/out":/out \
    tyrrrz/discordchatexporter:latest "$@"
}

# ffmpeg with ./out mounted at /out inside the container.
ffmpeg() {
  docker run --rm \
    --volume "$(pwd)/out":/out \
    lscr.io/linuxserver/ffmpeg:latest "$@"
}

# bun with the current directory mounted at /home/bun/app.
bun() {
  docker run -i --rm \
    --volume "$(pwd)":/home/bun/app \
    oven/bun:latest "$@"
}

# jq with the current directory mounted and used as the working directory.
jq() {
  docker run -i --rm \
    --volume "$(pwd)":/docker-files \
    -w /docker-files \
    badouralix/curl-jq jq "$@"
}
# Verify prerequisites: the .env file, docker and webarchiver must all be present
prerequisites_ok=1
[ -e .env ] || prerequisites_ok=0
command -v docker >/dev/null 2>&1 || prerequisites_ok=0
command -v webarchiver >/dev/null 2>&1 || prerequisites_ok=0
if [ "$prerequisites_ok" -eq 0 ]; then
  echo "You need a .env file with DISCORD_TOKEN in it, and you must have docker + webarchiver installed"
  exit 1
fi
# Input for what to download
echo "Getting Discord state"
# The state dump holds private account data: make sure it is removed however the script ends.
trap 'rm -f discord-state.json' EXIT
# Stop early if the dump could not be produced or came back empty; every prompt below reads it.
if ! bun run discord-state.ts > discord-state.json || [ ! -s discord-state.json ]; then
  echo "Could not fetch the Discord state (check DISCORD_TOKEN in .env)" >&2
  exit 1
fi
# List the guilds as "<id> | <name>"
jq -r '.guilds[] | "\(.id) | \(.name)"' discord-state.json
echo "Which guild id would you like to download from?"; read -r guild_id
# List the chosen guild's channels ordered by position.
# User input is passed with --arg so it is compared as a plain string and never parsed as jq code.
jq -r --arg guild_id "$guild_id" \
  '.guilds[] | select(.id == $guild_id) | .channels | sort_by(.position) | .[] | "\(.id) | \(.name)"' \
  discord-state.json
echo "Which channel id?"; read -r channel_id
echo "Getting read states"
# `.read_state[]` (no dot before the brackets) is also accepted by jq versions older than 1.7.
last_read_id=$(jq -r --arg channel_id "$channel_id" \
  '.read_state[] | select(.id == $channel_id) | .last_message_id' discord-state.json)
echo "Last read messageid: $last_read_id"
echo "Download from what timestamp/messageid (0 for beginning)?"; read -r start_id
echo "What name should we assign to the download (no spaces)?"; read -r name
rm -f discord-state.json
trap - EXIT
# Download the channel as dark-themed HTML, with media stored under out/media
dce export --media --reuse-media --media-dir media --markdown True --format HtmlDark --locale Pacific \
  --channel "$channel_id" --after "$start_id" --output "$name.html"
export_status=$?
# Every later step reads out/$name.html, so stop here if the export failed or produced no file.
if [ "$export_status" -ne 0 ] || [ ! -f "out/$name.html" ]; then
  echo "Export failed (exit code $export_status); out/$name.html is not usable" >&2
  exit 1
fi
echo "Converting videos to animated gifs" # Needed for webarchive for iOS/iPadOS
# Collect every distinct `src=media/<file>.mp4` reference in the exported page.
# The dot is escaped so only a real ".mp4" extension matches; sort -u avoids converting a file twice.
videos=$(grep -oE 'src=media/[^"]*\.mp4' "out/$name.html" | sort -u)
errors=0
if [[ -n "$videos" ]]; then
  while IFS= read -r match; do
    # Strip only the leading "src=" so filenames that themselves contain "=" stay intact.
    mp4_filename_with_media="${match#src=}"
    gif_filename_with_media="${mp4_filename_with_media%.*}.gif"
    if ffmpeg -nostdin -hide_banner -loglevel error -y -i "/out/$mp4_filename_with_media" -vf "scale=320:-1,fps=10" "/out/$gif_filename_with_media"; then
      # Escape regex metacharacters (pattern side) and `\`, `&`, `|` (replacement side) so the
      # filename is treated literally by sed.
      match_pattern=$(printf '%s' "$match" | sed 's/[][\.*^$|]/\\&/g')
      gif_replacement=$(printf '%s' "$gif_filename_with_media" | sed 's/[\&|]/\\&/g')
      # NOTE(review): `.*` is greedy, so several <video> elements on one line would be collapsed
      # into a single <img>; confirm the exporter puts each video on its own line.
      sed -i "" "s|<video.*$match_pattern.*</video>|<img class=chatlog__embed-generic-gifv src=\"$gif_replacement\" \/>|g" "out/$name.html"
    else
      # Leave the original <video> in place rather than pointing the page at a gif that was never created.
      errors=1
    fi
  done <<< "$videos"
fi
# Remove lazy loading and generate webarchive files and remove html files
echo "Fixing image urls and converting to webarchive"
sed -i "" 's/loading="*lazy"*//g' "out/$name.html" # Disable lazyloading so webarchiver downloads everything
# Percent-encode every literal % inside src attributes in two passes: first % -> @!@25 (looping until
# no % is left in the attribute), then @!@ -> %. The intermediary keeps the first loop from re-matching
# the % it has just written.
sed -i "" -e ':a' -e 's/\(src=[^ >]*\)%/\1@!@25/g; t a' "out/$name.html" # dce has a bug when % is in the url
sed -i "" -e ':a' -e 's/\(src=[^ >]*\)@!@/\1%/g; t a' "out/$name.html" # replace the intermediary @!@ with % to prevent infinite loops
echo "Converting to webarchive"
if ! webarchiver -url "out/$name.html" -output "./out/$name.webarchive"; then errors=1; fi
# Explicit if/else: with `A && rm || echo`, a failing rm would also print the "There were errors" message.
if [ "$errors" -eq 0 ]; then
  rm "out/$name.html"
else
  echo "There were errors, not deleting html file out/$name.html"
fi
echo "Completed"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Run with `bun run discord-state.ts`
//
// Connects to the Discord gateway, identifies with DISCORD_TOKEN, prints the payload of the
// READY event to stdout as pretty-printed JSON and then disconnects.
// Exits with a non-zero status when the token is missing, the socket errors, or the gateway
// closes the connection before READY arrives, so callers never mistake empty output for success.
//
// Bun loads `.env` from the working directory into process.env on startup, so the file is not
// read manually here (doing so would throw when the token is supplied via the environment only).
const token = process.env.DISCORD_TOKEN
if (!token) {
  console.error("No DISCORD_TOKEN env variable found (incl in the .env file)")
  process.exit(1)
}

/** The subset of a gateway frame that this script inspects. */
interface GatewayPayload {
  op?: number
  t?: string | null
  d?: unknown
}

// Tracks whether the state was printed, to tell a normal shutdown from a premature close.
let receivedReady = false

const socket = new WebSocket("wss://gateway.discord.gg/?encoding=json&v=9")

socket.addEventListener("message", event => {
  // JSON encoding without compression is requested, so frames are expected to be text.
  if (typeof event.data !== "string") return
  const wsData: GatewayPayload = JSON.parse(event.data)
  if (wsData.op === 0 && wsData.t === "READY") {
    receivedReady = true
    console.log(JSON.stringify(wsData.d, null, 2))
    socket.close()
  }
})

socket.addEventListener("open", () => {
  // op 2 = IDENTIFY
  socket.send(JSON.stringify({ "op": 2, "d": { "token": token, "properties": {}, "presence": {}, "compress": false,
    "client_state": { "guild_versions": {} } } }))
})

socket.addEventListener("close", event => {
  if (receivedReady) return
  const detail = event.reason ? `${event.code}: ${event.reason}` : `${event.code}`
  console.error(`Discord closed the connection before sending READY (${detail})`)
  process.exit(1)
})

socket.addEventListener("error", event => {
  console.error("Error transacting with Discord")
  console.error(event)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment