Last active
March 30, 2024 23:18
-
-
Save AfroThundr3007730/2be1846d81e46869c3cd33d7b7a0c0b9 to your computer and use it in GitHub Desktop.
Wrapper around DiscordChatExporter for automatic channel media archiving
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper around DiscordChatExporter for automatic channel media archiving
# SPDX-License-Identifier: GPL-3.0-or-later
# Initialise the script-wide configuration.
# Globals written: AUTHOR, BASENAME, MODIFIED, VERSION, DCEDIR, DCECMD,
#                  DCETOKEN, LOCKFILE, START, WORKDIR
# Environment read: DCEDIR, WORKDIR, HOME, UID, XDG_CONFIG_HOME,
#                   XDG_RUNTIME_DIR, XDG_STATE_HOME
dce.set_globals() {
    AUTHOR='AfroThundr'
    BASENAME="${0##*/}"
    MODIFIED='20240304'
    VERSION='0.3.1'
    # Resolve DCEDIR itself (not just the path derived from it) so that code
    # testing "$DCEDIR" sees the default location when the user did not set it.
    DCEDIR=${DCEDIR:-$HOME/.local/libexec/dce}
    DCECMD=$DCEDIR/DiscordChatExporter.Cli.dll
    DCETOKEN=${XDG_CONFIG_HOME:-$HOME/.config}/dce/token
    LOCKFILE=${XDG_RUNTIME_DIR:-/run/user/$UID}/$BASENAME.lock
    # Start date used for --after when -d is not given and no lastrun exists.
    START=2015-05-13
    WORKDIR=${WORKDIR:-${XDG_STATE_HOME:-$HOME/.local/state}/dumps/dc-dump}
}
# Parse the command line.
# Arguments: the script's positional parameters ("$@").
# Globals written: QUIET (-q), channel_ids (-c, comma-separated list),
#                  START (-d), guild_id (-g)
# Globals read: BASENAME, VERSION, MODIFIED, AUTHOR (for -v / -h output)
# Exits: 0 after -h, or after -v when it is the last argument;
#        1 (message on stderr) on missing or invalid arguments, so callers
#        such as cron jobs can detect a bad invocation.
dce.parse_args() {
    [[ -n $1 ]] || {
        printf 'No arguments specified, use -h for help.\n' >&2
        exit 1
    }
    while [[ -n $1 ]]; do
        case $1 in
        -v)
            printf '%s: Version %s, updated %s by %s\n' \
                "$BASENAME" "$VERSION" "$MODIFIED" "$AUTHOR"
            shift
            [[ -n $1 ]] || exit 0
            ;;
        -h)
            printf 'DiscordChatExporter wrapper for automatic incremental archiving\n\n'
            printf 'Usage: %s -h | [-q] [-d <timestamp>] -g <guild_id> -c <channel_id(s)>\n\n' "$BASENAME"
            printf 'Options:\n'
            printf ' -c Channel ID of channel to be archived.\n'
            printf ' Multiple IDs can be passed comma-separated.\n'
            printf ' All channels must belong to the same guild.\n'
            printf ' -d Date to start archiving from (defaults to all history)\n'
            printf ' For subsequent runs, will resume from last run timestamp.\n'
            printf ' -g Guild (Server) ID of channel(s) to be archived.\n'
            printf ' Required to populate the export folder structure.\n'
            printf ' -h Display this help text.\n'
            printf ' -q Suppress console output.\n'
            exit 0
            ;;
        -q)
            QUIET=true
            shift
            ;;
        -c | -d | -g)
            # These options all take exactly one non-empty value.
            [[ -n $2 ]] || {
                printf 'Option %s requires a value, use -h for help.\n' "$1" >&2
                exit 1
            }
            case $1 in
            -c) IFS=, read -ra channel_ids <<<"$2" ;;
            -d) START=$2 ;;
            -g) guild_id=$2 ;;
            esac
            shift 2
            ;;
        *)
            printf 'Invalid argument specified, use -h for help.\n' >&2
            exit 1
            ;;
        esac
    done
}
# Print a UTC-timestamped message to stdout unless QUIET is set.
# Arguments: the message (multiple arguments are joined with spaces).
# Returns: 0 when output is suppressed. This matters because callers chain
#          on this function (`utils.say ... && ...`) or end with it, so a
#          non-zero status in quiet mode would be read as a failure.
utils.say() {
    if [[ -z $QUIET ]]; then
        printf '%s: %s\n' "$(date -u +%FT%TZ)" "$*"
    fi
}
# Verify that everything needed for an archive run is in place.
# Checks, in order: required commands, the DiscordChatExporter install and
# .NET runtime, the Discord token, the working directory, network
# reachability, the lock file, and finally the guild/channel IDs.
# Globals read: DCECMD, DCETOKEN, DISCORD_TOKEN, WORKDIR, LOCKFILE,
#               guild_id, channel_ids
# Globals written: DISCORD_TOKEN (sourced from DCETOKEN and exported when it
#                  is not already set)
# Exits: 1 as soon as one group of checks has failed.
dce.validate_state() {
    # TODO: Expand this (check for commands, directories, other input, etc)
    local broken command core_deps extra_deps dce_dir id

    # Dependency checks
    core_deps=(cat date ln ls mkdir mv nice rm rmdir sleep stat touch xargs)
    extra_deps=(awk dotnet find grep ionice jq ping)
    for command in "${core_deps[@]}"; do
        command -V "$command" &>/dev/null || {
            utils.say "Command missing from PATH: $command"
            utils.say "You need to install coreutils to continue."
            broken=1 && break
        }
    done
    for command in "${extra_deps[@]}"; do
        command -V "$command" &>/dev/null || {
            utils.say "Command missing from PATH: $command"
            utils.say 'You need to install its package to continue.'
            broken=1
        }
    done
    if [[ $broken ]]; then
        utils.say "Dependencies checks failed. Exiting."
        exit 1
    fi

    # DCE configuration
    # Derive the directory from DCECMD so the check also works when DCEDIR
    # was never set and DCECMD points at the default location.
    dce_dir=${DCECMD%/*}
    [[ -d $dce_dir && -f $DCECMD ]] || {
        utils.say "${DCECMD##*/} not found in path: $dce_dir"
        utils.say "Ensure DCEDIR points to the directory containing ${DCECMD##*/}"
        broken=1
    }
    # Pass when at least one listed runtime has a major version >= 8. The
    # major component is compared numerically so "10.x" is not mistaken for
    # something smaller than "8.x", and older runtimes installed side by
    # side do not cause a failure.
    dotnet --list-runtimes |
        awk '{ split($2, v, "."); if (v[1] + 0 >= 8) ok = 1 } END { exit !ok }' || {
        utils.say 'DiscordChatExporter requires .NET runtime 8.0 or greater.'
        utils.say 'Ensure the proper runtime is installed with: dotnet --list-runtimes'
        broken=1
    }
    [[ -n $DISCORD_TOKEN || -s $DCETOKEN && $(awk '/^DISCORD_TOKEN=/' <"$DCETOKEN") ]] || {
        utils.say "Unable to parse discord token file: $DCETOKEN"
        utils.say 'Populate with DISCORD_TOKEN=<token> or export DISCORD_TOKEN directly.'
        broken=1
    }
    if [[ $broken ]]; then
        utils.say "DiscordChatExporter checks failed. Exiting."
        exit 1
    fi
    if [[ -z $DISCORD_TOKEN ]]; then
        # shellcheck disable=SC1090
        . "$DCETOKEN" && export DISCORD_TOKEN
    fi

    # Host sanity checks
    [[ -w $WORKDIR ]] || mkdir -p "$WORKDIR" || {
        utils.say "Unable to write to working directory: $WORKDIR"
        utils.say "Ensure it exists and is writeable, or set WORKDIR to a location that is."
        broken=1
    }
    # shellcheck disable=SC2015
    ping -c 3 discord.com &>/dev/null && ping -c 3 cdn.discordapp.com &>/dev/null || {
        utils.say 'Unable to reach the Discord servers.'
        utils.say 'Ensure your IP and DNS settings are correct.'
        broken=1
    }
    if [[ -e $LOCKFILE ]]; then
        utils.say "Lock file $LOCKFILE already exists. Not starting a new instance."
        utils.say 'Ensure no other instance of this script is running, then remove, if necessary.'
        broken=1
    fi
    if [[ $broken ]]; then
        utils.say "Host sanity checks failed. Exiting."
        exit 1
    fi

    # Input validation
    # The patterns are anchored: an ID must consist of digits only.
    [[ $guild_id =~ ^[0-9]+$ ]] || {
        utils.say 'Got invalid guild ID. Use -h for help.'
        exit 1
    }
    [[ ${#channel_ids[@]} -gt 0 ]] || {
        utils.say 'No channel ID(s) found. Use -h for help.'
        exit 1
    }
    for id in "${channel_ids[@]}"; do
        [[ $id =~ ^[0-9]+$ ]] || {
            utils.say 'Got invalid channel ID. Use -h for help.'
            exit 1
        }
    done
    return 0
}
# Refresh the cached list of servers visible to the user.
# Globals read: DCECMD, WORKDIR
# Output: replaces $WORKDIR/guilds.txt on success only.
# Returns: 0 on success, 1 if the exporter failed.
dce.get_guild_list() {
    local fresh_list=$WORKDIR/guilds.txt.tmp
    utils.say 'Getting updated server list for user.'
    # Write to a scratch file first: redirecting straight into guilds.txt
    # would leave an empty but fresh-looking cache behind after a failure.
    if dotnet "$DCECMD" guilds >"$fresh_list" 2>/dev/null; then
        mv -f "$fresh_list" "$WORKDIR"/guilds.txt
    else
        rm -f "$fresh_list"
        utils.say 'Error occurred getting server list.'
        return 1
    fi
}
# Refresh the cached channel list of the current server.
# Globals read: DCECMD, guild_id, guild_dir
# Output: replaces $guild_dir/channels.txt on success only.
# Returns: 0 on success, 1 if the exporter failed.
dce.get_channel_list() {
    local fresh_list=$guild_dir/channels.txt.tmp
    utils.say 'Getting updated channel list for server.'
    # Write to a scratch file first: redirecting straight into channels.txt
    # would leave an empty but fresh-looking cache behind after a failure.
    if dotnet "$DCECMD" channels -g "$guild_id" >"$fresh_list" 2>/dev/null; then
        mv -f "$fresh_list" "$guild_dir"/channels.txt
    else
        rm -f "$fresh_list"
        utils.say 'Error occurred getting channel list.'
        return 1
    fi
}
# Resolve the display name of the server identified by guild_id.
# The cached list is refreshed first when it is missing or older than 7 days.
# Globals read: WORKDIR, guild_id
# Globals written: guild_name (slashes replaced by underscores so it can be
#                  used as a single path component; empty when not found)
# Returns: 0 when the name was found, 1 otherwise.
dce.get_guild_info() {
    local entry listed_id
    local list=$WORKDIR/guilds.txt
    [[ -f $list &&
        $(stat -c %Y "$list") -gt $((EPOCHSECONDS - 86400 * 7)) ]] ||
        dce.get_guild_list
    utils.say "Looking up server ID: $guild_id"
    guild_name=
    if [[ -r $list ]]; then
        # Compare the whole first field with the ID. A plain substring search
        # would also hit longer IDs (or names) that merely contain it.
        while IFS= read -r entry || [[ -n $entry ]]; do
            read -r listed_id _ <<<"$entry"
            [[ $listed_id == "$guild_id" ]] || continue
            # The name is everything after the first "| " separator.
            guild_name=${entry#*| }
            break
        done <"$list"
    fi
    guild_name=${guild_name//\//_}
    [[ -n $guild_name ]] || {
        utils.say 'Server ID not found.'
        return 1
    }
    utils.say "Found server name: $guild_name"
    # Report success explicitly instead of passing on the logger's status.
    return 0
}
# Resolve the display name of the channel identified by channel_id.
# The cached list is refreshed first when it is missing or older than 7 days.
# Globals read: guild_dir, channel_id
# Globals written: channel_name (" / " becomes " | " and any remaining slash
#                  becomes "_" so it can be used as a single path component;
#                  empty when not found)
# Returns: 0 when the name was found, 1 otherwise.
dce.get_channel_info() {
    local entry listed_id
    local list=$guild_dir/channels.txt
    [[ -f $list &&
        $(stat -c %Y "$list") -gt $((EPOCHSECONDS - 86400 * 7)) ]] ||
        dce.get_channel_list
    utils.say "Looking up channel ID: $channel_id"
    channel_name=
    if [[ -r $list ]]; then
        # Compare the whole first field with the ID. A plain substring search
        # would also hit longer IDs (or names) that merely contain it.
        while IFS= read -r entry || [[ -n $entry ]]; do
            read -r listed_id _ <<<"$entry"
            [[ $listed_id == "$channel_id" ]] || continue
            # The name is everything after the first "| " separator.
            channel_name=${entry#*| }
            break
        done <"$list"
    fi
    channel_name=${channel_name// \/ / | }
    channel_name=${channel_name//\//_}
    [[ -n $channel_name ]] || {
        utils.say 'Channel ID not found.'
        return 1
    }
    utils.say "Found channel name: $channel_name"
    # Report success explicitly instead of passing on the logger's status.
    return 0
}
# Create the archive directories for the current channel plus name-based
# symlinks pointing at them:
#   $WORKDIR/<guild_id>/<channel_id>            canonical directories
#   $WORKDIR/<guild_name>                    -> <guild_id>
#   $WORKDIR/<guild_id>/<channel_name>       -> <channel_id>
# Globals read: WORKDIR, guild_id, channel_id, guild_name, channel_name
# Globals written: guild_dir, channel_dir
# Returns: 0 when both directories and both symlinks exist, 1 otherwise.
dce.prepare_layout() {
    local channel_ndir guild_ndir
    guild_dir=$WORKDIR/$guild_id
    channel_dir=$guild_dir/$channel_id
    utils.say 'Preparing directory layout.'
    if dce.get_guild_info; then
        guild_ndir=$WORKDIR/$guild_name
        [[ -d $guild_dir ]] || mkdir -p "$guild_dir"
        # -T treats the link name as the link itself. If a real directory
        # already has that name, ln fails instead of silently creating a
        # stray link inside that directory.
        [[ -L $guild_ndir ]] || ln -fsrT "$guild_dir" "$guild_ndir"
        # The channel list lives inside guild_dir, so only continue once the
        # guild part of the layout is in place.
        if [[ -L $guild_ndir ]] && dce.get_channel_info; then
            channel_ndir=$guild_dir/$channel_name
            [[ -d $channel_dir ]] || mkdir -p "$channel_dir"
            [[ -L $channel_ndir ]] || ln -fsrT "$channel_dir" "$channel_ndir"
        fi
    fi
    [[ -d $guild_dir && -d $channel_dir && -L $guild_ndir && -L $channel_ndir ]] || {
        utils.say 'Directory setup did not complete.'
        return 1
    }
    return 0
}
# Export the current channel (JSON plus media) into the staging area.
# The export covers everything after the timestamp stored in
# $channel_dir/lastrun, or after START when no such file exists, and is
# attempted up to three times.
# Globals read: WORKDIR, DCECMD, START, guild_id, channel_id, channel_dir,
#               guild_name, channel_name
# Globals written: staging_dir
# Output: on success, writes the timestamp of this run to $channel_dir/lastrun.
# Returns: 0 when the export succeeded or there was nothing new to export,
#          1 when every attempt failed.
dce.download_channel() {
    local current_run finished last_run out tries=0
    local newline=$'\n'
    staging_dir=$WORKDIR/staging/$guild_id/$channel_id
    current_run=$(date -uIns)
    last_run=$(cat "$channel_dir"/lastrun 2>/dev/null)
    utils.say "Starting channel download: $guild_name | $channel_name"
    utils.say "Using staging directory: $staging_dir"
    while [[ -z $finished ]] && ((tries < 3)); do
        # Pause between attempts, but not before the first one.
        ((tries > 0)) && sleep 1
        out=$(
            nice -n 19 ionice -c 3 dotnet "$DCECMD" export \
                --format json --markdown false --media --reuse-media \
                --output "$staging_dir"/"$current_run".json \
                --media-dir "$staging_dir"/_media/ \
                --channel "$channel_id" --after "${last_run:-$START}" 2>&1
        ) && finished=1
        # An empty period is not an error. The flag is set independently of
        # the logger's status so quiet mode cannot turn this into a failure.
        if [[ $out == *'does not contain any messages within the specified period'* ]]; then
            utils.say 'No new content to archive for this channel.'
            finished=1
        fi
        tries=$((tries + 1))
    done
    if [[ -z $finished ]]; then
        utils.say 'Error downloading channel.'
        # Surface the last line the exporter printed to aid troubleshooting.
        [[ -n $out ]] && utils.say "Last exporter output: ${out##*"$newline"}"
        return 1
    fi
    printf '%s\n' "$current_run" >"$channel_dir"/lastrun
    utils.say 'Channel download completed.'
    # Report success explicitly instead of passing on the logger's status.
    return 0
}
# Helper: move every staged file selected by a jq filter into a directory.
# Arguments: $1 destination directory, $2 jq filter, $3.. JSON export files
# Paths are read one per line and passed to mv untouched, so file names
# containing quotes or backslashes are handled; empty and "null" results
# are skipped. Failures of mv are ignored (the file may already be gone).
dce._move_media() {
    local dest=$1 filter=$2 rel
    shift 2
    while IFS= read -r rel; do
        [[ -n $rel && $rel != null ]] || continue
        mv "$staging_dir/$rel" "$dest" 2>/dev/null
    done < <(jq -r "$filter" "$@")
}

# Helper: delete every staged file selected by a jq filter.
# Arguments: $1 jq filter, $2.. JSON export files
dce._purge_media() {
    local filter=$1 rel
    shift
    while IFS= read -r rel; do
        [[ -n $rel && $rel != null ]] || continue
        rm -f "$staging_dir/$rel"
    done < <(jq -r "$filter" "$@")
}

# Sort the staged export of the current channel into the channel archive.
# Attachments and embedded images/videos referenced by the exported JSON are
# moved to $channel_dir/{attachments,embeds}; icons, avatars, thumbnails,
# reaction emoji and stickers are deleted; the JSON files are removed and
# empty staging directories are cleaned up.
# Globals read: staging_dir, channel_dir, WORKDIR, guild_name, channel_name
# Returns: 1 when there is no staged JSON export to process, 0 otherwise.
dce.sort_media() {
    local file filter json_files
    local media_dir=$staging_dir/_media
    local -a purge_filters=(
        '.guild.iconUrl'
        '.messages[].author.avatarUrl'
        '.messages[].embeds[].thumbnail.url'
        '.messages[].embeds[].author.iconUrl'
        '.messages[].embeds[].footer.iconUrl'
        '.messages[].reactions[].emoji.imageUrl'
        '.messages[].reactions[].users[].avatarUrl'
        '.messages[].stickers[].sourceUrl'
    )
    json_files=("$staging_dir"/*.json)
    # Without a match the glob stays literal, so testing the first element
    # tells whether any export exists.
    [[ -d $staging_dir && -e ${json_files[0]} ]] || return 1

    if [[ -d $media_dir ]]; then
        # Drop files smaller than 1024 bytes and level-up images.
        find "$media_dir" -type f -size -1024c -delete
        rm -f "$media_dir"/{LevelUp,levelup}*
    fi

    utils.say 'Moving media files of interest to channel archive.'
    utils.say "Canonical channel archive path: $channel_dir"
    utils.say "Named channel archive path: $WORKDIR/$guild_name/$channel_name"
    mkdir -p "$channel_dir"/{attachments,embeds}
    dce._move_media "$channel_dir"/attachments \
        '.messages[].attachments[].url' "${json_files[@]}"
    dce._move_media "$channel_dir"/embeds \
        '.messages[].embeds[] | select(.video != null) | .video.url' "${json_files[@]}"
    dce._move_media "$channel_dir"/embeds \
        '.messages[].embeds[] | select(.images != null) | .images[].url' "${json_files[@]}"

    # Strip everything from an encoded colon ("%3A") onwards from file names.
    shopt -s globstar nullglob
    for file in "$channel_dir"/**/*%3A*; do mv "$file" "${file%%%3A*}"; done
    shopt -u globstar nullglob

    utils.say 'Purging unneeded media files from staging area.'
    for filter in "${purge_filters[@]}"; do
        dce._purge_media "$filter" "${json_files[@]}"
    done
    rm -f "${json_files[@]}"
    if [[ -d $media_dir && -n $(ls -A "$media_dir") ]]; then
        utils.say "Staging media directory not empty. Cleanup needed."
    fi
    find "$WORKDIR"/staging -type d -empty -delete
    utils.say 'Channel archive completed.'
    # Report success explicitly instead of passing on the logger's status.
    return 0
}
# Script entry point: configure, validate, then archive every requested
# channel in turn.
# Arguments: the script's positional parameters ("$@").
# Globals read: LOCKFILE, channel_ids
# Globals written: channel_id (the channel currently being processed)
# Exits: 0 after all channels were attempted; 1 when another instance holds
#        the lock. Earlier steps may exit on their own.
dce.start_archive() {
    dce.set_globals
    dce.parse_args "$@"
    dce.validate_state
    utils.say ' * Beginning channel archiving... * '
    # With noclobber the redirection fails if the file already exists, which
    # makes creating the lock atomic. The subshell keeps the option local;
    # $$ still expands to the PID of the main shell there.
    if (set -o noclobber && printf '%d\n' $$ >"$LOCKFILE") 2>/dev/null; then
        # Release the lock on every exit path, not only the regular one.
        trap 'rm -f "$LOCKFILE"' EXIT
    elif [[ -e $LOCKFILE ]]; then
        utils.say "Lock file $LOCKFILE already exists. Not starting a new instance."
        exit 1
    else
        # Not being able to create the lock was never fatal; keep it that way.
        utils.say "Unable to create lock file $LOCKFILE. Continuing without one."
    fi
    for channel_id in "${channel_ids[@]}"; do
        dce.prepare_layout && dce.download_channel && dce.sort_media
    done
    utils.say ' * All channel archiving complete. * '
    exit 0
}
# Only execute if not being sourced. An `if` is used instead of a bare
# `&&` list so that sourcing this file ends with status 0 rather than 1.
if [[ ${BASH_SOURCE[0]} == "$0" ]]; then
    dce.start_archive "$@"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This deliberately preserves only media files (embeds and attachments) from the channel history. Quite useful for archiving image dumps and meme channels. For a utility that does incremental archiving and keeps the full channel message data, you may want to check out this other project, which goes well with the frontend here.