Skip to content

Instantly share code, notes, and snippets.

@AfroThundr3007730
Last active March 30, 2024 23:18
Show Gist options
  • Save AfroThundr3007730/2be1846d81e46869c3cd33d7b7a0c0b9 to your computer and use it in GitHub Desktop.
Save AfroThundr3007730/2be1846d81e46869c3cd33d7b7a0c0b9 to your computer and use it in GitHub Desktop.
Wrapper around DiscordChatExporter for automatic channel media archiving
#!/bin/bash
# Wrapper around DiscordChatExporter for automatic channel media archiving
# SPDX-License-Identifier: GPL-3.0-or-later
# Initialize the script-wide configuration globals.
# Globals (read):    DCEDIR, HOME, UID, WORKDIR, XDG_CONFIG_HOME,
#                    XDG_RUNTIME_DIR, XDG_STATE_HOME
# Globals (written): AUTHOR, BASENAME, MODIFIED, VERSION, DCEDIR, DCECMD,
#                    DCETOKEN, LOCKFILE, START, WORKDIR
dce.set_globals() {
    AUTHOR='AfroThundr'
    BASENAME="${0##*/}"
    MODIFIED='20240304'
    VERSION='0.3.1'
    # Resolve DCEDIR itself (not only DCECMD): the state validation tests
    # DCEDIR directly, so it must hold the default when the caller set nothing.
    DCEDIR=${DCEDIR:-$HOME/.local/libexec/dce}
    DCECMD=$DCEDIR/DiscordChatExporter.Cli.dll
    DCETOKEN=${XDG_CONFIG_HOME:-$HOME/.config}/dce/token
    LOCKFILE=${XDG_RUNTIME_DIR:-/run/user/$UID}/$BASENAME.lock
    # Start date used for the export when -d is not given and the channel
    # has no recorded previous run.
    START=2015-05-13
    WORKDIR=${WORKDIR:-${XDG_STATE_HOME:-$HOME/.local/state}/dumps/dc-dump}
}
# Parse the command line into the script's global settings.
# Globals (read):    BASENAME, VERSION, MODIFIED, AUTHOR
# Globals (written): QUIET, START, guild_id, channel_ids
# Arguments: the script's positional parameters
# Exits:     0 after -h, or after -v when it is the last argument;
#            1 on a usage error (no arguments, unknown option, or an option
#            missing its value) so that callers can detect a bad invocation.
dce.parse_args() {
    local usage_error='Invalid argument specified, use -h for help.'
    (($#)) || {
        printf 'No arguments specified, use -h for help.\n'
        exit 1
    }
    # Loop on the argument count rather than on "$1" being non-empty, so an
    # empty argument is reported instead of silently ending the parse.
    while (($#)); do
        case $1 in
        -v)
            printf '%s: Version %s, updated %s by %s\n' \
                "$BASENAME" "$VERSION" "$MODIFIED" "$AUTHOR"
            shift
            (($#)) || exit 0
            ;;
        -h)
            printf 'DiscordChatExporter wrapper for automatic incremental archiving\n\n'
            printf 'Usage: %s -h | [-q] [-d <timestamp>] -g <guild_id> -c <channel_id(s)>\n\n' "$BASENAME"
            printf 'Options:\n'
            printf ' -c Channel ID of channel to be archived.\n'
            printf ' Multiple IDs can be passed comma-separated.\n'
            printf ' All channels must belong to the same guild.\n'
            printf ' -d Date to start archiving from (defaults to all history)\n'
            printf ' For subsequent runs, will resume from last run timestamp.\n'
            printf ' -g Guild (Server) ID of channel(s) to be archived.\n'
            printf ' Required to populate the export folder structure.\n'
            printf ' -h Display this help text.\n'
            printf ' -q Suppress console output.\n'
            exit 0
            ;;
        -q)
            QUIET=true
            shift
            ;;
        -c | -d | -g)
            # All three options require a non-empty value
            [[ -n $2 ]] || {
                printf '%s\n' "$usage_error"
                exit 1
            }
            case $1 in
            -c) IFS=, read -ra channel_ids <<<"$2" ;;
            -d) START=$2 ;;
            -g) guild_id=$2 ;;
            esac
            shift 2
            ;;
        *)
            printf '%s\n' "$usage_error"
            exit 1
            ;;
        esac
    done
}
# Print a UTC-timestamped log line to stdout unless QUIET is set.
# Globals:   QUIET (read)
# Arguments: message text; several arguments are joined into one line
#            (with the first character of IFS, a space by default)
# Returns:   0 when output is suppressed, otherwise printf's status.
#            Callers chain on and end with this function, so quiet mode
#            must not look like a failure.
utils.say() {
    if [[ -z $QUIET ]]; then
        printf '%s: %s\n' "$(date -u +%FT%TZ)" "$*"
    fi
}
# Verify dependencies, DiscordChatExporter setup, host state and user input.
# Exits the script with status 1 as soon as one group of checks has failed.
# Globals (read):    DCECMD, DCETOKEN, DISCORD_TOKEN, LOCKFILE, WORKDIR,
#                    guild_id, channel_ids
# Globals (written): DISCORD_TOKEN (sourced from DCETOKEN and exported when
#                    not already set), channel_ids (spaces stripped)
dce.validate_state() {
    # TODO: Expand this (check for commands, directories, other input, etc)
    local broken channel_id command core_deps dce_dir extra_deps

    # Dependency checks
    core_deps=(cat date ln ls mkdir mv nice rm rmdir sleep stat touch xargs)
    extra_deps=(awk dotnet find grep ionice jq ping)
    for command in "${core_deps[@]}"; do
        command -V "$command" &>/dev/null || {
            utils.say "Command missing from PATH: $command"
            utils.say "You need to install coreutils to continue."
            broken=1 && break
        }
    done
    for command in "${extra_deps[@]}"; do
        command -V "$command" &>/dev/null || {
            utils.say "Command missing from PATH: $command"
            utils.say 'You need to install its package to continue.'
            broken=1
        }
    done
    [[ $broken ]] && {
        utils.say "Dependencies checks failed. Exiting."
        exit 1
    }

    # DCE configuration
    # Derive the directory from DCECMD so the check also works when DCEDIR
    # itself was never set and only DCECMD carries the default location.
    dce_dir=${DCECMD%/*}
    [[ -f $DCECMD ]] || {
        utils.say "${DCECMD##*/} not found in path: $dce_dir"
        utils.say "Ensure DCEDIR points to the directory containing ${DCECMD##*/}"
        broken=1
    }
    # Pass when at least one listed runtime is 8 or newer. "$2 + 0" forces a
    # numeric comparison on the leading major.minor part; a string comparison
    # would rank "10.0.0" below "8".
    dotnet --list-runtimes | awk '$2 + 0 >= 8 { ok = 1 } END { exit !ok }' || {
        utils.say 'DiscordChatExporter requires .NET runtime 8.0 or greater.'
        utils.say 'Ensure the proper runtime is installed with: dotnet --list-runtimes'
        broken=1
    }
    [[ -n $DISCORD_TOKEN || -s $DCETOKEN && $(awk '/^DISCORD_TOKEN=/' <"$DCETOKEN") ]] || {
        utils.say "Unable to parse discord token file: $DCETOKEN"
        utils.say 'Populate with DISCORD_TOKEN=<token> or export DISCORD_TOKEN directly.'
        broken=1
    }
    [[ $broken ]] && {
        utils.say "DiscordChatExporter checks failed. Exiting."
        exit 1
    }
    # shellcheck disable=SC1090
    [[ -z $DISCORD_TOKEN ]] && . "$DCETOKEN" && export DISCORD_TOKEN

    # Host sanity checks
    # mkdir -p succeeds on an existing directory, so writability has to be
    # tested afterwards as well.
    if ! { mkdir -p "$WORKDIR" && [[ -w $WORKDIR ]]; }; then
        utils.say "Unable to write to working directory: $WORKDIR"
        utils.say "Ensure it exists and is writeable, or set WORKDIR to a location that is."
        broken=1
    fi
    if ! { ping -c 3 discord.com &>/dev/null && ping -c 3 cdn.discordapp.com &>/dev/null; }; then
        utils.say 'Unable to reach the Discord servers.'
        utils.say 'Ensure your IP and DNS settings are correct.'
        broken=1
    fi
    [[ -e $LOCKFILE ]] && {
        utils.say "Lock file $LOCKFILE already exists. Not starting a new instance."
        utils.say 'Ensure no other instance of this script is running, then remove, if necessary.'
        broken=1
    }
    [[ $broken ]] && {
        utils.say "Host sanity checks failed. Exiting."
        exit 1
    }

    # Input validation (anchored: the whole value must be digits)
    [[ $guild_id =~ ^[0-9]+$ ]] || {
        utils.say 'Got invalid guild ID. Use -h for help.'
        exit 1
    }
    [[ ${#channel_ids[@]} -gt 0 ]] || {
        utils.say 'No channel ID(s) found. Use -h for help.'
        exit 1
    }
    # Tolerate "-c '1, 2'" by dropping spaces before validating each ID
    channel_ids=("${channel_ids[@]// /}")
    for channel_id in "${channel_ids[@]}"; do
        [[ $channel_id =~ ^[0-9]+$ ]] || {
            utils.say 'Got invalid channel ID. Use -h for help.'
            exit 1
        }
    done
}
# Refresh the cached server list at $WORKDIR/guilds.txt.
# Globals:  DCECMD, WORKDIR (read)
# Returns:  0 on success, 1 when the export tool fails
dce.get_guild_list() {
    local tmp_list=$WORKDIR/guilds.txt.tmp
    utils.say 'Getting updated server list for user.'
    # Write to a scratch file first: redirecting straight into the cache
    # would truncate it on failure and leave an empty, fresh-looking file.
    if dotnet "$DCECMD" guilds >"$tmp_list" 2>/dev/null; then
        mv -f "$tmp_list" "$WORKDIR"/guilds.txt
    else
        rm -f "$tmp_list"
        utils.say 'Error occurred getting server list.'
        return 1
    fi
}
# Refresh the cached channel list at $guild_dir/channels.txt.
# Globals:  DCECMD, guild_dir, guild_id (read)
# Returns:  0 on success, 1 when the export tool fails
dce.get_channel_list() {
    local tmp_list=$guild_dir/channels.txt.tmp
    utils.say 'Getting updated channel list for server.'
    # Write to a scratch file first: redirecting straight into the cache
    # would truncate it on failure and leave an empty, fresh-looking file.
    if dotnet "$DCECMD" channels -g "$guild_id" >"$tmp_list" 2>/dev/null; then
        mv -f "$tmp_list" "$guild_dir"/channels.txt
    else
        rm -f "$tmp_list"
        utils.say 'Error occurred getting channel list.'
        return 1
    fi
}
# Resolve guild_id to a server name using the cached server list.
# The cache is refreshed when it is missing, older than 7 days, or does not
# contain the requested ID.
# Globals (read):    WORKDIR, guild_id
# Globals (written): guild_name ("/" replaced by "_")
# Returns: 0 when the name was found, 1 otherwise
dce.get_guild_info() {
    local cache=$WORKDIR/guilds.txt guild_info now refreshed
    # EPOCHSECONDS only exists in bash 5+; fall back to date(1)
    now=${EPOCHSECONDS:-$(date +%s)}
    [[ -f $cache && $(stat -c %Y "$cache") -gt $((now - 86400 * 7)) ]] || {
        dce.get_guild_list
        refreshed=1
    }
    utils.say "Looking up server ID: $guild_id"
    # Whole-word, fixed-string, first hit only: an ID must not match inside
    # a longer ID, and at most one line may be returned.
    guild_info=$(grep -m 1 -wF -- "$guild_id" "$cache" 2>/dev/null)
    if [[ -z $guild_info && -z $refreshed ]]; then
        # The ID may simply be newer than the cached list; refresh once
        dce.get_guild_list
        guild_info=$(grep -m 1 -wF -- "$guild_id" "$cache" 2>/dev/null)
    fi
    # Keep everything after the first "| " separator so that names which
    # themselves contain "| " stay intact.
    guild_name=${guild_info#*| }
    guild_name=${guild_name//\//_}
    [[ -n $guild_name ]] || {
        utils.say 'Server ID not found.'
        return 1
    }
    utils.say "Found server name: $guild_name"
    # Explicit status: the log helper's result must not decide success
    return 0
}
# Resolve channel_id to a channel name using the cached channel list.
# The cache is refreshed when it is missing, older than 7 days, or does not
# contain the requested ID.
# Globals (read):    guild_dir, channel_id
# Globals (written): channel_name (" / " becomes " | ", other "/" become "_")
# Returns: 0 when the name was found, 1 otherwise
dce.get_channel_info() {
    local cache=$guild_dir/channels.txt channel_info now refreshed
    # EPOCHSECONDS only exists in bash 5+; fall back to date(1)
    now=${EPOCHSECONDS:-$(date +%s)}
    [[ -f $cache && $(stat -c %Y "$cache") -gt $((now - 86400 * 7)) ]] || {
        dce.get_channel_list
        refreshed=1
    }
    utils.say "Looking up channel ID: $channel_id"
    # Whole-word, fixed-string, first hit only: an ID must not match inside
    # a longer ID, and at most one line may be returned.
    channel_info=$(grep -m 1 -wF -- "$channel_id" "$cache" 2>/dev/null)
    if [[ -z $channel_info && -z $refreshed ]]; then
        # The channel may be newer than the cached list; refresh once
        dce.get_channel_list
        channel_info=$(grep -m 1 -wF -- "$channel_id" "$cache" 2>/dev/null)
    fi
    # Keep everything after the first "| " separator so that names which
    # themselves contain "| " stay intact.
    channel_name=${channel_info#*| }
    channel_name=${channel_name// \/ / | }
    channel_name=${channel_name//\//_}
    [[ -n $channel_name ]] || {
        utils.say 'Channel ID not found.'
        return 1
    }
    utils.say "Found channel name: $channel_name"
    # Explicit status: the log helper's result must not decide success
    return 0
}
# Create the ID-named archive directories for the current guild and channel
# and add name-based relative symlinks pointing at them.
# Globals (read):    WORKDIR, guild_id, channel_id, guild_name, channel_name
# Globals (written): guild_dir, channel_dir
# Returns: 0 when both directories and both symlinks exist, 1 otherwise
dce.prepare_layout() {
    local guild_link channel_link
    guild_dir="$WORKDIR/$guild_id"
    channel_dir="$guild_dir/$channel_id"
    utils.say 'Preparing directory layout.'
    if dce.get_guild_info; then
        guild_link="$WORKDIR/$guild_name"
        if [[ ! -d $guild_dir ]]; then
            mkdir -p "$guild_dir"
        fi
        # The channel step only runs once the guild symlink is in place
        if [[ -L $guild_link ]] || ln -fsr "$guild_dir" "$guild_link"; then
            if dce.get_channel_info; then
                channel_link="$guild_dir/$channel_name"
                if [[ ! -d $channel_dir ]]; then
                    mkdir -p "$channel_dir"
                fi
                if [[ ! -L $channel_link ]]; then
                    ln -fsr "$channel_dir" "$channel_link"
                fi
            fi
        fi
    fi
    # Whatever happened above, the final state on disk decides the result
    if [[ -d $guild_dir && -d $channel_dir && -L $guild_link && -L $channel_link ]]; then
        return 0
    fi
    utils.say 'Directory setup did not complete.'
    return 1
}
# Export one channel (JSON plus media) into the staging area, resuming from
# the timestamp recorded by the previous run when there is one.
# Globals (read):    WORKDIR, DCECMD, START, guild_id, channel_id,
#                    guild_name, channel_name, channel_dir
# Globals (written): staging_dir
# Returns: 0 when the export succeeded or there was nothing new to export,
#          1 after three failed attempts
dce.download_channel() {
    local current_run end last_run out tries=0
    staging_dir=$WORKDIR/staging/$guild_id/$channel_id
    current_run=$(date -uIns)
    last_run=$(cat "$channel_dir"/lastrun 2>/dev/null)
    utils.say "Starting channel download: $guild_name | $channel_name"
    utils.say "Using staging directory: $staging_dir"
    while [[ -z $end ]] && ((tries < 3)); do
        # Pause before a retry only, never before the first attempt or
        # after the last one
        ((tries == 0)) || sleep 1
        ((tries += 1))
        out=$(
            nice -n 19 ionice -c 3 dotnet "$DCECMD" export \
                --format json --markdown false --media --reuse-media \
                --output "$staging_dir"/"$current_run".json \
                --media-dir "$staging_dir"/_media/ \
                --channel "$channel_id" --after "${last_run:-$START}" 2>&1
        ) && end=1
        # An empty period makes the tool exit non-zero but is not an error.
        # Set the flag unconditionally; it must not hinge on the log call.
        if [[ $out == *'does not contain any messages within the specified period'* ]]; then
            utils.say 'No new content to archive for this channel.'
            end=1
        fi
    done
    if [[ -z $end ]]; then
        utils.say 'Error downloading channel.'
        return 1
    fi
    printf '%s\n' "$current_run" >"$channel_dir"/lastrun
    utils.say 'Channel download completed.'
    # Explicit status: the log helper's result must not decide success
    return 0
}
# Move the media referenced as attachments or embeds in the staged JSON
# exports into the channel archive, then purge the remaining staged files.
# Globals (read): staging_dir, channel_dir, WORKDIR, guild_name, channel_name
# Returns: 1 when the staging directory holds no JSON export, 0 otherwise
dce.sort_media() {
    local file filter json_files media_dir
    local -a purge_filters
    json_files=("$staging_dir"/*.json)
    media_dir=$staging_dir/_media
    # An unmatched glob stays literal, so testing the first entry is enough
    [[ -d $staging_dir && -e ${json_files[0]} ]] || return 1
    if [[ -d $media_dir ]]; then
        # Drop files under 1024 bytes and LevelUp*/levelup* files
        # (presumably bot level-up images -- TODO confirm)
        find "$media_dir" -type f -size -1024c -delete
        rm -f "$media_dir"/{LevelUp,levelup}*
    fi
    utils.say 'Moving media files of interest to channel archive.'
    utils.say "Canonical channel archive path: $channel_dir"
    utils.say "Named channel archive path: $WORKDIR/$guild_name/$channel_name"
    mkdir -p "$channel_dir"/{attachments,embeds}
    # xargs -d '\n' takes each line literally; without it quotes and
    # backslashes in a file name are interpreted and abort the whole batch.
    # mv errors are discarded on purpose: values that name no staged file
    # (for example "null") are expected here.
    jq -r '.messages[].attachments[].url' "${json_files[@]}" |
        xargs -d '\n' -I {} mv -- "$staging_dir"/{} "$channel_dir"/attachments 2>/dev/null
    jq -r '.messages[].embeds[] | select(.video != null) | .video.url' "${json_files[@]}" |
        xargs -d '\n' -I {} mv -- "$staging_dir"/{} "$channel_dir"/embeds 2>/dev/null
    jq -r '.messages[].embeds[] | select(.images != null) | .images[].url' "${json_files[@]}" |
        xargs -d '\n' -I {} mv -- "$staging_dir"/{} "$channel_dir"/embeds 2>/dev/null
    # Strip a trailing "%3A..." suffix from archived file names. The subshell
    # keeps the shopt changes from leaking into (or clobbering) the caller's.
    (
        shopt -s globstar nullglob
        for file in "$channel_dir"/**/*%3A*; do
            mv -- "$file" "${file%%%3A*}"
        done
    )
    utils.say 'Purging unneeded media files from staging area.'
    purge_filters=(
        '.guild.iconUrl'
        '.messages[].author.avatarUrl'
        '.messages[].embeds[].thumbnail.url'
        '.messages[].embeds[].author.iconUrl'
        '.messages[].embeds[].footer.iconUrl'
        '.messages[].reactions[].emoji.imageUrl'
        '.messages[].reactions[].users[].avatarUrl'
        '.messages[].stickers[].sourceUrl'
    )
    for filter in "${purge_filters[@]}"; do
        jq -r "$filter" "${json_files[@]}" |
            xargs -d '\n' -I {} rm -f -- "$staging_dir"/{}
    done
    rm -f "${json_files[@]}"
    if [[ -n $(ls -A "$media_dir" 2>/dev/null) ]]; then
        utils.say "Staging media directory not empty. Cleanup needed."
    fi
    find "$WORKDIR"/staging -type d -empty -delete
    utils.say 'Channel archive completed.'
    # Explicit status: the log helper's result must not decide success
    return 0
}
# Script entry point: configure, validate, then archive every requested
# channel while holding the lock file.
# Arguments: the script's command-line arguments
# Exits:     0 after processing all channels; 1 when the lock cannot be taken
dce.start_archive() {
    dce.set_globals
    dce.parse_args "$@"
    dce.validate_state
    # noclobber makes the redirection fail when the file already exists, so
    # testing for the lock and creating it is one atomic step.
    if ! (set -o noclobber && printf '%d\n' "$$" >"$LOCKFILE") 2>/dev/null; then
        utils.say "Unable to create lock file $LOCKFILE (already exists or not writable)."
        exit 1
    fi
    # Installed only after the lock is ours, so another instance's lock is
    # never removed. Covers every exit path, including interruption.
    trap 'rm -f "$LOCKFILE"' EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM
    utils.say ' * Beginning channel archiving... * '
    for channel_id in "${channel_ids[@]}"; do
        dce.prepare_layout && dce.download_channel && dce.sort_media
    done
    utils.say ' * All channel archiving complete. * '
    rm -f "$LOCKFILE"
    exit 0
}
# Only execute if not being sourced. Written as an `if` so that sourcing the
# file yields status 0; a bare `[[ ... ]] && cmd` would leave status 1 behind
# and abort a sourcing script that runs under `set -e`.
if [[ ${BASH_SOURCE[0]} == "$0" ]]; then
    dce.start_archive "$@"
fi
@AfroThundr3007730
Copy link
Author

This deliberately preserves only media files (embeds and attachments) from the channel history, which is quite useful for archiving image dumps and meme channels. For a utility that does incremental archiving and keeps the full channel message data, you may want to check out this other project, which goes well with the frontend here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment