Last active
July 7, 2016 14:47
-
-
Save simonbru/b0cbe032f9711d1cfce41117dfd722a6 to your computer and use it in GitHub Desktop.
Script using curl and jq to back up reddit saved links/comments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Back up reddit saved links/comments.
# This script requires curl and jq>=1.5
#
# Strict mode is enabled with `set` rather than on the shebang line so that it
# still applies when the script is started as `bash script.sh`.
set -eu

# Force the C locale for every child process.
export LC_ALL=C

# Command line used for every HTTP request. It is expanded unquoted by the
# caller so that it word-splits into the command and its options.
curl='curl --user-agent RedditBackup -L'
#######################################
# Download every page of the saved-items feed and merge them into one file.
# Globals:
#   FEED_URL (read) - feed URL; loaded from ./credentials.conf when unset
#   curl     (read) - command string used to perform the HTTP requests
# Arguments:
#   $1 - output file (relative or absolute) receiving the merged JSON array
# Outputs:
#   Writes each raw page to saved/<n>.json under the current directory and
#   the merged array to $1. Diagnostics go to stderr.
# Returns:
#   Exits the script with status 1 when FEED_URL is missing or a response
#   has no .data.children.
#######################################
fetch_saved() {
  # Try to load credentials.conf if FEED_URL is not set
  if [[ ! -v FEED_URL && -e credentials.conf ]]; then
    # shellcheck source=/dev/null
    source credentials.conf
  fi
  if [[ ! -v FEED_URL ]]; then
    echo "Error: FEED_URL variable must be set in environment or in credentials.conf" >&2
    exit 1
  fi

  # Written from the current directory, so both relative and absolute paths
  # work as given.
  local output="$1"
  local -a pages=()
  local page
  local i=1
  local after=""

  mkdir -p saved
  while true; do
    page="saved/${i}.json"
    # $curl is intentionally unquoted: it holds the command plus its options.
    $curl "${FEED_URL}&limit=100&after=${after}" -o "$page"

    # Ensure that .data.children is there
    if ! jq -e '.data | has("children")' <"$page" >/dev/null; then
      echo "Error when retrieving saved links:" >&2
      jq . <"$page" >&2
      exit 1
    fi
    pages+=("$page")

    # The name of the last item is the cursor for the next page. On a page
    # without items the expression is null, jq -e fails, and the loop stops.
    if ! after="$(jq -r -e '.data.children[-1].data.name' <"$page")"; then
      break
    fi
    i=$((i + 1))
  done

  # Merge entries in one file. Only the pages fetched by this run are used,
  # so leftovers from an earlier run in saved/ cannot leak into the result.
  jq -s '[ .[] | .data.children[] ]' "${pages[@]}" >"$output"
}
#######################################
# Print one CSV row per item of a saved-items JSON array.
# Columns: name, subreddit, creation time (ISO 8601 via todate),
# title (falling back to link_title), body (empty string when absent).
# Arguments:
#   $1 - path to a JSON array whose elements carry a .data object
# Outputs:
#   CSV rows on stdout
#######################################
export_csv() {
  local json_file="$1"
  local query='.[].data | [.name, .subreddit, (.created_utc | todate), .title // .link_title, .body // ""] | @csv'
  # -r emits the @csv strings raw instead of as JSON-quoted strings.
  jq -r "$query" "$json_file"
}
#######################################
# Reduce a saved-items JSON array to a small set of fields per item.
# Items whose .kind is "t3" are emitted as type "post"; every other item is
# emitted as type "comment", each with its own field list.
# Arguments:
#   $1 - path to a JSON array of objects carrying .kind and .data
# Outputs:
#   Trimmed JSON array on stdout
#######################################
trim_saved() {
  local json_file="$1"
  # `try todate catch null` turns any .edited value that todate rejects into
  # null (presumably `false` for never-edited items; confirm against the
  # reddit API).
  local query='[ .[] |
    if .kind == "t3" then (.data | {
      type: "post",
      name, subreddit, score, num_comments,
      created_utc_iso: (.created_utc | todate),
      edited: (.edited | try todate catch null),
      title, permalink, url
    }) else (.data | {
      type: "comment",
      name, subreddit, score,
      created_utc_iso: (.created_utc | todate),
      edited: (.edited | try todate catch null),
      link_title, body
    }) end
  ]'
  jq "$query" "$json_file"
}
#######################################
# Print the command-line help.
# Outputs:
#   Help text on stdout; callers redirect it to stderr for error cases.
#######################################
usage() {
  # The synopsis lists every option handled by the getopts loop, including -t.
  printf 'Usage: %s -s <output_file> | -c <input_json_file> | -t <input_json_file> | -h\n' "$0"
  printf '%s\n' \
    "-s <output_file>: Fetch reddit saved links and save JSON output" \
    "-c <input_json_file>: Convert saved JSON into trimmed CSV" \
    "-t <input_json_file>: Trim saved JSON to only keep essential fields (e.g. for readability)" \
    "-h: Show this help"
}
# Option dispatch. Each option runs its action as soon as it is parsed, so
# several options may be combined in one invocation.
acted=0
while getopts "s:c:t:h" opt; do
  acted=1
  case "$opt" in
    s)
      fetch_saved "$OPTARG"
      ;;
    c)
      export_csv "$OPTARG"
      ;;
    t)
      trim_saved "$OPTARG"
      ;;
    h)
      usage
      ;;
    \?)
      # Unknown option or missing argument; getopts already printed the reason.
      usage >&2
      exit 1
      ;;
  esac
done

# Nothing was requested: either no arguments at all, or only non-option words
# that getopts does not look at.
if (( acted == 0 )); then
  usage >&2
  exit 1
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# credentials.conf sample, sourced by the backup script when FEED_URL is not
# already set in the environment.
# Saved links URL can be found in reddit preferences
FEED_URL="https://www.reddit.com/saved.json?feed=xxxxx&user=yyyyy"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment