Last active
April 7, 2017 23:32
-
-
Save jgreely/2338c72c825d2a93713e4f0fc0025985 to your computer and use it in GitHub Desktop.
create random Hugo content files for theme testing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Create Hugo content files from random Wikipedia articles.
# Requires Pandoc to convert HTML to Markdown.
#
# USAGE holds the help text that is printed when option parsing fails.
# `read -d ''` slurps the whole here-document into the variable; it returns
# non-zero because it reaches end of input without finding a delimiter, which
# is expected here, so the status is deliberately discarded.
read -r -d '' USAGE <<'EOF' || true
Usage: wikiblog.sh [options]
-a list of possible authors for an article (selects 1)
-c list of possible categories (select up to N-1)
-n number of articles to generate (default 10)
-s series to assign all articles to
-t list of possible tags (select up to N-1)
EOF
#######################################
# Shuffle the global array `shuffled` in place (Fisher-Yates).
# Based on http://mywiki.wooledge.org/BashFAQ/026
# Globals:   shuffled (read and rewritten)
# Arguments: none
# Returns:   0 on success; 1 if the array is too large for $RANDOM to index
#######################################
shuffle() {
  local i tmp size max rand
  size=${#shuffled[*]}
  # $RANDOM only yields 0..32767. With more elements than that, `max` below
  # would become 0 and the rejection loop would never terminate.
  if (( size > 32768 )); then
    echo "shuffle: cannot shuffle more than 32768 elements" >&2
    return 1
  fi
  for (( i = size - 1; i > 0; i-- )); do
    # Largest multiple of (i+1) that fits in RANDOM's range; draws at or above
    # it are rejected so the modulo below is not biased towards low indexes.
    max=$(( 32768 / (i + 1) * (i + 1) ))
    while (( (rand = RANDOM) >= max )); do :; done
    rand=$(( rand % (i + 1) ))
    # Swap element i with the randomly chosen element at or below it.
    tmp=${shuffled[i]}
    shuffled[i]=${shuffled[rand]}
    shuffled[rand]=$tmp
  done
}
#######################################
# Decode a URL-encoded (percent-encoded) string.
# Based on https://gist.github.com/cdown/1163649
# Arguments: $1 - the encoded string
# Outputs:   the decoded string on stdout, without a trailing newline
#######################################
urldecode() {
  # '+' encodes a space.
  local url_encoded="${1//+/ }"
  # printf %b interprets every backslash escape, so double any literal
  # backslash in the input first; otherwise a title containing e.g. "\t"
  # would be turned into a tab.
  url_encoded=${url_encoded//\\/\\\\}
  # Rewrite each %XX as \xXX and let printf %b emit the corresponding byte.
  printf '%b' "${url_encoded//%/\\x}"
}
# Defaults; each can be overridden from the command line. The list options
# (-a, -c, -t) take a comma-separated value, which is split into an array.
authors=(alice bob carol ted)
categories=(blog home food work pets games movies books music art)
tags=(red orange yellow green blue indigo violet white black grey)
number=10
series=

# The leading ':' in the option string selects getopts' silent mode: a missing
# argument is reported as opt=':' and an unknown option as opt='?', with the
# offending option letter in OPTARG.
while getopts ":a:c:n:s:t:" opt; do
  case "$opt" in
    a)
      IFS=, read -ra authors <<< "$OPTARG"
      ;;
    c)
      IFS=, read -ra categories <<< "$OPTARG"
      ;;
    n)
      number="$OPTARG"
      ;;
    s)
      series="$OPTARG"
      ;;
    t)
      IFS=, read -ra tags <<< "$OPTARG"
      ;;
    :)
      # Without this arm an option given without its value is silently ignored.
      echo "option -$OPTARG requires an argument" >&2
      echo "$USAGE"
      exit 1
      ;;
    \?)
      echo "$USAGE"
      exit 1
      ;;
  esac
done

# -n is used as a loop bound, so it must be a plain non-negative integer.
case "$number" in
  '' | *[!0-9]*)
    echo "-n expects a non-negative integer, got '$number'" >&2
    echo "$USAGE"
    exit 1
    ;;
esac
# Force base 10 so a value such as 008 is not rejected as invalid octal later.
number=$(( 10#$number ))

# An empty list (e.g. -a '') would later be used as a modulo divisor of zero.
if (( ${#authors[@]} == 0 || ${#categories[@]} == 0 || ${#tags[@]} == 0 )); then
  echo "-a, -c and -t each need at least one value" >&2
  echo "$USAGE"
  exit 1
fi
# If $series is set, make all articles part of it by pre-rendering the
# front-matter line; otherwise it stays empty and only yields a blank line.
if [[ -n "$series" ]]; then
  series="series= \"$series\""
fi

WIKI="https://en.wikipedia.org/wiki/Special:Random"

# date(1) takes an epoch timestamp as `-r N` on macOS and as `-d @N` elsewhere.
# The platform cannot change during the run, so it is looked up only once.
os_name=$(uname)

#######################################
# Print a random-sized prefix of the global array `shuffled`, joined with
# '", "' so the caller can wrap the result in [ "..." ].
# Picks between 1 and N-1 elements; a one-element list is used whole, because
# `RANDOM % (N - 1)` would otherwise divide by zero.
# Globals:   shuffled (read)
# Outputs:   the joined elements on stdout, without a trailing newline
#######################################
random_subset() {
  local total=${#shuffled[@]} take joined sep='", "'
  if (( total > 1 )); then
    take=$(( RANDOM % (total - 1) + 1 ))
  else
    take=$total
  fi
  # Joining via printf keeps elements that contain spaces intact.
  printf -v joined '%s", "' "${shuffled[@]:0:take}"
  printf '%s' "${joined%"$sep"}"
}

for (( count = 1; count <= number; count++ )); do
  # random author
  myauthor=
  if (( ${#authors[@]} > 0 )); then
    myauthor=${authors[RANDOM % ${#authors[@]}]}
  fi

  # random date in the past ~10 years (RANDOM * 10000 s is at most ~10.4 years)
  stamp=$(( $(date +%s) - RANDOM * 10000 ))
  if [[ "$os_name" == "Darwin" ]]; then
    DATE=$(date -r "$stamp" "+%Y-%m-%dT%H:%M:%S")
  else
    DATE=$(date -d "@$stamp" "+%Y-%m-%dT%H:%M:%S")
  fi

  # Random wikipedia article: Special:Random answers with a redirect, and the
  # target is read from its Location header. Header names are case-insensitive
  # (HTTP/2 sends them in lower case), so the name is matched after lowercasing.
  URL=$(curl -s --max-redirs 0 -I "$WIKI" | tr -d '\015' |
    awk 'tolower($1) == "location:" { print $2; exit }')
  if [[ -z "$URL" ]]; then
    echo "could not get a random article from $WIKI" >&2
    exit 1
  fi

  # Title = last path component, decoded, underscores turned into spaces and a
  # space added after each comma. The final expression collapses the doubled
  # spaces this produces; the original `s/ / /` was a no-op, presumably a
  # whitespace-mangled version of the same thing.
  TITLE=$(urldecode "${URL##*/}" | tr _ ' ' |
    sed -e 's/,/, /g' -e 's/  */ /g')
  # Backslashes and double quotes would terminate or corrupt the TOML string.
  toml_title=$(printf '%s' "$TITLE" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

  # random subset of categories
  shuffled=("${categories[@]}")
  shuffle
  mycats=$(random_subset)

  # random subset of tags
  shuffled=("${tags[@]}")
  shuffle
  mytags=$(random_subset)

  printf -v OUT 'wiki-%03d-%05d.md' "$count" "$RANDOM"
  echo "$count $TITLE"

  # TOML front matter. The author line was missing, so the author selected
  # above (and therefore the -a option) never had any effect.
  cat > "$OUT" <<EOF
+++
title = "$toml_title"
author = "$myauthor"
date = "$DATE"
source = "$URL"
categories = [ "$mycats" ]
tags = [ "$mytags" ]
$series
+++
EOF

  # Retrieve the article and use Pandoc to convert it to Markdown.
  # NOTE(review): recent Pandoc releases deprecate `markdown_github` in favour
  # of `gfm`; kept as-is so older Pandoc versions continue to work.
  curl -s "$URL?action=render" | pandoc -f html -t markdown_github >> "$OUT"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment