Created
January 6, 2020 22:09
-
-
Save chaimleib/503c36984bc4dfe53f5336bc4c1d79c6 to your computer and use it in GitHub Desktop.
Convert TSV to JSON redirects using AWK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Converts .tsv files with cols [ from, to, intl? ] into a JSON redirects map.
#
# Usage:
# 1. Go to our list of redirects at https://docs.google.com/spreadsheets/d/1u4hQyNn86R1xzh8G30e14rSbvQs9S6CtsIgmz0NNIKk/edit#gid=549054781
# 2. Download each sheet as a .tsv.
# 3. path/to/json-redirects.sh ~/Downloads/*.tsv > path/to/marketing-dev/redirects.json
#
# In the resulting JSON, the keys are the "from" addresses, and the values are
# the "to" addresses. "To" addresses on the marketing web site are rewritten to
# be relative to "/". Since the service hostname and other hostnames may
# change, those hostnames are replaced with !help and !service so that they can
# be set by the server later per its configuration.
# awk program to reformat the src and dest urls.
#
# Input:  TSV rows with columns [ from, to, intl? ]. The first row of every
#         input file is skipped as a header, and so is any row that starts
#         with two uppercase ASCII letters.
# Output: TSV rows with columns [ intl?, from, to ].
filterRedirects='
BEGIN {
  FS = "\t";  # split on tabs only, so cells containing spaces stay intact
}

# Strip leading and trailing spaces from a cell.
function trim(s) {
  gsub(/^[ ]+|[ ]+$/, "", s);
  return s;
}

# Rewrite a destination URL: marketing-site URLs become relative to "/",
# while service and help URLs get the !service and !help placeholders.
function destUrl(u) {
  u = trim(u);
  sub(/^https?:\/\/evernote\.com\//, "/", u);             # del own hostname for internal
  sub(/^https?:\/\/www\.evernote\.com\//, "!service/", u); # let go config host
  sub(/^https?:\/\/help\.evernote\.com\//, "!help/", u);   # let go config host
  return u;
}

# Rewrite a source URL: drop the marketing hostname and any trailing slashes.
function srcUrl(u) {
  u = trim(u);
  sub(/^https?:\/\/evernote\.com\//, "/", u);  # del own hostname for srcUrl
  sub(/\/+$/, "", u);                          # strip trailing slashes
  return u;
}

{ sub(/\r$/, "") }          # tolerate CRLF line endings
/^[A-Z][A-Z]/ { next }      # same rows as /^[A-Z]{2,}/, without interval syntax
FNR > 1 { printf "%s\t%s\t%s\n", $3, srcUrl($1), destUrl($2) }
'
# awk program to convert .tsv into .json.
#
# Input:  TSV rows; column 2 becomes the key and column 3 the value.
# Output: a single JSON object, one "key": "value" pair per input row.
makeJson='
# Backslash-escape double quotes and backslashes so the value stays a valid
# JSON string.
function jsonEscape(s,    out, i, n, c) {
  out = "";
  n = length(s);
  for (i = 1; i <= n; i++) {
    c = substr(s, i, 1);
    if (c == "\"" || c == "\\") out = out "\\";
    out = out c;
  }
  return out;
}

BEGIN {
  FS = "\t";
  printf "{\n";
}

# The separator is printed before every pair except the first, so the last
# pair is never followed by a comma.
NR > 1 {
  printf ",\n";
}

{
  printf " \"%s\": \"%s\"", jsonEscape($2), jsonEscape($3);
}

END {
  printf "\n}\n";
}
'
# Function usage: jsonRedirects [yes/no] file.tsv[...]
# Produce a JSON redirects map, given a "yes" or "no" first argument (isIntl?)
# and a list of tsv files following.
#
# Globals:   filterRedirects, makeJson (awk programs, read)
# Arguments: $1   - value to select in the intl? column; it is inserted into
#                   an extended regex anchored at the start of each filtered
#                   row. Defaults to ".+" when missing or empty.
#            $2.. - .tsv files handed to awk
# Outputs:   the JSON produced by makeJson, on stdout
jsonRedirects() {
  local isIntl="${1:-.+}"
  # Drop the selector only when one was actually passed, so a call without
  # arguments does not run shift with nothing to shift.
  if (( $# > 0 )); then
    shift
  fi
  awk "$filterRedirects" "$@" |
    grep -E "^${isIntl}\\b" |
    awk "$makeJson"
}
# Use jq to combine two JSONs into one. Non-intl redirects are under "en_only",
# and intl ones are under "intl". -s treats sequential input JSONs as if in an
# array, and -S sorts the keys. And if the output stream is a terminal, we have
# colors!
#
# The order of the two calls matters: jq addresses the objects by position,
# .[0] being the "no" map and .[1] the "yes" map.
{
  jsonRedirects no "$@"
  jsonRedirects yes "$@"
} | jq -s -S '{"en_only": .[0], "intl": .[1]}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment