Created
February 9, 2021 01:05
-
-
Save jswrenn/171057e8841cd18129cfba31f8e8bf2c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Load GNU parallel's env_parallel support for bash so the shell functions
# defined below can be called by name from parallel jobs.
# `command -v` is a builtin lookup (no external `which`), and its result is
# checked so a missing GNU parallel install fails here with a clear message
# instead of `.` being invoked without an argument.
env_parallel_lib="$(command -v env_parallel.bash)" || {
  printf '%s\n' 'error: env_parallel.bash not found on PATH' >&2
  exit 1
}
. "$env_parallel_lib"
# Per the GNU parallel documentation, --session records the names defined so
# far so that only definitions made after this point are copied to the jobs.
env_parallel --session
#######################################
# Extract the current enrollment figure from a course record.
# Reads one course JSON document on stdin (positional arguments are ignored),
# wraps its .regdemog_html fragment in a <parent> element and takes the third
# space-separated word of the text of p.enroll_demog.
# Outputs: a bare integer, or the JSON literal null when the element is
#   missing, xidel fails, or the extracted word is not a single integer.
#   The caller passes this output to `jq --argjson`, so it must be valid JSON.
#######################################
function enrolled_total() {
  local -r int_re='^[[:space:]]*([0-9]+)[[:space:]]*$'
  local word
  word="$(
    jq -rc '"<parent>" + .regdemog_html + "</parent>"' \
      | ( xidel - -s --xpath='one-or-more(//p[@class="enroll_demog"]/text())' 2>/dev/null \
          || echo "null" ) \
      | cut -d' ' -f3
  )"
  # The inner fallback only covers xidel exiting non-zero. This check also
  # covers empty, multi-line and non-numeric results, none of which would be
  # accepted as a JSON value downstream.
  if [[ "$word" =~ $int_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    echo "null"
  fi
}
#######################################
# Extract the seat capacity from a course record.
# Reads one course JSON document on stdin (positional arguments are ignored),
# wraps its .seats fragment in a <parent> element and reads the text of
# span.seats_max.
# Outputs: a bare integer, or the JSON literal null when the element is
#   missing, xidel fails, or the text is not a single integer.
#######################################
function enrolled_max() {
  local -r int_re='^[[:space:]]*([0-9]+)[[:space:]]*$'
  local seats
  # The pipeline's status is xidel's; on failure any partial output is
  # discarded so that exactly one value is ever printed.
  seats="$(
    jq -rc '"<parent>" + .seats + "</parent>"' \
      | xidel - -s --xpath='one-or-more(//span[@class="seats_max"]/text())' 2>/dev/null
  )" || seats=""
  if [[ "$seats" =~ $int_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    echo "null"
  fi
}
#######################################
# Decode the enrollment demographics embedded in a course record.
# Reads one course JSON document on stdin (positional arguments are ignored).
# When .regdemog_json is non-empty it is parsed as JSON text and printed in
# compact form; otherwise null is printed.
# Outputs: one compact JSON value. Falls back to the literal null when jq
#   fails (for example on malformed embedded JSON) or prints nothing, so the
#   result is always a usable JSON value like the sibling extractors'.
#######################################
function enrolled_demog() {
  local demog
  if demog="$(jq -c 'if (.regdemog_json | length) > 0 then (.regdemog_json | fromjson) else null end')" \
    && [[ -n "$demog" ]]; then
    printf '%s\n' "$demog"
  else
    echo "null"
  fi
}
#######################################
# List the sections referenced by a course record.
# Reads one course JSON document on stdin (positional arguments are ignored),
# pulls every a/@data-key attribute out of the .all_sections HTML and turns
# each "name:value" key into a one-member JSON object {"name": value}.
# Outputs: a compact JSON array of those objects.
#######################################
function sections(){
  # A single sed replaces the former `xargs -I{} printf | sed` pair: it does
  # not fork once per key and does not choke on quotes or backslashes, which
  # xargs treats specially. Leading blanks and blank lines are dropped, as
  # xargs did.
  jq -rc '.all_sections' \
    | xidel -s - -e '//a/@data-key' \
    | sed -e 's/^[[:space:]]*//' \
          -e '/^$/d' \
          -e 's/:/": /g' \
          -e 's/^/{"/' \
          -e 's/$/}/' \
    | jq -c --slurp '.'
}
#######################################
# Summarise the book list of a course record.
# Reads one course JSON document on stdin (positional arguments are ignored).
# For every table row in .books_html, xidel emits a five-element array: the
# last text node of the first th and of td 1-4, with null for missing cells.
# The jq stage then folds the rows: a row with a th value opens a new group
# named after it; every other row is appended to the current group as
# {title, author, isbn, cost}.
# cost is the price with "$" removed and, for a "low - high" range, only the
# part before " - ", converted to a number.
# Outputs: a compact JSON object mapping group name to its list of books.
#######################################
function books() {
  # sed '1d;$d' drops the first and last output line exactly like the former
  # `head -n -1 | tail -n +2`, without relying on GNU head's negative count.
  # NOTE(review): presumably those two lines are wrapper lines of xidel's
  # output rather than data rows - confirm against real xidel output.
  #
  # A price that cannot be parsed now yields cost: null instead of aborting
  # the whole jq program and losing every book of the record.
  jq -rc '.books_html' | xidel -s - -e '//tr/[
      (th[1]/text()[last()], null)[1],
      (td[1]/text()[last()], null)[1],
      (td[2]/text()[last()], null)[1],
      (td[3]/text()[last()], null)[1],
      (td[4]/text()[last()], null)[1]]' \
    | sed -e '1d' -e '$d' \
    | jq -rc --slurp 'reduce .[] as $row ({"groups": {}, last: null};
        if ($row[0] != null) then
          .last |= $row[0] | .groups[.last] |= []
        else
          .groups[.last] += [{
            title: $row[1],
            author: $row[2],
            isbn: $row[3],
            cost: (if ($row[4] == null) then null
                   else try (($row[4] | sub("\\$" ; ""; "gs") | split(" - "))[0] | tonumber) catch null
                   end),
          }]
        end) | .groups'
}
#######################################
# List the instructors of a course record.
# Reads one course JSON document on stdin (positional arguments are ignored).
# For every div.instructor in .instructordetail_html, xidel emits an array
# holding the data-id attribute and the text of the a[@data-id] link, followed
# by the text of the mailto: link. Empty arrays are discarded.
# Outputs: a compact JSON array of {id, name, email} objects, built from
#   array positions 0, 1 and 2.
#######################################
function instrs() {
  # Filtering, reshaping and collecting happen in one slurping jq call; the
  # second jq process that only re-collected the stream is no longer needed.
  jq -rc '.instructordetail_html' \
    | xidel -s - --xpath='//div[@class="instructor"]/[
        descendant::a[@data-id]/@data-id,
        descendant::a[@data-id]/text(),
        descendant::a[starts-with(@href,"mailto:")]/text()]' \
    | jq -c --slurp 'map(select(length > 0) | {id: .[0], name: .[1], email: .[2]})'
}
#######################################
# Build the summary object for one course record.
# Arguments: $1 - path to the course JSON file
# Outputs:   one JSON object on stdout with key, db, crn, title, department,
#            code, section, cancelled, enrollment, instructors,
#            other_sections and books
# Returns:   1 if the file is unreadable, otherwise jq's exit status
#######################################
function stats() {
  local file="$1"
  local contents total instructors book_groups

  if [[ ! -r "$file" ]]; then
    printf 'stats: cannot read %s\n' "$file" >&2
    return 1
  fi

  # The helper extractors used to receive `echo $CONTENTS`, i.e. the file with
  # every run of blanks/newlines collapsed to one space, but ALSO with
  # pathname expansion applied, so a lone `*` in the JSON was replaced by file
  # names. The whitespace collapse is kept on purpose (enrolled_total splits
  # on single spaces); only the globbing is removed.
  contents="$(tr -s ' \t\n' '   ' < "$file")"
  contents="${contents#" "}"
  contents="${contents%" "}"

  total="$(printf '%s\n' "$contents" | enrolled_total)"
  instructors="$(printf '%s\n' "$contents" | instrs)"
  book_groups="$(printf '%s\n' "$contents" | books)"

  # adjust as needed: enrolled_max, enrolled_demog and sections are currently
  # stubbed with null / null / []. To enable them, replace the literal with
  # the output of the helper of the same name, fed the same way as above.
  # A helper that prints nothing is replaced by null so --argjson always
  # receives valid JSON and the record is still emitted.
  # NOTE(review): other_sections keeps sections whose .crn EQUALS this
  # course's raw .crn - confirm whether `!=` (and a numeric compare) was
  # intended before enabling sections.
  jq '.crn as $CourseCRN |
      { key: .key | tonumber,
        db: .srcdb | tonumber,
        crn: .crn | tonumber,
        title: .title,
        department: .code | split(" ")[0],
        code: .code | split(" ")[1],
        section: .section,
        cancelled: (.stat == "C"),
        enrollment: {
          actual: $enrolled_total,
          maximum: $enrolled_max,
          demographics: $enrolled_demog,
        },
        instructors: $instructors,
        other_sections: ([$sections | .[] | select(.crn == $CourseCRN)]),
        books: $books
      }' "$file" \
    --argjson enrolled_total "${total:-null}" \
    --argjson enrolled_max "null" \
    --argjson enrolled_demog "null" \
    --argjson instructors "${instructors:-null}" \
    --argjson sections "[]" \
    --argjson books "${book_groups:-null}"
}
#######################################
# Job entry point used by the parallel run: summarise one course file.
# Arguments: $1 - path to the course JSON file
# Outputs:   whatever stats prints for that file
#######################################
function susp(){
  # Quoted so a path containing spaces or glob characters reaches stats as a
  # single, unmodified argument.
  stats "$1"
}
# Summarise every course file of one term with 8 parallel jobs.
# The term is the optional first script argument and defaults to 202010, the
# value that used to be hard-coded. File names travel NUL-delimited so unusual
# characters in a path cannot split or merge entries.
# The output file receives the job outputs one after another, i.e. a stream
# of JSON objects rather than a single JSON array.
term="${1:-202010}"
find "db/${term}/" -iname '*.json' -print0 \
  | env_parallel -0 --progress -j8 "susp {}" > "summary-${term}.json"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment