My version of gxadmin
#!/bin/bash | |
# gxadmin: Galaxy administration swiss army knife | |
# license: GPLv3 | |
# | |
# This is the result of admins across the universe working together :) | |
# | |
# Thanks to everyone for their contributions | |
# https://github.com/galaxyproject/gxadmin/graphs/contributors | |
version() { | |
echo 19 | |
} | |
GXADMIN_SITE_SPECIFIC=${GXADMIN_SITE_SPECIFIC:-~/.config/gxadmin-local.sh} | |
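# Site-local commands live in the file above; locate_cmds below scans it with the
# same '^name() { ## <args>: description' pattern used for the built-in commands, so
# a local command only needs to follow that convention. A minimal sketch of what such
# a file could contain (the function name, prefix, and SQL are purely illustrative):
#
#   local_example-count() { ## : Count registered users (illustrative example)
#       handle_help "$@" <<-EOF
#           An illustrative site-specific command kept out of the main script.
#       EOF
#       read -r -d '' QUERY <<-EOF
#           SELECT count(*) FROM galaxy_user
#       EOF
#   }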
hexencodefield9=$(cat <<EOF | |
import csv | |
import binascii | |
import sys | |
csv.field_size_limit(sys.maxsize) | |
spamreader = csv.reader(sys.stdin, delimiter=',', quotechar='"') | |
spwamwrite = csv.writer(sys.stdout, delimiter=',', quotechar='"') | |
for row in spamreader: | |
if row[9][0] != "\\\\": | |
row[9] = "\\\\x" + binascii.hexlify(row[9]) | |
spwamwrite.writerow(row) | |
EOF | |
) | |
identicon_script=$(cat <<EOF | |
import sys | |
import hashlib | |
def colored(fg, bg, value): | |
return '\x1b[38;5;{0}m\x1b[48;5;{1}m{2}\x1b[0m'.format(fg, bg, value) | |
class Iden: | |
def build(self, h): | |
pixels = [[0 for _ in range(5)] for _ in range(5)] | |
for i in range(0, 5): | |
for x in range(0, 5): | |
pixels[x][i] = self.showPixel(i, x, h) | |
return pixels | |
def showPixel(self, x, y, h): | |
m = 6 + abs(2-x) * 5 + y | |
return int(h[m:m+1], 16) % 2 == 0 | |
def getIcon(self, num, hash_string): | |
for x in self.build(hash_string): | |
yield ''.join([ | |
colored(num, (num + 15) % 255, '██' if q else ' ')
for q in x | |
]) | |
data = sys.stdin.read().strip() | |
m = hashlib.sha256() | |
m.update(data.encode('UTF-8')) | |
hash_string = m.hexdigest() | |
num = int(hash_string[0:2], 16) | |
for line in Iden().getIcon(num, hash_string): | |
print(line) | |
EOF | |
) | |
ifmore() { | |
q="$(cat)" | |
lines=$(echo "$q" | wc -l) | |
# 2 more than the head command since we'll 'spend' two for the ... anyway. | |
if (( lines > 8 )); then | |
echo "$q" | head -n 6 | |
echo " ..." | |
echo " run '$0 $1 help' for more" | |
else | |
echo "$q" | |
fi | |
} | |
colour_word() { | |
word=$1 | |
color=$2 | |
if [[ $color == "red" ]]; then | |
color_idx=1 | |
elif [[ $color == "orange" ]]; then | |
color_idx=2 | |
elif [[ $color == "green" ]]; then | |
color_idx=4 | |
fi | |
cat | sed "s|${word}|$(tput setab $color_idx)${word}$(tput sgr0)|g" | |
} | |
filter_commands() { | |
cat | grep "^$1 " | sort -k2 | column -s: -t | sed 's/^/ /' | colour_word Deprecated orange | colour_word '(NEW)' green | |
} | |
locate_cmds() { | |
grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v grep | grep -v '| sed' | sort | |
} | |
locate_cmds_nolocal() { | |
grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" | grep -v grep | grep -v '| sed' | sort | |
} | |
correct_cmd() { | |
cat | sed 's/_/ /;s/()\s*{ ##?\?//;s/ :/:/' | |
} | |
fzf_autorun_cmd() { | |
read -p "Would you like to run '$0 $res'? [N|y]: " choice | |
if [[ "$choice" == "y" ]] || [[ "$choice" == "Y" ]]; then | |
$0 $res | |
fi | |
} | |
didyoumean() { | |
# Given a query | |
# Is this a known subcommand | |
known_command=0 | |
for x in $registered_subcommands; do | |
if [[ "$1" == "$x" ]]; then | |
known_command=1 | |
fi | |
done | |
# If it is, we check the second part. | |
if (( known_command == 1 )); then | |
if [[ "$2" == "" ]]; then | |
usage "$1" | |
exit 1; | |
fi | |
error "Unknown subcommand: $1 $2" | |
warning "Did you mean one of the following?" | |
echo | |
if ! command -v fzf &> /dev/null; then | |
echo "(Top hits by levenshtein distance)" | |
locate_cmds | correct_cmd | grep "^$1 " | \ | |
cut -f 2 -d' ' | sed -s "s/://g;s/\$/ $2/g" | \ | |
levenshtein_filter | sed 's/^/ - [/;s/\t/]: /g' | |
else | |
res=$(locate_cmds | correct_cmd | grep "^$1 " | fzf -q "$2" --header="Unknown subcommand: $1 $2, did you mean..." | sed 's/[:<\[].*//g') | |
fzf_autorun_cmd | |
fi | |
exit 1; | |
else | |
error "Unknown command: $1" | |
usage | |
fi | |
exit 1; | |
} | |
usage(){ | |
cat <<-EOF | |
gxadmin usage: | |
EOF | |
if (( $# == 0 )); then | |
for x in $registered_subcommands; do | |
vn="_${x}_short_help" | |
printf " %10s: %s\n" "${x}" "${!vn}" | |
done | |
else | |
for x in $registered_subcommands; do | |
if [[ "$1" == "$x" ]]; then | |
vn="_${x}_short_help" | |
echo "${!vn}" | |
vn="_${x}_long_help" | |
echo "${!vn}" | |
fi | |
done | |
fi | |
if [[ -f "$GXADMIN_SITE_SPECIFIC" ]]; then | |
if (( $# == 0 )) || [[ "$1" == "local" ]]; then | |
printf " %10s: %s\n" "local" "(These can be configured in "$GXADMIN_SITE_SPECIFIC")" | |
fi | |
else | |
cat <<-EOF | |
Local-only commands can be configured in $GXADMIN_SITE_SPECIFIC | |
EOF | |
fi | |
if (( $# == 0 )) || [[ "$1" == "search" ]]; then | |
cat <<-EOF | |
search <term>: Search gxadmin for functions with specific terms | |
EOF | |
fi | |
if (( $# == 1 )); then | |
echo | |
locate_cmds | correct_cmd | filter_commands "$1" | |
fi | |
cat <<-EOF | |
All commands can be prefixed with "time" to print execution time to stderr | |
EOF | |
cat <<-EOF | |
help / -h / --help : this message. Invoke '--help' on any subcommand for help specific to that subcommand | |
Tip: Run "gxadmin meta whatsnew" to find out what's new in this release! | |
EOF | |
exit 0; | |
} | |
handle_help() { | |
if [[ ! -z "${GXADMIN_POPCON_ENABLE}" ]]; then | |
if [[ "${query_name}" != "user-info" ]]; then | |
echo "${mode:-mode} ${query_name:-sf}" >> ~/.gxadmin-popcon.log | |
fi | |
fi | |
for i in "$@"; do | |
if [[ "$i" = --help || "$i" = -h ]]; then | |
if [[ -n "${query_name}" ]]; then | |
key="${query_type}_${query_name}" | |
fi | |
invoke_desc=$(locate_cmds | grep "${key}()" | correct_cmd | sed "s/^/gxadmin /g") | |
short_desc=$(echo "$invoke_desc" | sed 's/.*://g') | |
short_parm=$(echo "$invoke_desc" | sed 's/:.*//g') | |
echo "${mode} ${query_name} - ${short_desc}" | |
echo | |
echo "**SYNOPSIS**" | |
echo | |
echo " $short_parm" | |
echo | |
manual="$(cat -)" | |
manual_wc="${#manual}" | |
if (( manual_wc > 3 )); then | |
echo "**NOTES**" | |
echo | |
echo "$manual" | |
echo | |
fi | |
# exit after printing the documentation! | |
exit 0; | |
fi | |
done | |
} | |
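# For any subcommand, handle_help builds the SYNOPSIS line from the function's
# "## <args>: description" annotation (via locate_cmds/correct_cmd above) and prints
# whatever heredoc was piped into it as NOTES. A rough sketch of the output for a
# purely hypothetical command declared as 'query_example() { ## [weeks]: Example':
#
#   $ gxadmin query example --help
#   query example - Example
#
#   **SYNOPSIS**
#
#       gxadmin query example [weeks]
#
#   **NOTES**
#
#   (whatever documentation the command piped into handle_help)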
assert_restart_lock(){ | |
if [ -f "$HOME/.restart-lock" ]; then | |
echo "A restart lock exists. This means someone is probably already restarting galaxy." | |
exit 3 | |
fi | |
} | |
assert_set_env() { | |
env_var=$1 | |
if [[ -z "${!env_var}" ]]; then | |
error Please set "\$${env_var}" | |
exit 1 | |
fi | |
} | |
wait_for_url() { | |
url=$1; shift; | |
while [ "$(curl --connect-timeout 5 --silent "$url" | wc -c)" -eq "0" ]; do | |
sleep 5; | |
echo -n '.' | |
done | |
} | |
assert_count() { | |
if (( $1 != $2 )); then | |
error "$3" | |
exit 1 | |
fi | |
} | |
assert_count_ge() { | |
if (( $1 < $2 )); then | |
error "$3" | |
exit 1 | |
fi | |
} | |
assert_file() { | |
if [[ ! -f "$1" ]]; then | |
error "File $1 does not exist" | |
exit 1 | |
fi | |
} | |
assert_file_warn() { | |
if [[ ! -f "$1" ]]; then | |
warning "File $1 does not exist" | |
fi | |
} | |
query_tbl() { | |
psql <<-EOF | |
$1 | |
EOF | |
} | |
query_tsv() { | |
psql <<-EOF | |
COPY ($1) to STDOUT with CSV DELIMITER E'\t' | |
EOF | |
} | |
query_json() { | |
psql <<-EOF | |
COPY ( | |
SELECT array_to_json(array_agg(row_to_json(t))) | |
FROM ($1) t | |
) to STDOUT with (FORMAT CSV, QUOTE ' ') | |
EOF | |
} | |
query_tsv_json() { | |
psql <<-EOF | |
COPY ($1) to STDOUT with (FORMAT CSV, QUOTE ' ') | |
EOF | |
} | |
query_csv() { | |
psql <<-EOF | |
COPY ($1) to STDOUT with CSV DELIMITER ',' | |
EOF | |
} | |
query_exp() { | |
psql <<-EOF | |
EXPLAIN ANALYZE VERBOSE $1 | |
EOF | |
} | |
query_expj() { | |
echo "$1" | |
echo | |
psql -qAt <<-EOF | |
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) $1 | |
EOF | |
} | |
query_echo() { | |
echo "$1" | |
} | |
query_influx() { | |
local query="$1" | |
local rename="$2" | |
local fields="$3" | |
local tags="$4" | |
local timestamp="$5" | |
if [[ -z "$fields" ]]; then | |
exit 0; | |
fi | |
arr2py=$(cat <<EOF | |
import sys | |
query_name = sys.argv[1] | |
fields = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[2].split(';')} | |
tags = [] | |
if len(sys.argv) > 3 and len(sys.argv[3]) > 0: | |
tags = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[3].split(';')} | |
timestamp = None | |
if len(sys.argv) > 4 and sys.argv[4] != '': | |
timestamp = int(sys.argv[4]) | |
for line in sys.stdin.read().split('\n'): | |
if len(line) == 0: | |
continue | |
parsed = line.split('\t') | |
metric = query_name | |
if len(tags): | |
tag_data = ['%s=%s' % (k, parsed[v].replace(' ', '\\ ').replace(',', '\\,').replace('=', '\\=')) for (k, v) in tags.items()] | |
metric += ',' + ','.join(tag_data) | |
field_data = ['%s=%s' % (k, parsed[v]) for (k, v) in fields.items()] | |
metric += ' ' + ','.join(field_data) | |
if timestamp is not None: | |
metric += ' ' + str(parsed[timestamp]) | |
print(metric) | |
EOF | |
) | |
psql -c "COPY ($query) to STDOUT with CSV DELIMITER E'\t'"| $GXADMIN_PYTHON -c "$arr2py" "$rename" "$fields" "$tags" "$timestamp" | |
} | |
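# query_influx renders each tab-separated row from psql as one InfluxDB line-protocol
# point: "<rename>[,tag=value...] field=value[,...] [timestamp]", where the fields and
# tags arguments map names to column indices. An illustrative example (values made up),
# assuming rename='queue_overview', tags='state=0', fields='count=1' and an input row
# of "running<TAB>42":
#
#   queue_overview,state=running count=42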
gdpr_safe() { | |
local coalesce_to | |
if (( $# > 2 )); then | |
coalesce_to="$3" | |
else | |
coalesce_to="__UNKNOWN__" | |
fi | |
if [ -z "$GDPR_MODE" ]; then | |
echo "COALESCE($1::text, '$coalesce_to') as $2" | |
else | |
# Try and be privacy respecting while generating numbers that can allow | |
# linking data across tables if need be? | |
echo "substring(md5(COALESCE($1::text, '$coalesce_to') || now()::date || '$GDPR_MODE'), 0, 12) as ${2:-$1}" | |
fi | |
} | |
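# gdpr_safe emits a single SQL column expression. Without GDPR_MODE it simply
# coalesces the value; with GDPR_MODE set it returns a salted, date-scoped md5
# substring so rows can still be linked across tables without exposing the value.
# Illustrative output for 'gdpr_safe galaxy_user.email email' (GDPR_MODE=secret):
#
#   unset:  COALESCE(galaxy_user.email::text, '__UNKNOWN__') as email
#   set:    substring(md5(COALESCE(galaxy_user.email::text, '__UNKNOWN__') || now()::date || 'secret'), 0, 12) as email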
# Borrowed from https://stackoverflow.com/questions/1527049/how-can-i-join-elements-of-an-array-in-bash | |
function join_by { | |
local d=$1; shift; | |
echo -n "$1"; | |
shift; | |
printf "%s" "${@/#/$d}"; | |
} | |
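# Usage example: join_by ',' 1 2 3   prints   1,2,3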
summary_statistics() { | |
local v=$1 | |
local ishuman=$2 | |
# TODO: there has got to be a less ugly way to do this | |
if (( ishuman == 1 )); then | |
human_size="pg_size_pretty(" | |
human_after=")" | |
else | |
human_size="" | |
human_after="" | |
fi | |
cat <<-EOF | |
${human_size}min($v)${human_after} AS min, | |
${human_size}percentile_cont(0.25) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_1st, | |
${human_size}percentile_cont(0.50) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS median, | |
${human_size}avg($v)${human_after} AS mean, | |
${human_size}percentile_cont(0.75) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_3rd, | |
${human_size}percentile_cont(0.95) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_95, | |
${human_size}percentile_cont(0.99) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_99, | |
${human_size}max($v)${human_after} AS max, | |
${human_size}sum($v)${human_after} AS sum, | |
${human_size}stddev($v)${human_after} AS stddev | |
EOF | |
} | |
get_user_filter(){ | |
echo "(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST('$1' AS INTEGER))" | |
} | |
error() { | |
(>&2 echo "$(tput setab 1)$*$(tput sgr0)") | |
} | |
warning() { | |
(>&2 echo "$(tput setab 2)$*$(tput sgr0)") | |
} | |
success() { | |
echo "$(tput setaf 40)$*$(tput sgr0)" | |
} | |
# Find a python binary, hopefully. | |
if [[ -z "${GXADMIN_PYTHON}" ]]; then | |
if hash python3 2>/dev/null; then | |
export GXADMIN_PYTHON=$(command -v python3) | |
elif hash python2 2>/dev/null; then
export GXADMIN_PYTHON=$(command -v python2) | |
elif hash python 2>/dev/null; then
export GXADMIN_PYTHON=$(command -v python) | |
else | |
warning "Some features require python support, sorry. If you have python installed somewhere that is not on the path or under a weird name, you can set GXADMIN_PYTHON to the path." | |
fi | |
fi | |
CHANGELOG=$(cat <<EOF | |
# 20-pre | |
- Fixed: | |
- Broken things in preview anonymisation script | |
- Unified uwsgi-status command (thanks @gmauro) | |
- Unified galaxy user/email/id filtering and made it stricter in what it accepts ([#60](https://github.com/galaxyproject/gxadmin/issues/60)). | |
- Updated "query user-disk-usage" to add a "--use-precalc" flag which uses the value calculated by Galaxy. | |
- Enabled GXADMIN_PYTHON from the environment, by [@gmauro](https://github.com/gmauro). | |
- Updated "galaxy cleanup" to avoid to set update_time on updated objects, by [@gmauro](https://github.com/gmauro). | |
- Fixed broken latest-users query.
- Fixed broken errored-jobs query, by [@gmauro](https://github.com/gmauro).
- Several fixes to the "galaxy ie-show" command, by [@gmauro](https://github.com/gmauro).
- Generalized the "uwsgi lastlog" function to proper handle journalctl's output, by [@gmauro](https://github.com/gmauro). | |
- Added: | |
- query dump-users, for [@shiltemann](https://github.com/shiltemann) | |
- "did you mean" when a command isn't found. | |
- query job-metrics, by [@anuprulez](https://github.com/anuprulez) and [@simonbray](https://github.com/simonbray). | |
- uwsgi active-user count | |
- exports PGAPPNAME setting the application name for postgres connections. This becomes visible in pg_stat_activity. | |
- mutate set_quota_for_oidc_user, by [@gmauro](https://github.com/gmauro). | |
- iquery support for queue-detail query | |
- query tool-usage-over-time which can be used to make charts of specific tools over time. | |
- improved use of FZF for both the search and 'typo' functions, when it is available. | |
- query pulsar-gb-transferred: sums up data in/out (minus collections) of pulsar. | |
# 19 | |
- Fixed: | |
- Fixed broken slurping from previous release (Thanks [@slugger70](https://github.com/Slugger70) for catching it.) | |
- Made changelog updates mandatory so I don't have to ask people for it. Finally. | |
- Added: | |
- encode/decode ID | |
- Benchmarking via ASV (preview) | |
- anonymise-db-for-release (preview) | |
- Add '--by_group' flag to several monthly queries (Thanks [@slugger70](https://github.com/Slugger70)) | |
- query monthly-cpu-stats (Thanks [@gmauro](https://github.com/gmauro)) | |
- workflow trace archive commands | |
- query queue has a new optional parameter for splitting queues by various attributes (Thanks @natefoo) | |
- query pg-rows-per-table to find out which tables have data. | |
- mutate fail-wfi | |
- mutate oidc-by-emails, mutate users affected by https://github.com/galaxyproject/galaxy/issues/9981 | |
- mutate now supports echo/explain prefixes. | |
- very-unsafe flag to most mutate methods, avoiding the transaction. | |
- added the [wonderful argument parsing](https://github.com/hexylena/wap) for parsing function signatures into arguments automatically, making contribution easier. | |
# 18 | |
- Fixed: | |
- All previous '--nice' flags are rewritten to '--human' (Thanks @lldelisle) | |
- Update 'user-disk-usage' to support iquery (Thanks @lldelisle) | |
- Update 'largest-histories' to support iquery (Thanks @lldelisle) | |
- uwsgi-memory now looks at workflow schedulers | |
- Exposed bash autocompletion and documented it | |
- Added: | |
- query workers, only works on Galaxy 20.01 or newer and retrieves the hostname and PID of Galaxy worker processes | |
- '--details' option to query errored-jobs to include the job_stderr column | |
- total number of jobs with exit states for galaxy instance (Thanks @bruggerk)
- query workflow-invocation-totals | |
- server subcommand exposing previously hidden functions | |
- Removed: | |
- filter hexdecodelines, this is now replaced by a built-in postgres function | |
- Changed: | |
- Refactored internal function search to unify local, query, mutate, with the rest. | |
# 17 | |
Testing our new release message | |
- Fixed: | |
- Issue with py3 only machines | |
- Added: | |
- jobs ready to run | |
# 16 | |
- Fixed: | |
- figured out how to decode hex blobs in postgres | |
# 15 | |
- Fancy [new docs site](https://galaxyproject.github.io/gxadmin/#/) | |
- Added: | |
- search :( | |
- query server-groups-allocated-cpu, thanks @selten | |
- query server-groups-disk-usage, thanks @selten | |
- query user-disk-quota, thanks @selten | |
- query job-info, thanks @selten | |
- query queue-detail-by-handler | |
- mutate reassign-job-to-handler | |
- a LOT of query pg-... commands from [heroku's pg-extras](https://github.com/heroku/heroku-pg-extras/tree/master/commands) | |
- mutate drop-extraneous-workflow-step-output-associations | |
- mutate reassign-workflows-to-handler | |
- query data-origin-distribution | |
- query data-origin-distribution-summary | |
- query user-history-list | |
# 14 | |
- Added: | |
- Job working directory cleaner | |
- meta slurp-upto | |
- meta slurp-day | |
- Imported jobs-queued-internal-by-handler and jobs-queued queries from main | |
- query users-with-oidc | |
- Three new queries for checking history execution time, thanks [@mmiladi](https://github.com/mmiladi) | |
- query history-runtime-system-by-tool | |
- query history-runtime-system | |
- query history-runtime-wallclock | |
- Three new queries for checking potentially broken tools, thanks [@jmchilton](https://github.com/jmchilton) | |
- query tool-new-errors | |
- query tool-errors | |
- query tool-likely-broken | |
- Import script from https://github.com/galaxyproject/grafana-dashboards | |
- query for datasets uploaded in last N hours | |
- mutate to approve users | |
- mutate assign-unassigned-workflow: workaround for [galaxyproject/galaxy#8209](https://github.com/galaxyproject/galaxy#8209) | |
- mutate oidc-role-find-affected: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy#8244) | |
- mutate oidc-role-fix: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy#8244) | |
- query user-disk-usage, thanks @selten | |
- query group-cpu-seconds, thanks @selten | |
- Fixed: | |
- Correct bug in queue-detail and a couple other functions which did not | |
correctly include data from anonymous users. | |
# 13 | |
- Added: | |
- local functions support querying when prefixed with "query-" | |
- "meta influx-post" and "meta influx-query" were added to get data into | |
and out of Influx | |
- "explainquery" is added as an alternative to csv/tsv/etc queries, which | |
does an "EXPLAIN ANALYZE" of the current SQL | |
- "explainjsonquery" added for use with http://tatiyants.com/pev/ | |
- "time" prefix for all functions added to print execution time to stderr | |
- Function to fail a specific job ID | |
- "query user-recent-aggregate-jobs" | |
- "query history-contents" | |
- "query hdca-info" | |
- "query hdca-datasets" | |
- "mutate fail-history", when failing individual jobs is too slow | |
- "galaxy migrate-tool-install-from-sqlite" | |
- "query user-cpu-years" | |
- Shellchecking of entire script | |
- Fixed: | |
- Escaped commas in influx outputs, switched to tabs to further prevent | |
comma issues. | |
- Correct date filter in "query {server-groups,server-datasets}" | |
- Removed unnecessary function name duplication, e.g. | |
"query_filter() { ## query filter [id]: Does thing" | |
can now be written as | |
"query_filter() { ## [id]: Does thing" | |
# 12 | |
- Added: | |
- local functions, users can add functions to a separate file which are | |
made available in gxadmin. @erasche resisted implementing these for a | |
long time for fear that they wouldn't be contributed back and everyone
would keep their precious sql private. So they may nag you occasionally | |
to contribute them back. | |
- (semi) EU specific systemd handler/zergling management commands | |
- filter digest-color: command to colour some text | |
- filter identicon: command to generate an identicon | |
- Some basic testing of the overall script | |
- Fixed: | |
- Correct time zones in all queries to be client-side time zones rather | |
than UTC encoded timestamps as stored in db (Thanks [@slugger70](https://github.com/Slugger70)) | |
- Renamed: "query monthly-users" → "query monthly-users-active" | |
- Removed: | |
- Removed highly EU specific handler and zerg functions | |
- Deprecated: "query active-users" | |
- Changed: | |
- Major internal reorganisation and split into parts for easier editing. A | |
bash script that needs a build system, truly horrifying. | |
- User info query now shows recent WF invocations and whether their largest | |
histories are deleted or purged or not. | |
- Separated out 'report' types that are markdown-only outputs and don't | |
support tsv/csv queries | |
# 11 | |
- Added: | |
- filter hexdecode: to help decode hex blobs in postgres queries | |
- uwsgi stats\_influx: which fetches stats from uWSGI zerglings | |
- filter pg2md: Convert postgres tables into markdown compatible tables | |
- "GDPR_MODE" env var which blanks out usernames and emails | |
- errored-jobs: Lists jobs that errored in the last N hours | |
- workflow-connections: Exports workflow connections, input to output tools
- dump-config: Dump Galaxy configuration as JSON | |
- tool-popularity: Most popular tools per month | |
- Changed: | |
- user-details: now reports in markdown compatible output and with more | |
information about the user | |
- job-info: now reports in markdown compatible output, including job inputs | |
+ outputs | |
- queue-overview: now includes user ID by default ("GDPR_MODE=1" will set | |
the value to "0"), and allows using shorter tool IDs | |
- user-details: renamed to user-info | |
- user-info: includes largest histories | |
# 10 | |
- Added: | |
- old-histories: Old histories query to find histories that haven't been | |
used for X weeks. | |
- user-details: query details about a specific user ID | |
- Fixed: | |
- Included date in cleanup logs | |
# 9 | |
- Added: | |
- Influx queries for: | |
- active-users | |
- collection-usage | |
- disk-usage | |
- groups-list | |
- largest-collection | |
- queue | |
- queue-overview | |
- tool-usage | |
- ts-repos | |
- users-count | |
- users-total | |
- tool-metrics | |
- tool-available-metrics | |
- largest-collection | |
- Fixed: | |
- Improved "latest-users" to include any groups they're part of and their | |
registration status (ack'd email/not) | |
- Influx queries now functional (but not automatic) | |
- Collection usage | |
# 8 | |
- Added: | |
- Help documentation in-tool | |
- Fixed: | |
- rewrote internals to use functions + case/esac | |
# 7 | |
- Added: | |
- cleanup | |
- Fixed: | |
- removed all temporary tables for CTEs, allowing tsv/csv queries for | |
everything | |
- internal cleanup | |
# 6 | |
- Added functions: | |
- active-users | |
- training-memberof | |
- training-remove-member | |
- Fixed: | |
- update script | |
- removed more EU paths for env vars | |
# 5 | |
- Added functions: | |
- disk-usage | |
- users-count | |
- users-total | |
- groups-list | |
- collection-usage | |
- ts-repos | |
- Fixed job-inputs/job-outputs | |
- Fixed internal variable names | |
# 4 | |
- Implemented update function | |
- removed all support for legacy "_" calls | |
- Fancy new automatically updated help function | |
# 3 | |
- Added migration to sqlite for toolshed install database | |
- Removed some EU specific stuff | |
# 2 | |
- Added training-queue and job-history functions | |
- replaced "_" with "-" | |
# 1 - Initial Release | |
Included functions for: | |
- validating configs | |
- zerg tasks | |
- handler tasks | |
- queries: | |
- latest-users | |
- tool-usage | |
- job-info | |
- job-outputs | |
- queue | |
- queue-detail | |
- recent-jobs | |
- jobs-per-user | |
- runtime-per-user | |
- training | |
- training-members | |
- queue-time | |
- datasets-created-daily | |
EOF | |
) | |
registered_subcommands="$registered_subcommands config" | |
_config_short_help="Access Galaxy configuration and other assorted things" | |
config_validate() { ## : validate config files | |
handle_help "$@" <<-EOF | |
Validate the configuration files | |
**Warning**: | |
!> - This requires you to have \`\$GALAXY_CONFIG_DIR\` and \`\$GALAXY_MUTABLE_CONFIG_DIR\` set; XML files under both directories are validated.
!> - This only validates that it is well formed XML, and does **not** validate against any schemas. | |
!> | |
!> $ gxadmin config validate
!> OK: galaxy-dist/data_manager_conf.xml | |
!> ... | |
!> OK: galaxy-dist/config/tool_data_table_conf.xml | |
!> OK: galaxy-dist/config/tool_sheds_conf.xml | |
!> All XML files validated | |
EOF | |
assert_set_env GALAXY_CONFIG_DIR | |
assert_set_env GALAXY_MUTABLE_CONFIG_DIR | |
fail_count=0 | |
for file in ${GALAXY_CONFIG_DIR}/*.xml; do | |
xmllint "$file" > /dev/null 2>/dev/null; | |
ec=$? | |
if (( ec > 0 )); then | |
fail_count=$(echo "$fail_count + 1" | bc) | |
error " FAIL: $file ($ec)"; | |
else | |
success " OK: $file"; | |
fi | |
done; | |
for file in ${GALAXY_MUTABLE_CONFIG_DIR}/*.xml; do | |
xmllint "$file" > /dev/null 2>/dev/null; | |
ec=$? | |
if (( ec > 0 )); then | |
fail_count=$(echo "$fail_count + 1" | bc) | |
error " FAIL: $file ($ec)"; | |
else | |
success " OK: $file"; | |
fi | |
done; | |
if (( fail_count == 0 )); then | |
success "All XML files validated" | |
else | |
error "XML validation failed, cancelling any actions." | |
exit 1 | |
fi | |
} | |
config_dump() { ## : Dump Galaxy configuration as JSON | |
handle_help "$@" <<-EOF | |
This function was added with the intention to use it internally, but it may be useful in your workflows. It uses the python code from the Galaxy codebase in order to properly load the configuration which is then dumped as JSON. | |
(.venv)$ gxadmin config dump | jq -S . | head
{ | |
"activation_grace_period": 3, | |
"admin_users": "[email protected]", | |
"admin_users_list": [ | |
"[email protected]" | |
], | |
"allow_library_path_paste": false, | |
"allow_path_paste": false, | |
"allow_user_creation": true, | |
"allow_user_dataset_purge": true, | |
EOF | |
assert_set_env GALAXY_ROOT | |
assert_set_env GALAXY_CONFIG_FILE | |
dump_config_python=$(cat <<EOF | |
import argparse | |
import json | |
import os | |
import sys | |
sys.path.insert(1, '$GALAXY_ROOT/lib') | |
import galaxy.config | |
from galaxy.util.script import app_properties_from_args, populate_config_args | |
parser = argparse.ArgumentParser() | |
populate_config_args(parser) | |
args = parser.parse_args() | |
args.config_file = '$GALAXY_CONFIG_FILE' | |
app_properties = app_properties_from_args(args) | |
config = galaxy.config.Configuration(**app_properties) | |
sys.stdout.write(json.dumps(config.__dict__, default=lambda o: '<fail>')) | |
EOF | |
) | |
echo "$dump_config_python" | $GXADMIN_PYTHON | |
} | |
registered_subcommands="$registered_subcommands mutate" | |
_mutate_short_help="DB Mutations, CSV/TSV queries are NOT available" | |
txn_prefix() { | |
if [[ "$1" == "--very-unsafe" ]]; then | |
printf ""; | |
else | |
printf "BEGIN TRANSACTION;\n" | |
fi | |
} | |
txn_postfix() { | |
if [[ "$1" == "--commit" ]] || [[ "$1" == "1" ]]; then | |
printf "\nCOMMIT;\n" | |
elif [[ "$1" == "--very-unsafe" ]]; then | |
printf ""; | |
else | |
printf "\nROLLBACK;\n" | |
fi | |
} | |
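# Every mutate_* command below wraps its SQL in these helpers via
# QUERY="$txn_pre $QUERY; $txn_pos", so the default invocation is a dry run whose
# transaction is rolled back, --commit actually commits, and --very-unsafe skips the
# transaction entirely. Illustrative expansion for the default (no flag) case:
#
#   BEGIN TRANSACTION;
#   <the command's UPDATE/DELETE statements>;
#   ROLLBACK;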
mutate_fail-terminal-datasets() { ## [--commit]: Causes the output datasets of jobs which were manually failed, to be marked as failed | |
handle_help "$@" <<-EOF | |
Whenever an admin marks a job as failed manually (e.g. by updating the | |
state in the database), the output datasets are not accordingly updated | |
by default. And this causes users to mistakenly think their jobs are | |
still running when they have long since failed. | |
This command provides a way to select those jobs in error states | |
(deleted, deleted_new, error, error_manually_dropped, | |
new_manually_dropped), find their associated output datasets, and fail | |
them with a blurb mentioning that they should contact the admin in
case of any questions
Running without any arguments will execute the command within a | |
transaction and then roll it back, allowing you to see counts of rows | |
and giving you an idea if it is doing the right thing. | |
**WARNINGS** | |
!> This does NOT currently work on collections | |
**EXAMPLES** | |
The process is to first query how many datasets will be failed, if this looks correct you're ready to go. | |
$ gxadmin mutate fail-terminal-datasets | |
BEGIN | |
SELECT 1 | |
jobs_per_month_to_be_failed | count | |
-----------------------------+------- | |
2019-02-01 00:00:00 | 1 | |
(1 row) | |
UPDATE 1 | |
UPDATE 1 | |
ROLLBACK | |
Then to run with the --commit flag to commit the changes | |
$ gxadmin mutate fail-terminal-datasets --commit | |
BEGIN | |
SELECT 1 | |
jobs_per_month_to_be_failed | count | |
-----------------------------+------- | |
2019-02-01 00:00:00 | 1 | |
(1 row) | |
UPDATE 1 | |
UPDATE 1 | |
COMMIT | |
EOF | |
# TODO(hxr): support collections | |
read -r -d '' QUERY <<-EOF | |
CREATE TEMP TABLE terminal_jobs_temp AS | |
SELECT | |
dataset.id as ds_id, | |
history_dataset_association.id as hda_id, | |
dataset.create_time AT TIME ZONE 'UTC' as ds_create | |
FROM | |
dataset, | |
history_dataset_association, | |
job_to_output_dataset, | |
job | |
WHERE | |
dataset.id = history_dataset_association.dataset_id | |
AND history_dataset_association.id = job_to_output_dataset.dataset_id | |
AND job.id = job_to_output_dataset.job_id | |
AND dataset.state IN ('queued', 'running', 'new') | |
AND job.state | |
IN ('deleted', 'deleted_new', 'error', 'error_manually_dropped', 'new_manually_dropped'); | |
SELECT | |
date_trunc('month', ds_create) as jobs_per_month_to_be_failed, count(*) | |
FROM terminal_jobs_temp | |
GROUP BY jobs_per_month_to_be_failed | |
ORDER BY date_trunc('month', ds_create) desc; | |
UPDATE dataset | |
SET | |
state = 'error' | |
WHERE id in (select ds_id from terminal_jobs_temp); | |
UPDATE history_dataset_association | |
SET | |
blurb = 'execution error', | |
info = 'This dataset''s job failed and has been manually addressed by a Galaxy administrator. Please use the bug icon to report this if you need assistance.' | |
WHERE id in (select hda_id from terminal_jobs_temp) | |
EOF | |
txn_pre=$(txn_prefix "$1") | |
txn_pos=$(txn_postfix "$1") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_fail-job() { ## <job_id> [--commit]: Sets a job state to error | |
handle_help "$@" <<-EOF | |
Sets a job's state to "error" | |
EOF | |
assert_count_ge $# 1 "Must supply a job ID" | |
id=$1 | |
read -r -d '' QUERY <<-EOF | |
UPDATE | |
job | |
SET | |
state = 'error' | |
WHERE | |
id = '$id' | |
EOF | |
txn_pre=$(txn_prefix "$2") | |
txn_pos=$(txn_postfix "$2") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_fail-history() { ## <history_id> [--commit]: Mark all jobs within a history to state error | |
handle_help "$@" <<-EOF | |
Set all jobs within a history to error | |
EOF | |
assert_count_ge $# 1 "Must supply a history ID" | |
id=$1 | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, state | |
FROM | |
job | |
WHERE | |
id | |
IN ( | |
SELECT | |
job_id | |
FROM | |
job_to_output_dataset | |
WHERE | |
dataset_id | |
IN ( | |
SELECT | |
id | |
FROM | |
history_dataset_association | |
WHERE | |
history_id = $1 | |
) | |
) | |
AND state NOT IN ('ok', 'error') | |
EOF | |
txn_pre=$(txn_prefix "$2") | |
txn_pos=$(txn_postfix "$2") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_delete-group-role() { ## <group_name> [--commit]: Remove the group, role, and any user-group + user-role associations | |
handle_help "$@" <<-EOF | |
Wipe out a group+role, and user associations. | |
EOF | |
assert_count_ge $# 1 "Must supply a group name" | |
id=$1 | |
read -r -d '' QUERY <<-EOF | |
DELETE FROM group_role_association | |
WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1'); | |
DELETE FROM user_group_association | |
WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1'); | |
DELETE FROM user_role_association | |
WHERE role_id = (SELECT role_id FROM group_role_association WHERE group_id= (SELECT id FROM galaxy_group WHERE name = '$1')); | |
DELETE FROM role | |
WHERE id = (SELECT role_id FROM group_role_association WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1')); | |
DELETE FROM galaxy_group | |
WHERE name = '$1' | |
EOF | |
txn_pre=$(txn_prefix "$2") | |
txn_pos=$(txn_postfix "$2") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_assign-unassigned-workflows() { ## <handler_prefix> <handler_count> [--commit]: Randomly assigns unassigned workflows to handlers. Workaround for galaxyproject/galaxy#8209 | |
handle_help "$@" <<-EOF | |
Workaround for https://github.com/galaxyproject/galaxy/issues/8209 | |
Handler names should have a number as a suffix, e.g. "some_string_##". In
this case handler_prefix is "some_string_" and count is however many | |
handlers you want to schedule workflows across. | |
EOF | |
assert_count_ge $# 1 "Must supply a handler_prefix" | |
assert_count_ge $# 2 "Must supply a count" | |
prefix=$1 | |
count=$2 | |
read -r -d '' QUERY <<-EOF | |
UPDATE workflow_invocation | |
SET handler = '$prefix' || (random() * $count)::integer | |
WHERE state = 'new' and handler = '_default_' | |
RETURNING workflow_invocation.id | |
EOF | |
txn_pre=$(txn_prefix "$3") | |
txn_pos=$(txn_postfix "$3") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_reassign-workflows-to-handler() { ## <handler_from> <handler_to> [--commit]: Reassign workflows in 'new' state to a different handler. | |
handle_help "$@" <<-EOF | |
Another workaround for https://github.com/galaxyproject/galaxy/issues/8209 | |
Need to use the full handler names e.g. handler_main_0 | |
EOF | |
assert_count_ge $# 1 "Must supply a handler_from" | |
assert_count_ge $# 2 "Must supply a handler_to" | |
read -r -d '' QUERY <<-EOF | |
UPDATE workflow_invocation | |
SET handler = '$2' | |
WHERE state = 'new' and handler = '$1' | |
RETURNING workflow_invocation.id | |
EOF | |
txn_pre=$(txn_prefix "$3") | |
txn_pos=$(txn_postfix "$3") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_approve-user() { ## <username|email|user_id>: Approve a user in the database | |
handle_help "$@" <<-EOF | |
There is no --commit flag on this because it is relatively safe | |
EOF | |
assert_count_ge $# 1 "Must supply a username/email/user-id" | |
user_filter=$(get_user_filter "$1") | |
read -r -d '' QUERY <<-EOF | |
UPDATE galaxy_user | |
SET active = true | |
WHERE $user_filter | |
EOF | |
} | |
mutate_oidc-role-find-affected() { ## : Find users affected by galaxyproject/galaxy#8244 | |
handle_help "$@" <<-EOF | |
Workaround for https://github.com/galaxyproject/galaxy/issues/8244 | |
This finds all of the OIDC authenticated users which do not have any | |
roles associated to them. (Should be sufficient?) | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
user_id | |
FROM | |
oidc_user_authnz_tokens AS ouat | |
WHERE | |
ouat.user_id NOT IN (SELECT user_id FROM user_role_association) | |
EOF | |
# Not proud of this. | |
FLAVOR='tsv' | |
} | |
mutate_oidc-role-fix() { ## <username|email|user_id>: Fix permissions for users logged in via OIDC. Workaround for galaxyproject/galaxy#8244 | |
handle_help "$@" <<-EOF | |
Workaround for https://github.com/galaxyproject/galaxy/issues/8244 | |
EOF | |
user_filter=$(get_user_filter "$1") | |
# Coerce to user ID | |
read -r -d '' qstr <<-EOF | |
SELECT id, email | |
FROM galaxy_user | |
WHERE $user_filter | |
EOF | |
echo "QUERY: $qstr" | |
results="$(query_tsv "$qstr")" | |
echo "RESULTS: $results" | |
user_id=$(echo "$results" | awk '{print $1}') | |
email=$(echo "$results" | awk '{print $2}') | |
# check if there is an existing role. | |
read -r -d '' qstr <<-EOF | |
select count(*) from user_role_association left join role on user_role_association.role_id = role.id where user_id = $user_id and role.type = 'private' | |
EOF | |
echo "QUERY: $qstr" | |
results="$(query_tsv "$qstr")" | |
echo "RESULTS: $results" | |
# Some (mild) sanity checking. Should probably wrap this up in something nicer.
if (( results != 0 )); then | |
error "A default private role already exists for this account." | |
exit 1 | |
fi | |
# Create the role since it does not exist | |
read -r -d '' qstr <<-EOF | |
INSERT INTO role (create_time, update_time, name, description, type, deleted) | |
VALUES (now(), now(), '$email', 'Private Role for $email', 'private', false) | |
RETURNING id | |
EOF | |
echo "QUERY: $qstr" | |
role_id="$(query_tsv "$qstr")" | |
echo "RESULTS: $results" | |
# Associate with the user | |
read -r -d '' qstr <<-EOF | |
INSERT INTO user_role_association (user_id, role_id, create_time, update_time) | |
VALUES ($user_id, $role_id, now(), now()) | |
EOF | |
echo "QUERY: $qstr" | |
query_tbl "$qstr" | |
# Insert into their personal default user permissions | |
read -r -d '' qstr <<-EOF | |
INSERT INTO default_user_permissions (user_id, action, role_id) | |
VALUES ($user_id, 'manage permissions', $role_id), ($user_id, 'access', $role_id) | |
EOF | |
echo "QUERY: $qstr" | |
results="$(query_tbl "$qstr")" | |
echo "RESULTS: $results" | |
# Fix dataset_permissions. Manage rows are created but have a null entry in their role_id | |
read -r -d '' qstr <<-EOF | |
UPDATE dataset_permissions | |
SET role_id = $role_id | |
WHERE | |
id | |
IN ( | |
SELECT | |
id | |
FROM | |
dataset_permissions | |
WHERE | |
dataset_id | |
IN ( | |
SELECT | |
ds.id | |
FROM | |
history AS h | |
LEFT JOIN history_dataset_association AS hda ON | |
h.id = hda.history_id | |
LEFT JOIN dataset AS ds ON hda.dataset_id = ds.id | |
WHERE | |
h.user_id = $user_id | |
) | |
AND role_id IS NULL | |
AND action = 'manage permissions' | |
) | |
EOF | |
echo "QUERY: $qstr" | |
results="$(query_tbl "$qstr")" | |
echo "RESULTS: $results" | |
# We could update history permissions but this would make those histories | |
# private and the current behaviour is that they're public, if the user has | |
# tried to share, then we'd be changing permissions and going against user | |
# expectations. | |
read -r -d '' QUERY <<-EOF | |
select 'done' | |
EOF | |
} | |
mutate_reassign-job-to-handler() { ## <job_id> <handler_id> [--commit]: Reassign a job to a different handler | |
handle_help "$@" <<-EOF | |
EOF | |
assert_count_ge $# 2 "Must supply a job and handler ID" | |
job_id=$1 | |
handler_id=$2 | |
read -r -d '' QUERY <<-EOF | |
UPDATE | |
job | |
SET | |
handler = '$handler_id' | |
WHERE | |
job.id = $job_id | |
EOF | |
txn_pre=$(txn_prefix "$3") | |
txn_pos=$(txn_postfix "$3") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_drop-extraneous-workflow-step-output-associations() { ## [--commit]: #8418, drop extraneous connection | |
handle_help "$@" <<-EOF | |
Per https://github.com/galaxyproject/galaxy/pull/8418, this drops the | |
workflow step output associations that are not necessary. | |
This only needs to be run once, on servers which have run Galaxy<=19.05 | |
to remove duplicate entries in the following tables: | |
- workflow_invocation_step_output_dataset_association | |
- workflow_invocation_step_output_dataset_collection_association | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH exclude_list AS ( | |
SELECT max(w.id) as id | |
FROM workflow_invocation_step_output_dataset_association as w | |
GROUP BY workflow_invocation_step_id, dataset_id, output_name | |
) | |
DELETE | |
FROM workflow_invocation_step_output_dataset_association wisoda | |
WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisoda.id = exclude_list.id); | |
WITH exclude_list AS ( | |
SELECT max(w.id) as id | |
FROM workflow_invocation_step_output_dataset_collection_association as w | |
GROUP BY workflow_invocation_step_id, dataset_collection_id, output_name | |
) | |
DELETE | |
FROM workflow_invocation_step_output_dataset_collection_association wisodca | |
WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisodca.id = exclude_list.id) | |
EOF | |
txn_pre=$(txn_prefix "$1") | |
txn_pos=$(txn_postfix "$1") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_restart-jobs() { ## [--commit] <-|job_id [job_id [...]]> : Restart some jobs | |
handle_help "$@" <<-EOF | |
Restart jobs | |
EOF | |
commit_flag="" | |
if [[ $1 == "--commit" ]]; then | |
commit_flag="$1" | |
shift; | |
fi | |
if [[ "$1" == "-" ]]; then | |
# read jobs from stdin | |
job_ids=$(cat | paste -s -d' ') | |
else | |
# read from $@ | |
job_ids=$@; | |
fi | |
job_ids_string=$(join_by ',' ${job_ids[@]}) | |
read -r -d '' QUERY <<-EOF | |
UPDATE job | |
SET state = 'new' | |
WHERE job.id in ($job_ids_string) | |
EOF | |
txn_pre=$(txn_prefix "$commit_flag") | |
txn_pos=$(txn_postfix "$commit_flag") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
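# Illustrative invocations (job ids made up): pass ids directly, or use '-' to read
# them from stdin, e.g. piped from another command that lists job ids:
#
#   gxadmin mutate restart-jobs --commit 123 456
#   some-command-listing-job-ids | gxadmin mutate restart-jobs --commit -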
mutate_generate-unset-api-keys() { ## [--commit]: Generate API keys for users which do not have one set. | |
handle_help "$@" <<-EOF | |
For some use cases (IEs), it is preferable that everyone has an API
key set for them, if they don't choose to set one themselves. So we set | |
a base64'd key to be a bit extra secure just in case. These work just | |
fine like hex keys. | |
EOF | |
commit_flag="" | |
if [[ $1 == "--commit" ]]; then | |
commit_flag="$1" | |
shift; | |
fi | |
read -r -d '' QUERY <<-EOF | |
INSERT INTO api_keys (create_time, user_id, key) | |
( | |
SELECT | |
now(), | |
galaxy_user.id, | |
substring(regexp_replace(encode(random_bytea(64), 'base64')::TEXT, '[^a-z0-9A-Z]', '', 'g'), 0, 32) | |
FROM | |
galaxy_user LEFT JOIN api_keys ON galaxy_user.id = api_keys.user_id | |
WHERE | |
api_keys.key IS NULL | |
) | |
EOF | |
txn_pre=$(txn_prefix "$commit_flag") | |
txn_pos=$(txn_postfix "$commit_flag") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_anonymise-db-for-release() { ## [--commit|--very-unsafe]: This will attempt to make a database completely safe to release publicly. | |
handle_help "$@" <<-EOF | |
THIS WILL DESTROY YOUR DATABASE. | |
--commit will do it and wrap it in a transaction | |
--very-unsafe will just run it without the transaction | |
EOF | |
commit_flag="" | |
if [[ "$1" == "--commit" ]] || [[ "$1" == "--very-unsafe" ]]; then | |
commit_flag="$1" | |
shift; | |
fi | |
read -r -d '' QUERY <<-EOF | |
--DONE | |
CREATE OR REPLACE FUNCTION gxadmin_digest_value(input text) | |
RETURNS float | |
AS \$\$ | |
SELECT | |
('0.' || abs(('x' || substr(md5(current_date ||input::text),1,16))::bit(64)::bigint))::float | |
\$\$ LANGUAGE SQL STABLE; | |
--DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_seed(input text, size integer) | |
RETURNS table(value float) | |
AS \$\$ | |
SELECT gxadmin_digest_value(input::text || generate_series) as value | |
FROM generate_series(1, size) | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_number(input text, size integer) | |
RETURNS integer | |
AS \$\$ | |
SELECT (power(10, size) * gxadmin_digest_value(input))::integer | |
\$\$ LANGUAGE SQL IMMUTABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_word(input text, size integer) | |
RETURNS text | |
AS \$\$ | |
SELECT array_to_string( | |
ARRAY( | |
SELECT chr((97 + round(value * 25)) :: integer) | |
FROM gxadmin_random_seed(input, size) | |
), | |
'' | |
); | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_slug(input text, size integer) | |
RETURNS text | |
AS \$\$ | |
SELECT | |
substr( | |
gxadmin_random_word(input || 'a' || size, 5 + (gxadmin_digest_value(input || 'a' || size) * size)::integer) | |
|| '-' || | |
gxadmin_random_word(input || 'b' || size, 3 + (gxadmin_digest_value(input || 'b' || size) * size)::integer) | |
|| '-' || | |
gxadmin_random_word(input || 'c' || size, 4 + (gxadmin_digest_value(input || 'c' || size) * size)::integer) | |
, 1, size) | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONEish | |
CREATE OR REPLACE FUNCTION gxadmin_random_email(input text) | |
RETURNS text | |
AS \$\$ | |
with tmp as ( | |
select gxadmin_digest_value(input) as a | |
) | |
SELECT | |
CASE | |
WHEN a < 0.3 THEN gxadmin_random_number(input, 7) || '@example.com' | |
WHEN a < 0.8 THEN gxadmin_random_slug(input, 6) || '@example.com' | |
ELSE gxadmin_random_slug(input, 20) || '@example.com' | |
END | |
from tmp | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_ip(input text) | |
RETURNS text | |
AS \$\$ | |
-- Uses documentation range IPs | |
with tmp as ( | |
select gxadmin_digest_value(input::text) as a | |
) | |
SELECT | |
CASE | |
WHEN a < 0.7 THEN '192.0.2.' || (gxadmin_digest_value(input) * 255)::integer | |
ELSE '2001:0db8:85a3:8d3:1319:8a2e:0370:' || to_hex((gxadmin_digest_value(input) * 65536)::integer) | |
END | |
from tmp | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_string(input text, size integer, start integer, width integer) | |
RETURNS text | |
AS \$\$ | |
SELECT array_to_string( | |
ARRAY( | |
SELECT chr((start + round(value * width)) :: integer) | |
FROM gxadmin_random_seed(input, size) | |
), | |
'' | |
); | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_tag(input text) | |
RETURNS text | |
AS \$\$ | |
with tmp as ( | |
select gxadmin_digest_value(input) as a | |
) | |
SELECT | |
CASE | |
WHEN a < 0.1 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 65, 26) | |
WHEN a < 0.2 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 192, 200) | |
WHEN a < 0.3 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 1025, 100) | |
WHEN a < 0.4 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 5121, 200) | |
WHEN a < 0.5 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 9728, 400) | |
WHEN a < 0.6 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 14300, 100) | |
WHEN a < 0.8 THEN gxadmin_random_slug(input, 3 + gxadmin_random_number(input, 1)) | |
ELSE gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 48, 10) | |
END | |
from tmp | |
\$\$ LANGUAGE SQL; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_pw(input text, size integer) | |
RETURNS text | |
AS \$\$ | |
SELECT gxadmin_random_string(input, size, 48, 59); | |
\$\$ LANGUAGE SQL STABLE; | |
-- DONE | |
CREATE OR REPLACE FUNCTION gxadmin_random_text(input text, size integer) | |
RETURNS text | |
AS \$\$ | |
SELECT array_to_string( | |
ARRAY( | |
SELECT gxadmin_random_tag(input || generate_series) | |
FROM generate_series(1, size) | |
), | |
' ' | |
); | |
\$\$ LANGUAGE SQL; | |
-- DONE | |
-- https://stackoverflow.com/a/48901446 | |
CREATE OR REPLACE FUNCTION gxadmin_round_sf(n numeric, digits integer) | |
RETURNS numeric | |
AS \$\$ | |
SELECT floor(n / (10 ^ floor(log(n) - digits + 1))) * (10 ^ floor(log(n) - digits + 1)) | |
\$\$ LANGUAGE SQL IMMUTABLE STRICT; | |
-- api_keys | |
COPY (SELECT 'api_keys') to STDOUT; | |
update api_keys set key = 'random-key-' || user_id; | |
-- cleanup_event is OK | |
-- cleanup_event_dataset_association is OK | |
-- cleanup_event_hda_association is OK | |
-- cleanup_event_history_association is OK
-- cleanup_event_icda_association is OK | |
-- cleanup_event_ldda_association is EMPTY on AU | |
-- cleanup_event_library_association is EMPTY on AU | |
-- cleanup_event_library_dataset_association is EMPTY on AU | |
-- cleanup_event_library_folder_association is EMPTY on AU | |
-- cleanup_event_metadata_file_association is OK | |
-- cleanup_event_user_association is EMPTY on AU, EU | |
-- cloudauthz is EMPTY on AU, EU | |
-- custos_authnz_token is EMPTY on AU, EU | |
-- data_manager_history_association is OK (user_id, history_id) | |
-- data_manager_job_association is OK (job_id, DM id) | |
-- dataset | |
COPY (SELECT 'dataset') to STDOUT; | |
-- TODO: Do external_filename, and _extra_files_path need to be cleaned? | |
-- TODO: this is imperfect, we do something better in GRT where we take 2 SDs | |
-- https://stackoverflow.com/questions/48900936/postgresql-rounding-to-significant-figures | |
-- MAYBE use synthetic data here? | |
update dataset set | |
file_size = round(file_size, -3), | |
total_size = round(total_size, -3); | |
-- dataset_collection is OK (type, count, etc.) | |
-- dataset_collection_element | |
COPY (SELECT 'dataset_collection_element') to STDOUT; | |
update dataset_collection_element set | |
element_identifier = gxadmin_random_slug(id::text, 10); | |
-- dataset_hash is EMPTY on AU, EU | |
-- dataset_permissions is OK (role, dataset) | |
-- dataset_source | |
COPY (SELECT 'dataset_source') to STDOUT; | |
update dataset_source set | |
source_uri = 'https://example.org/test.dat'; | |
-- dataset_source_hash is EMPTY on AU, EU | |
-- dataset_tag_association is EMPTY on AU, EU | |
-- default_history_permissions is OK (hist_id, action, role_id) | |
-- default_quota_association is OK | |
-- default_user_permissions is OK (user, action, role) | |
-- deferred_job is EMPTY on AU, EU | |
-- dynamic_tool is EMPTY on AU, EU | |
-- event is EMPTY on AU, EU | |
-- extended_metadata is EMPTY on AU, EU | |
-- extended_metadata_index is EMPTY on AU, EU | |
-- external_service is EMPTY on AU, EU | |
-- form_definition is EMPTY on AU, EU | |
-- form_definition_current is EMPTY on AU, EU | |
-- form_values is EMPTY on AU, EU | |
-- galaxy_group | |
COPY (SELECT 'galaxy_group') to STDOUT; | |
update galaxy_group set | |
name = 'group-' || id; | |
-- galaxy_session | |
COPY (SELECT 'galaxy_session') to STDOUT; | |
update galaxy_session set | |
remote_host = gxadmin_random_ip(remote_host) | |
where remote_host is not null; | |
update galaxy_session set | |
remote_addr = gxadmin_random_ip(remote_addr) | |
where remote_addr is not null; | |
update galaxy_session set | |
referer = 'https://example.org' | |
where referer is not null; | |
update galaxy_session set | |
session_key = gxadmin_random_pw(id::text, 16), prev_session_id = null; | |
-- galaxy_session_to_history is OK (time, session_id, hist_id) | |
-- galaxy_user | |
COPY (SELECT 'galaxy_user') to STDOUT; | |
-- TODO: better email length/content distribution | |
-- TODO: better username length/content distribution. UNICODE. | |
-- TODO: rounding to SDs. | |
update galaxy_user set | |
email = 'user-' || id || '@example.org', | |
password = 'x', | |
username = 'user-' || id, | |
form_values_id = null, | |
activation_token = '', | |
disk_usage = round(disk_usage, -5); | |
-- galaxy_user_openid | |
COPY (SELECT 'galaxy_user_openid') to STDOUT; | |
update galaxy_user_openid set | |
openid = 'https://example.org/identity/' || user_id; | |
-- genome_index_tool_data is EMPTY on AU, EU | |
-- group_quota_association is OK (group_id, quota id) | |
-- group_role_association is OK (group_id, role id) | |
-- history | |
COPY (SELECT 'history') to STDOUT; | |
-- TODO: better name distribution. UNICODE. (I see greek, chinese, etc on EU) | |
update history set | |
name = gxadmin_random_tag(id::text) | |
where name != 'Unnamed history'; | |
update history set | |
slug = gxadmin_random_slug(id::text, 30) | |
where slug != '' or slug is not null; | |
-- history_annotation_association | |
COPY (SELECT 'history_annotation_association') to STDOUT; | |
-- TODO: better distribution. UNICODE. | |
update history_annotation_association set | |
annotation = gxadmin_random_text(id::text, 30) | |
where annotation is not null; | |
-- history_dataset_association | |
COPY (SELECT 'history_dataset_association') to STDOUT; | |
-- TODO: SIGNIFICANT validation needed. | |
update history_dataset_association set | |
name = gxadmin_random_tag(id::text) || '.' || extension; | |
update history_dataset_association set | |
peek = 'redacted' where peek is not null; | |
update history_dataset_association set | |
designation = 'hda-' || id::text | |
where designation is not null; | |
update history_dataset_association set | |
info = 'redacted' | |
where info is not null and info != 'Job output deleted by user before job completed'; | |
update history_dataset_association set | |
metadata = null; | |
-- history_dataset_association_annotation_association | |
COPY (SELECT 'history_dataset_association_annotation_association') to STDOUT; | |
update history_dataset_association_annotation_association set | |
annotation = gxadmin_random_text(id::text, 30) | |
where annotation is not null; | |
-- history_dataset_association_display_at_authorization is OK (user_id, hda, site=ucsc_main) | |
-- history_dataset_association_history | |
COPY (SELECT 'history_dataset_association_history') to STDOUT; | |
-- TODO: distribution. Consistency with HDA? | |
update history_dataset_association_history set | |
name = gxadmin_random_tag(history_dataset_association_id::text) || '.' || extension; | |
update history_dataset_association_history set | |
metadata = null; | |
-- history_dataset_association_rating_association is EMPTY on AU, EU | |
-- history_dataset_association_subset is OK (hda id, hdas id, location) | |
-- history_dataset_association_tag_association | |
COPY (SELECT 'history_dataset_association_tag_association') to STDOUT; | |
-- user_tname == 'group' and 'name' are special. | |
update history_dataset_association_tag_association set | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname in ('group', 'name'); | |
-- Sometimes people use a different prefix
update history_dataset_association_tag_association set | |
user_tname = gxadmin_random_tag(id::text || 'tname'), | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname not in ('group', 'name') and user_tname is not null; | |
-- Otherwise just set some data | |
update history_dataset_association_tag_association set | |
user_tname = gxadmin_random_tag(id::text) | |
where user_tname is null; | |
-- history_dataset_collection_annotation_association is EMPTY on AU, EU | |
-- history_dataset_collection_association | |
COPY (SELECT 'history_dataset_collection_association') to STDOUT; | |
update history_dataset_collection_association set | |
name = 'hdca-' || id; | |
-- history_dataset_collection_rating_association is EMPTY on AU | |
-- history_dataset_collection_tag_association | |
COPY (SELECT 'history_dataset_collection_tag_association') to STDOUT; | |
-- user_tname == 'group' and 'name' are special. | |
update history_dataset_collection_tag_association set | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname in ('group', 'name'); | |
-- Sometimes people use a different prefix
update history_dataset_collection_tag_association set | |
user_tname = gxadmin_random_tag(id::text || 'tname'), | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname not in ('group', 'name') and user_tname is not null; | |
-- Otherwise just set some data | |
update history_dataset_collection_tag_association set | |
user_tname = gxadmin_random_tag(id::text) | |
where user_tname is null; | |
-- history_rating_association is OK | |
-- history_tag_association | |
COPY (SELECT 'history_tag_association') to STDOUT; | |
-- user_tname == 'group' and 'name' are special. | |
update history_tag_association set | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname in ('group', 'name'); | |
-- Sometimes people use a different prefix
update history_tag_association set | |
user_tname = gxadmin_random_tag(id::text || 'tname'), | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname not in ('group', 'name') and user_tname is not null; | |
-- Otherwise just set some data | |
update history_tag_association set | |
user_tname = gxadmin_random_tag(id::text) | |
where user_tname is null; | |
-- history_user_share_association is MAYBE OK (hist_id, user_id) You can determine networks, but same with groups. | |
-- implicit_collection_jobs is OK (id, pop state) | |
-- implicit_collection_jobs_job_association is OK | |
-- implicitly_converted_dataset_association is OK | |
-- implicitly_created_dataset_collection_inputs is OK | |
-- interactivetool_entry_point is EMPTY on AU | |
-- job | |
COPY (SELECT 'job') to STDOUT; | |
update job set | |
command_line = 'redacted', | |
destination_params = null, | |
dependencies = null; | |
update job set | |
tool_stdout = 'redacted' | |
where tool_stdout != null; | |
update job set | |
tool_stderr = 'redacted' | |
where tool_stderr != null; | |
update job set | |
traceback = 'redacted' | |
where traceback != null; | |
update job set | |
params = 'redacted' | |
where params != null; | |
update job set | |
job_messages = 'redacted' | |
where job_messages != null; | |
update job set | |
job_stdout = 'redacted' | |
where job_stdout != null; | |
update job set | |
job_stderr = 'redacted' | |
where job_stderr is not null; | |
-- job_container_association | |
COPY (SELECT 'job_container_association') to STDOUT; | |
update job_container_association set | |
container_name = '', | |
container_info = ''; | |
-- job_export_history_archive | |
COPY (SELECT 'job_export_history_archive') to STDOUT; | |
update job_export_history_archive set | |
history_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'h', 6), | |
datasets_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'd', 6), | |
jobs_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'j', 6); | |
-- job_external_output_metadata | |
COPY (SELECT 'job_external_output_metadata') to STDOUT; | |
update job_external_output_metadata set | |
filename_in = '/tmp/job_working_directory/' || job_id || '/metadata_in_' || gxadmin_random_pw(id::text || 'i', 6), | |
filename_out = '/tmp/job_working_directory/' || job_id || '/metadata_out_' || gxadmin_random_pw(id::text || 'o', 6), | |
filename_results_code = '/tmp/job_working_directory/' || job_id || '/metadata_results_' || gxadmin_random_pw(id::text || 'm', 6), | |
filename_kwds = '/tmp/job_working_directory/' || job_id || '/metadata_kwds_' || gxadmin_random_pw(id::text || 'k', 6), | |
filename_override_metadata = '/tmp/job_working_directory/' || job_id || '/metadata_override_' || gxadmin_random_pw(id::text || 'o', 6); | |
-- job_import_history_archive | |
COPY (SELECT 'job_import_history_archive') to STDOUT; | |
update job_import_history_archive set | |
archive_dir = '/tmp/tmp' || gxadmin_random_pw(id::text, 6); | |
-- job_metric_numeric is OK | |
-- job_metric_text | |
COPY (SELECT 'job_metric_text') to STDOUT; | |
truncate job_metric_text; | |
-- job_parameter | |
COPY (SELECT 'job_parameter') to STDOUT; | |
-- TODO: length distribution? Name distribution? | |
update job_parameter set | |
name = 'param-name', | |
value = 'param-value'; | |
-- job_state_history is OK | |
-- job_to_implicit_output_dataset_collection is MAYBE OK (param name?) | |
-- job_to_input_dataset is MAYBE OK (param name?) | |
-- job_to_input_dataset_collection is MAYBE OK (param name?) | |
-- job_to_input_library_dataset is EMPTY on AU | |
-- job_to_output_dataset is MAYBE OK (param name?) | |
-- job_to_output_dataset_collection is MAYBE OK (param name?) | |
-- job_to_output_library_dataset is OK | |
-- kombu_message | |
COPY (SELECT 'kombu_message') to STDOUT; | |
truncate kombu_message; | |
-- kombu_queue is OK (queue name only) | |
-- library | |
COPY (SELECT 'library') to STDOUT; | |
update library set | |
name = gxadmin_random_tag(id::text), | |
description = gxadmin_random_tag(id::text || 'desc'), | |
synopsis = gxadmin_random_tag(id::text || 'synopsis'); | |
-- library_dataset | |
COPY (SELECT 'library_dataset') to STDOUT; | |
update library_dataset set | |
name = 'lda-' || id, | |
info = ''; | |
-- library_dataset_collection_annotation_association is EMPTY on AU, EU | |
-- library_dataset_collection_association is EMPTY on AU, EU | |
-- library_dataset_collection_rating_association is EMPTY on AU, EU | |
-- library_dataset_collection_tag_association is EMPTY on AU, EU | |
-- library_dataset_dataset_association | |
COPY (SELECT 'library_dataset_dataset_association') to STDOUT; | |
-- TODO: SIGNIFICANT validation needed. | |
update library_dataset_dataset_association set | |
name = gxadmin_random_tag(id::text) || '.' || extension; | |
update library_dataset_dataset_association set | |
peek = 'redacted' where peek is not null; | |
update library_dataset_dataset_association set | |
designation = 'hda-' || id::text | |
where designation is not null; | |
update library_dataset_dataset_association set | |
info = 'redacted' where info is not null; | |
update library_dataset_dataset_association set | |
message = 'redacted' where message is not null; | |
update library_dataset_dataset_association set | |
metadata = null; | |
-- library_dataset_dataset_association_permissions is OK (permissions, ldda id, role id) | |
-- library_dataset_dataset_association_tag_association is EMPTY on AU, EU | |
-- library_dataset_dataset_info_association is EMPTY on AU, EU | |
-- library_dataset_permissions is OK (permissions, ld id, role id) | |
-- library_folder | |
COPY (SELECT 'library_folder') to STDOUT; | |
update library_folder set | |
name = gxadmin_random_tag(id::text), | |
description = gxadmin_random_text(id::text || 'desc', 10); | |
-- library_folder_info_association is EMPTY on AU, EU | |
-- library_folder_permissions is OK (permissions, lf id, role id) | |
-- library_info_association is EMPTY on AU | |
-- library_permissions is OK (permissions, lib id, role id) | |
-- metadata_file is OK | |
-- migrate_tools is EMPTY on AU, EU | |
-- migrate_version is OK | |
-- oidc_user_authnz_tokens | |
COPY (SELECT 'oidc_user_authnz_tokens') to STDOUT; | |
update oidc_user_authnz_tokens set | |
uid = 'redacted', | |
extra_data = null; | |
-- page | |
COPY (SELECT 'page') to STDOUT; | |
update page set | |
title = gxadmin_random_tag(id::text), | |
slug = gxadmin_random_slug(id::text, 10); | |
-- page_annotation_association | |
COPY (SELECT 'page_annotation_association') to STDOUT; | |
update page_annotation_association set | |
annotation = gxadmin_random_tag(id::text); | |
-- page_rating_association is EMPTY on AU | |
-- page_revision | |
COPY (SELECT 'page_revision') to STDOUT; | |
update page_revision set | |
content = '<p>Some <b>content</b></p>'; | |
-- page_tag_association is EMPTY on AU | |
-- user_tname == 'group' and 'name' are special. | |
update page_tag_association set | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname in ('group', 'name'); | |
-- Sometimes people use a different prefix | |
update page_tag_association set | |
user_tname = gxadmin_random_tag(id::text || 'tname'), | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname not in ('group', 'name') and user_tname is not null; | |
-- Otherwise just set some data | |
update page_tag_association set | |
user_tname = gxadmin_random_tag(id::text) | |
where user_tname is null; | |
-- page_user_share_association is EMPTY on AU, fine though | |
-- password_reset_token | |
COPY (SELECT 'password_reset_token') to STDOUT; | |
update password_reset_token set | |
token = md5('x' || random()); | |
-- post_job_action | |
COPY (SELECT 'post_job_action') to STDOUT; | |
update post_job_action set | |
action_arguments = convert_to('{}', 'UTF8') | |
where action_type != 'RenameDatasetAction'; | |
update post_job_action set | |
action_arguments = convert_to('{"newname": "Control check"}', 'UTF8') | |
where action_type = 'RenameDatasetAction'; | |
-- post_job_action_association is OK | |
-- psa_association | |
COPY (SELECT 'psa_association') to STDOUT; | |
update psa_association set | |
handle = 'redacted', | |
assoc_type = 'redacted'; | |
-- psa_code is EMPTY on AU, EU | |
-- psa_nonce is EMPTY on AU, EU | |
-- psa_partial is EMPTY on AU, EU | |
-- quota | |
COPY (SELECT 'quota') to STDOUT; | |
update quota set | |
name = pg_size_pretty(bytes), | |
description = ''; | |
-- repository_dependency is EMPTY on AU, OK on EU (ts repo id) | |
-- repository_repository_dependency_association is EMPTY on AU, OK on EU | |
-- request is EMPTY on AU, EU | |
-- request_event is EMPTY on AU, EU | |
-- request_type is EMPTY on AU, EU | |
-- request_type_external_service_association is EMPTY on AU, EU | |
-- request_type_permissions is EMPTY on AU, EU | |
-- request_type_run_association is EMPTY on AU, EU | |
-- role | |
COPY (SELECT 'role') to STDOUT; | |
update role set | |
name = gxadmin_random_email(id::text), | |
description = 'Private role for ' || gxadmin_random_email(id::text) | |
where type = 'private'; | |
update role set | |
name = gxadmin_random_text(id::text, 3), | |
description = 'System role' | |
where type = 'system'; | |
update role set | |
name = gxadmin_random_text(id::text, 5), | |
description = 'Sharing role' | |
where type = 'sharing'; | |
update role set | |
name = gxadmin_random_text(id::text, 2), | |
description = '' | |
where type not in ('private', 'system', 'sharing'); | |
-- run is EMPTY on AU, EU | |
-- sample is EMPTY on AU, EU | |
-- sample_dataset is EMPTY on AU, EU | |
-- sample_event is EMPTY on AU, EU | |
-- sample_run_association is EMPTY on AU, EU | |
-- sample_state is EMPTY on AU, EU | |
-- stored_workflow | |
COPY (SELECT 'stored_workflow') to STDOUT; | |
update stored_workflow set | |
name = gxadmin_random_tag(id::text), | |
slug = gxadmin_random_slug(id::text, 10); | |
-- stored_workflow_annotation_association | |
COPY (SELECT 'stored_workflow_annotation_association') to STDOUT; | |
update stored_workflow_annotation_association set | |
annotation = gxadmin_random_text(id::text) | |
where annotation is not null; | |
-- stored_workflow_menu_entry is OK | |
-- stored_workflow_rating_association is OK | |
-- stored_workflow_tag_association | |
COPY (SELECT 'stored_workflow_tag_association') to STDOUT; | |
-- user_tname == 'group' and 'name' are special. | |
update stored_workflow_tag_association set | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname in ('group', 'name'); | |
-- Sometimes people use a different prefix | |
update stored_workflow_tag_association set | |
user_tname = gxadmin_random_tag(tag_id::text), | |
value = lower(gxadmin_random_tag(id::text)), | |
user_value = gxadmin_random_tag(id::text) | |
where user_tname not in ('group', 'name') and user_tname is not null; | |
-- Otherwise just set some data | |
update stored_workflow_tag_association set | |
user_tname = gxadmin_random_tag(tag_id::text) | |
where user_tname is null; | |
-- stored_workflow_user_share_connection is OK | |
-- tag | |
COPY (SELECT 'tag') to STDOUT; | |
update tag set | |
name = gxadmin_random_tag(id::text) | |
where name not in ('name', 'group'); | |
-- task is EMPTY on AU, not on EU | |
COPY (SELECT 'task') to STDOUT; | |
update task set | |
command_line = '', | |
param_filename = '', | |
runner_name = '', | |
tool_stdout = '', | |
tool_stderr = '', | |
task_runner_name = '', | |
task_runner_external_id = '', | |
prepare_input_files_cmd = '', | |
working_directory = '', | |
info = '', | |
job_messages = '', | |
job_stdout = '', | |
job_stderr = ''; | |
-- task_metric_numeric is EMPTY on AU, EU | |
-- task_metric_text is EMPTY on AU, EU | |
-- tool_dependency is EMPTY on AU, OK on EU | |
-- tool_shed_repository is EMPTY on AU, OK on EU | |
-- tool_tag_association is EMPTY on AU, EU | |
-- tool_version is EMPTY on AU, OK on EU | |
-- tool_version_association is EMPTY on AU, EU | |
-- transfer_job is EMPTY on AU, OK on EU | |
-- user_action is EMPTY on AU, EU | |
-- user_address | |
COPY (SELECT 'user_address') to STDOUT; | |
update user_address set | |
"desc" = gxadmin_random_text(id::text, 1), | |
name = gxadmin_random_text(id::text || 'name', 1), | |
institution = gxadmin_random_text(id::text || 'inst', 1), | |
address = gxadmin_random_text(id::text || 'addr', 1), | |
city = gxadmin_random_text(id::text || 'city', 1), | |
state = gxadmin_random_text(id::text || 'stat', 1), | |
postal_code = gxadmin_random_number(id::text, 5), | |
country = 'Australia', | |
phone = gxadmin_random_number(id::text, 10); | |
-- user_group_association is OK | |
-- user_preference | |
COPY (SELECT 'user_preference') to STDOUT; | |
-- TODO: make this better? I just don't feel safe given genomespace tokens, etc. | |
truncate user_preference; | |
-- user_quota_association is OK | |
-- user_role_association is OK | |
-- validation_error is EMPTY on AU | |
-- visualization | |
COPY (SELECT 'visualization') to STDOUT; | |
update visualization set | |
title = gxadmin_random_text(id::text, 3), | |
slug = gxadmin_random_slug(id::text, 20); | |
-- visualization_annotation_association is EMPTY on AU | |
update visualization_annotation_association set | |
annotation = gxadmin_random_text(id::text, 10); | |
-- visualization_rating_association is EMPTY on AU | |
-- visualization_revision is MAYBE OK | |
-- visualization_tag_association is EMPTY on AU | |
-- visualization_user_share_association is EMPTY on AU | |
-- worker_process is OK | |
-- workflow | |
COPY (SELECT 'workflow') to STDOUT; | |
update workflow set | |
name = gxadmin_random_text(id::text, 3); | |
update workflow set | |
reports_config = convert_to('{}', 'UTF8') | |
where reports_config is not null; | |
-- workflow_invocation is OK | |
-- workflow_invocation_output_dataset_association is OK | |
-- workflow_invocation_output_dataset_collection_association is OK | |
-- workflow_invocation_output_value is EMPTY on AU | |
-- workflow_invocation_step is OK | |
-- workflow_invocation_step_output_dataset_association is MAYBE OK (output_name?) | |
-- workflow_invocation_step_output_dataset_collection_association is MAYBE OK (output_name?) | |
-- workflow_invocation_to_subworkflow_invocation_association is OK | |
-- workflow_output is MAYBE OK (output_name) | |
-- workflow_request_input_parameters is OK | |
-- workflow_request_input_step_parameter | |
COPY (SELECT 'workflow_request_input_step_parameter') to STDOUT; | |
update workflow_request_input_step_parameter set | |
parameter_value = convert_to('asdf', 'UTF8'); | |
-- workflow_request_step_states | |
COPY (SELECT 'workflow_request_step_states') to STDOUT; | |
update workflow_request_step_states set | |
value = convert_to('{}', 'UTF8'); | |
-- workflow_request_to_input_collection_dataset is OK | |
-- workflow_request_to_input_dataset is OK | |
-- workflow_step | |
COPY (SELECT 'workflow_step') to STDOUT; | |
update workflow_step set | |
tool_inputs = convert_to('{}', 'UTF8'); | |
update workflow_step set | |
label = gxadmin_random_slug(id::text, 10) | |
where label is not null; | |
-- workflow_step_annotation_association | |
COPY (SELECT 'workflow_step_annotation_association') to STDOUT; | |
update workflow_step_annotation_association set | |
annotation = gxadmin_random_text(id::text, 10); | |
-- workflow_step_connection is OK | |
-- workflow_step_input is OK | |
-- workflow_step_tag_association is EMPTY on AU, EU | |
-- workflow_tag_association is EMPTY on AU, EU | |
EOF | |
txn_pre=$(txn_prefix "$commit_flag") | |
txn_pos=$(txn_postfix "$commit_flag") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_fail-wfi() { ## <wf-invocation-id> [--commit]: Sets a workflow invocation state to failed | |
handle_help "$@" <<-EOF | |
Sets a workflow invocation's state to "failed" | |
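Example (the invocation id is a placeholder): | |
$ gxadmin mutate fail-wfi 1234 --commit | |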
EOF | |
assert_count_ge $# 1 "Must supply a wf-invocation-id" | |
id=$1 | |
read -r -d '' QUERY <<-EOF | |
UPDATE | |
workflow_invocation | |
SET | |
state = 'failed' | |
WHERE | |
id = '$id' | |
EOF | |
txn_pre=$(txn_prefix "$2") | |
txn_pos=$(txn_postfix "$2") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_oidc-by-emails() { ## <email_from> <email_to> [--commit]: Reassign OIDC account between users. | |
handle_help "$@" <<-EOF | |
Workaround for users who create a duplicate account by clicking the OIDC login button when their existing account's email differs only in case. | |
Please note that this function is case-sensitive. Fixes https://github.com/galaxyproject/galaxy/issues/9981. | |
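Example (addresses are placeholders): | |
$ gxadmin mutate oidc-by-emails '[email protected]' '[email protected]' --commit | |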
EOF | |
assert_count_ge $# 2 "Must supply an email_from and an email_to"; | |
read -r -d '' QUERY <<-EOF | |
UPDATE oidc_user_authnz_tokens | |
SET user_id=correctuser.id | |
FROM ( | |
SELECT id FROM galaxy_user WHERE email='$2' | |
) AS correctuser | |
WHERE user_id = (SELECT id FROM galaxy_user WHERE email='$1') | |
EOF | |
txn_pre=$(txn_prefix "$3") | |
txn_pos=$(txn_postfix "$3") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
mutate_set-quota-for-oidc-user() { ##? <provider_name> <quota_name> [--commit]: Set quota for OIDC users. | |
handle_help "$@" <<-EOF | |
Set quota for OIDC users. | |
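Example (provider and quota names are placeholders): | |
$ gxadmin mutate set-quota-for-oidc-user elixir 'default quota' --commit | |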
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH qid AS ( | |
SELECT id FROM quota WHERE name='$arg_quota_name' | |
) | |
DELETE FROM user_quota_association WHERE quota_id = ( SELECT id FROM qid ); | |
WITH qid AS ( | |
SELECT id FROM quota WHERE name='$arg_quota_name' | |
), t AS ( | |
SELECT user_id, ( SELECT id FROM qid ), now(), now() FROM oidc_user_authnz_tokens WHERE provider='$arg_provider_name' | |
) | |
INSERT INTO user_quota_association (user_id, quota_id, create_time, update_time) | |
SELECT * FROM t | |
EOF | |
txn_pre=$(txn_prefix "$arg_commit") | |
txn_pos=$(txn_postfix "$arg_commit") | |
QUERY="$txn_pre $QUERY; $txn_pos" | |
} | |
registered_subcommands="$registered_subcommands query" | |
_query_short_help="DB Queries" | |
_query_long_help=" | |
'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants. | |
In some cases 'iquery' is supported for InfluxDB compatible output. | |
In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/ | |
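For example, the following are all valid ways of invoking the 'latest-users' query defined below: | |
gxadmin query latest-users | |
gxadmin csvquery latest-users | |
gxadmin explainquery latest-users | |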
" | |
query_latest-users() { ## : 40 recently registered users | |
handle_help "$@" <<-EOF | |
Returns 40 most recently registered users | |
$ gxadmin query latest-users | |
id | create_time | disk_usage | username | email | groups | active | |
----+-------------------------------+------------+----------+----------------+-----------------------------------+-------- | |
3 | 2019-03-07 13:06:37.945403+00 | | beverly | [email protected] | | t | |
2 | 2019-03-07 13:06:23.369201+00 | 826 bytes | alice | [email protected] | | t | |
1 | 2018-11-19 14:54:30.969713+00 | 869 MB | helena | [email protected] | training-asdf training-hogeschool | t | |
(3 rows) | |
EOF | |
username=$(gdpr_safe galaxy_user.username username) | |
email=$(gdpr_safe galaxy_user.email email) | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, | |
create_time AT TIME ZONE 'UTC' as create_time, | |
pg_size_pretty(disk_usage) as disk_usage, | |
$username, | |
$email, | |
array_to_string(ARRAY( | |
select galaxy_group.name from galaxy_group where id in ( | |
select group_id from user_group_association where user_group_association.user_id = galaxy_user.id | |
) | |
), ' ') as groups, | |
active | |
FROM galaxy_user | |
ORDER BY create_time desc | |
LIMIT 40 | |
EOF | |
} | |
query_tool-usage() { ##? [weeks]: Counts of tool runs in the past weeks (default = all) | |
handle_help "$@" <<-EOF | |
$ gxadmin tool-usage | |
tool_id | count | |
------------------------------------------------------------------------+-------- | |
toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0 | 958154 | |
Grouping1 | 638890 | |
toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0 | 326959 | |
toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0 | 320236 | |
addValue | 313470 | |
toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0 | 312735 | |
upload1 | 103595 | |
toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 | 52861 | |
Filter1 | 43253 | |
EOF | |
where= | |
if (( arg_weeks > 0 )); then | |
where="WHERE j.create_time > (now() - '${arg_weeks} weeks'::interval)" | |
fi | |
fields="count=1" | |
tags="tool_id=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
j.tool_id, count(*) AS count | |
FROM job j | |
$where | |
GROUP BY j.tool_id | |
ORDER BY count DESC | |
EOF | |
} | |
query_tool-usage-over-time() { ##? [searchterm]: Counts of tool runs by month, filtered by a tool id search | |
handle_help "$@" <<-EOF | |
$ gxadmin tool-usage-over-time | |
tool_id | count | |
------------------------------------------------------------------------+-------- | |
toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0 | 958154 | |
Grouping1 | 638890 | |
toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0 | 326959 | |
toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0 | 320236 | |
addValue | 313470 | |
toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0 | 312735 | |
upload1 | 103595 | |
toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 | 52861 | |
Filter1 | 43253 | |
EOF | |
where= | |
if [[ "$arg_searchterm" != "" ]]; then | |
where="WHERE tool_id like '%$arg_searchterm%'" | |
fi | |
read -r -d '' QUERY <<-EOF | |
WITH | |
cte | |
AS ( | |
SELECT | |
date_trunc('month', create_time), | |
tool_id | |
FROM | |
job | |
$where | |
) | |
SELECT | |
date_trunc, tool_id, count(*) | |
FROM | |
cte | |
GROUP BY | |
date_trunc, tool_id | |
ORDER BY | |
date_trunc ASC, count DESC | |
EOF | |
} | |
query_tool-popularity() { ##? [months|24]: Most run tools by month (tool_predictions) | |
handle_help "$@" <<-EOF | |
See most popular tools by month | |
$ ./gxadmin query tool-popularity 1 | |
tool_id | month | count | |
---------------------------+------------+------- | |
circos | 2019-02-01 | 20 | |
upload1 | 2019-02-01 | 12 | |
require_format | 2019-02-01 | 9 | |
circos_gc_skew | 2019-02-01 | 7 | |
circos_wiggle_to_scatter | 2019-02-01 | 3 | |
test_history_sanitization | 2019-02-01 | 2 | |
circos_interval_to_tile | 2019-02-01 | 1 | |
__SET_METADATA__ | 2019-02-01 | 1 | |
(8 rows) | |
EOF | |
fields="count=2" | |
tags="tool_id=0;month=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
tool_id, | |
date_trunc('month', create_time AT TIME ZONE 'UTC')::date as month, | |
count(*) | |
FROM job | |
WHERE create_time > (now() AT TIME ZONE 'UTC' - '$arg_months months'::interval) | |
GROUP BY tool_id, month | |
ORDER BY month desc, count desc | |
EOF | |
} | |
query_workflow-connections() { ##? [--all]: The connections of tools, from output to input, in the latest (or all) versions of user workflows (tool_predictions) | |
handle_help "$@" <<-EOF | |
This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections in workflows. | |
$ gxadmin query workflow-connections | |
wf_id | wf_updated | in_id | in_tool | in_tool_v | out_id | out_tool | out_tool_v | published | deleted | has_errors | |
-------+---------------------+-------+-------------------+-----------+--------+-------------------+---------------------------------------------- | |
3 | 2013-02-07 16:48:00 | 5 | Grep1 | 1.0.1 | 12 | | | f | f | f | |
3 | 2013-02-07 16:48:00 | 6 | Cut1 | 1.0.1 | 7 | Remove beginning1 | 1.0.0 | f | f | f | |
3 | 2013-02-07 16:48:00 | 7 | Remove beginning1 | 1.0.0 | 5 | Grep1 | 1.0.1 | f | f | f | |
3 | 2013-02-07 16:48:00 | 8 | addValue | 1.0.0 | 6 | Cut1 | 1.0.1 | t | f | f | |
3 | 2013-02-07 16:48:00 | 9 | Cut1 | 1.0.1 | 7 | Remove beginning1 | 1.0.0 | f | f | f | |
3 | 2013-02-07 16:48:00 | 10 | addValue | 1.0.0 | 11 | Paste1 | 1.0.0 | t | f | f | |
3 | 2013-02-07 16:48:00 | 11 | Paste1 | 1.0.0 | 9 | Cut1 | 1.0.1 | f | f | f | |
3 | 2013-02-07 16:48:00 | 11 | Paste1 | 1.0.0 | 8 | addValue | 1.0.0 | t | t | f | |
4 | 2013-02-07 16:48:00 | 13 | cat1 | 1.0.0 | 18 | addValue | 1.0.0 | t | f | f | |
4 | 2013-02-07 16:48:00 | 13 | cat1 | 1.0.0 | 20 | Count1 | 1.0.0 | t | t | f | |
EOF | |
read -r -d '' wf_filter <<-EOF | |
WHERE | |
workflow.id in ( | |
SELECT | |
workflow.id | |
FROM | |
stored_workflow | |
LEFT JOIN | |
workflow on stored_workflow.latest_workflow_id = workflow.id | |
) | |
EOF | |
if [[ -n "$arg_all" ]]; then | |
wf_filter="" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
workflow.id as wf_id, | |
workflow.update_time::DATE as wf_updated, | |
ws_in.id as in_id, | |
ws_in.tool_id as in_tool, | |
ws_in.tool_version as in_tool_v, | |
ws_out.id as out_id, | |
ws_out.tool_id as out_tool, | |
ws_out.tool_version as out_tool_v, | |
sw.published as published, | |
sw.deleted as deleted, | |
workflow.has_errors as has_errors | |
FROM workflow_step_connection wfc | |
LEFT JOIN workflow_step ws_in ON ws_in.id = wfc.output_step_id | |
LEFT JOIN workflow_step_input wsi ON wfc.input_step_input_id = wsi.id | |
LEFT JOIN workflow_step ws_out ON ws_out.id = wsi.workflow_step_id | |
LEFT JOIN workflow_output as wo ON wsi.workflow_step_id = wfc.output_step_id | |
LEFT JOIN workflow on ws_in.workflow_id = workflow.id | |
LEFT JOIN stored_workflow as sw on sw.latest_workflow_id = workflow.id | |
$wf_filter | |
EOF | |
} | |
query_history-connections() { ## : The connections of tools, from output to input, in histories (tool_predictions) | |
handle_help "$@" <<-EOF | |
This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections. | |
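Example invocation, exporting the connections as TSV for model building (the output filename is just an example): | |
$ gxadmin tsvquery history-connections > history-connections.tsv | |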
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
h.id AS h_id, | |
h.update_time::DATE AS h_update, | |
jtod.job_id AS in_id, | |
j.tool_id AS in_tool, | |
j.tool_version AS in_tool_v, | |
jtid.job_id AS out_id, | |
j2.tool_id AS out_tool, | |
j2.tool_version AS out_ver | |
FROM | |
job AS j | |
LEFT JOIN history AS h ON j.history_id = h.id | |
LEFT JOIN job_to_output_dataset AS jtod ON j.id = jtod.job_id | |
LEFT JOIN job_to_input_dataset AS jtid ON jtod.dataset_id = jtid.dataset_id | |
LEFT JOIN job AS j2 ON jtid.job_id = j2.id | |
WHERE | |
jtid.job_id IS NOT NULL | |
EOF | |
} | |
query_datasets-created-daily() { ## : The min/max/average/p95/p99 of total size of datasets created in a single day. | |
handle_help "$@" <<-EOF | |
$ gxadmin query datasets-created-daily | |
min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | sum | stddev | |
-----+-----------+---------+-----------------------+-----------+-----------+-----------+-----------+------------+--------------- | |
2 | 303814 | 6812862 | 39653071.914285714286 | 30215616 | 177509882 | 415786146 | 533643009 | 1387857517 | 96920615.1745 | |
(1 row) | |
or more readably: | |
$ gxadmin query datasets-created-daily --human | |
min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | sum | stddev | |
---------+-----------+---------+-------+-----------+---------+---------+--------+---------+-------- | |
2 bytes | 297 kB | 6653 kB | 38 MB | 29 MB | 169 MB | 397 MB | 509 MB | 1324 MB | 92 MB | |
(1 row) | |
EOF | |
if [[ $1 == "--human" ]]; then | |
summary="$(summary_statistics sum 1)" | |
else | |
summary="$(summary_statistics sum)" | |
fi | |
read -r -d '' QUERY <<-EOF | |
WITH temp_queue_times AS | |
(select | |
date_trunc('day', create_time AT TIME ZONE 'UTC'), | |
sum(coalesce(total_size, file_size)) | |
from dataset | |
group by date_trunc | |
order by date_trunc desc) | |
select | |
$summary | |
from temp_queue_times | |
EOF | |
} | |
query_largest-collection() { ## : Returns the size of the single largest collection | |
handle_help "$@" <<-EOF | |
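The reported value is the number of elements in the single largest dataset collection, counted from dataset_collection_element as in the query below. | |
$ gxadmin query largest-collection | |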
EOF | |
fields="count=0" | |
tags="" | |
read -r -d '' QUERY <<-EOF | |
WITH temp_table_collection_count AS ( | |
SELECT count(*) | |
FROM dataset_collection_element | |
GROUP BY dataset_collection_id | |
ORDER BY count desc | |
) | |
select max(count) as count from temp_table_collection_count | |
EOF | |
} | |
query_queue-time() { ##? <tool_id>: The min/95%/99%/max time a specific tool spends in the queued state. | |
handle_help "$@" <<-EOF | |
$ gxadmin query queue-time toolshed.g2.bx.psu.edu/repos/nilesh/rseqc/rseqc_geneBody_coverage/2.6.4.3 | |
min | perc_95 | perc_99 | max | |
-----------------+-----------------+-----------------+----------------- | |
00:00:15.421457 | 00:00:55.022874 | 00:00:59.974171 | 00:01:01.211995 | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH temp_queue_times AS | |
(select | |
min(a.create_time - b.create_time) as queue_time | |
from | |
job_state_history as a | |
inner join | |
job_state_history as b | |
on | |
(a.job_id = b.job_id) | |
where | |
a.job_id in (select id from job where tool_id like '%${arg_tool_id}%' and state = 'ok' and create_time > (now() AT TIME ZONE 'UTC' - '3 months'::interval)) | |
and a.state = 'running' | |
and b.state = 'queued' | |
group by | |
a.job_id | |
order by | |
queue_time desc | |
) | |
select | |
min(queue_time), | |
percentile_cont(0.95) WITHIN GROUP (ORDER BY queue_time) as perc_95, | |
percentile_cont(0.99) WITHIN GROUP (ORDER BY queue_time) as perc_99, | |
max(queue_time) | |
from temp_queue_times | |
EOF | |
} | |
query_queue() { ## [--by (tool|destination|user)]: Brief overview of currently running jobs grouped by tool (default) or other columns | |
handle_help "$@" <<-EOF | |
$ gxadmin query queue | |
tool_id | state | count | |
-------------------------------------------------------------------+---------+------- | |
toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | queued | 9 | |
toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | running | 7 | |
toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | queued | 6 | |
ebi_sra_main | running | 6 | |
toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3 | queued | 5 | |
toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.2 | running | 5 | |
toolshed.g2.bx.psu.edu/repos/nml/spades/spades/3.11.1+galaxy1 | queued | 4 | |
toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | running | 2 | |
toolshed.g2.bx.psu.edu/repos/nml/metaspades/metaspades/3.9.0 | running | 2 | |
upload1 | running | 2 | |
$ gxadmin query queue --by destination | |
destination_id | state | job_count | |
----------------+---------+----------- | |
normal | running | 128 | |
multicore | running | 64 | |
multicore | queued | 16 | |
$ gxadmin iquery queue --by destination | |
queue-summary-by-destination,state=running,destination_id=normal count=128 | |
queue-summary-by-destination,state=running,destination_id=multicore count=64 | |
queue-summary-by-destination,state=queued,destination_id=multicore count=16 | |
EOF | |
fields="count=2" | |
tags="tool_id=0;state=1" | |
column="tool_id" | |
title="tool" | |
if [[ "$1" == "--by" ]]; then | |
if [[ "$2" == "user" ]]; then | |
tags="user_id=0;state=1" | |
column="user_id" | |
title="user" | |
query_name="queue_by_user" | |
elif [[ "$2" == "destination" ]]; then | |
tags="destination_id=0;state=1" | |
column="destination_id" | |
title="destination" | |
query_name="queue_by_destination" | |
elif [[ "$2" == "tool" ]]; then | |
query_name="queue_by_tool" | |
# nothing else needed | |
else | |
error "Unknown attribute" | |
exit 1 | |
fi | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
${column}, state, count(${column}) as ${title}_count | |
FROM | |
job | |
WHERE | |
state in ('queued', 'running') | |
GROUP BY | |
${column}, state | |
ORDER BY | |
${title}_count desc | |
EOF | |
} | |
query_queue-overview() { ##? [--short-tool-id]: View used mostly for monitoring | |
handle_help "$@" <<-EOF | |
Primarily for monitoring of queue. Optimally used with 'iquery' and passed to Telegraf. | |
$ gxadmin iquery queue-overview | |
queue-overview,tool_id=upload1,tool_version=0.0.1,state=running,handler=main.web.1,destination_id=condor,job_runner_name=condor,user=1 count=1 | |
EOF | |
# Use full tool id by default | |
tool_id="tool_id" | |
if [[ -n "$arg_short_tool_id" ]]; then | |
tool_id="regexp_replace(tool_id, '.*toolshed.*/repos/', '')" | |
fi | |
# Include by default | |
if [ -z "$GDPR_MODE" ]; then | |
user_id='user_id' | |
else | |
user_id="'0'" | |
fi | |
fields="count=6" | |
tags="tool_id=0;tool_version=1;destination_id=2;handler=3;state=4;job_runner_name=5;user_id=7" | |
read -r -d '' QUERY <<-EOF | |
WITH queue AS ( | |
SELECT | |
regexp_replace($tool_id, '/[0-9.a-z+-]+$', '')::TEXT AS tool_id, | |
tool_version::TEXT, | |
COALESCE(destination_id, 'unknown')::TEXT AS destination_id, | |
COALESCE(handler, 'unknown')::TEXT AS handler, | |
state::TEXT, | |
COALESCE(job_runner_name, 'unknown')::TEXT AS job_runner_name, | |
count(*) AS count, | |
$user_id::TEXT AS user_id | |
FROM | |
job | |
WHERE | |
state = 'running' OR state = 'queued' OR state = 'new' | |
GROUP BY | |
tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id | |
) | |
SELECT | |
tool_id, tool_version, destination_id, handler, state, job_runner_name, sum(count), user_id | |
FROM | |
queue | |
GROUP BY | |
tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id | |
EOF | |
} | |
query_queue-details() { | |
query_queue-detail "$@" | |
} | |
query_queue-detail() { ##? [--all] [--seconds] [--since-update]: Detailed overview of running and queued jobs | |
handle_help "$@" <<-EOF | |
$ gxadmin query queue-detail | |
state | id | extid | tool_id | username | time_since_creation | |
---------+---------+---------+---------------------------------------------------------------------------+----------+--------------------- | |
running | 4360629 | 229333 | toolshed.g2.bx.psu.edu/repos/bgruening/infernal/infernal_cmsearch/1.1.2.0 | xxxx | 5 days 11:00:00 | |
running | 4362676 | 230237 | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | xxxx | 4 days 18:00:00 | |
running | 4364499 | 231055 | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | xxxx | 4 days 05:00:00 | |
running | 4366604 | 5183013 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | xxxx | 3 days 20:00:00 | |
running | 4366605 | 5183016 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | xxxx | 3 days 20:00:00 | |
queued | 4350274 | 225743 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 9 days 05:00:00 | |
queued | 4353435 | 227038 | toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3 | xxxx | 8 days 08:00:00 | |
queued | 4361914 | 229712 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 5 days -01:00:00 | |
queued | 4361812 | 229696 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 5 days -01:00:00 | |
queued | 4361939 | 229728 | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | xxxx | 4 days 21:00:00 | |
queued | 4361941 | 229731 | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | xxxx | 4 days 21:00:00 | |
EOF | |
fields="count=9" | |
tags="state=0;id=1;extid=2;tool_id=3;username=4;handler=6;job_runner_name=7;destination_id=8" | |
d="" | |
nonpretty="(" | |
time_column="job.create_time" | |
time_column_name="time_since_creation" | |
if [[ -n "$arg_all" ]]; then | |
d=", 'new'" | |
fi | |
if [[ -n "$arg_seconds" ]]; then | |
fields="$fields;time_since_creation=5" | |
nonpretty="EXTRACT(EPOCH FROM " | |
fi | |
if [[ -n "$arg_since_update" ]]; then | |
time_column="job.update_time" | |
time_column_name="time_since_update" | |
fi | |
username=$(gdpr_safe galaxy_user.username username "Anonymous User") | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.state, | |
job.id, | |
job.job_runner_external_id as extid, | |
job.tool_id, | |
$username, | |
$nonpretty now() AT TIME ZONE 'UTC' - $time_column) as $time_column_name, | |
job.handler, | |
job.job_runner_name, | |
COALESCE(job.destination_id, 'none') as destination_id, | |
1 as count | |
FROM job | |
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id | |
WHERE | |
state in ('running', 'queued'$d) | |
ORDER BY | |
state desc, | |
$time_column_name desc | |
EOF | |
} | |
query_runtime-per-user() { ##? <email>: computation time of user (by email) | |
handle_help "$@" <<-EOF | |
$ gxadmin query runtime-per-user [email protected] | |
sum | |
---------- | |
14:07:39 | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT sum((metric_value || ' second')::interval) | |
FROM job_metric_numeric | |
WHERE job_id in ( | |
SELECT id | |
FROM job | |
WHERE user_id in ( | |
SELECT id | |
FROM galaxy_user | |
where email = '$arg_email' | |
) | |
) AND metric_name = 'runtime_seconds' | |
EOF | |
} | |
query_jobs-nonterminal() { ## [--states=new,queued,running] [--update-time] [--older-than=<interval>] [username|id|email]: Job info of nonterminal jobs separated by user | |
handle_help "$@" <<-EOF | |
You can request the user information by username, id, and user email | |
$ gxadmin query jobs-nonterminal helena-rasche | |
id | tool_id | state | create_time | runner | ext_id | handler | user_id | |
---------+---------------------+---------+----------------------------+--------+--------+-----------------+--------- | |
4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7 | 599 | |
4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7 | 599 | |
4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8 | 599 | |
4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044 | condor | 197545 | handler_main_10 | 599 | |
4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392 | condor | 197553 | handler_main_2 | 599 | |
4760984 | deseq2/2.11.40.4 | new | 2019-01-18 14:56:37.700714 | | | handler_main_1 | 599 | |
4766092 | deseq2/2.11.40.4 | new | 2019-01-21 07:24:16.232376 | | | handler_main_5 | 599 | |
4811598 | cuffnorm/2.2.1.2 | running | 2019-02-01 13:08:30.400016 | condor | 248432 | handler_main_0 | 599 | |
(8 rows) | |
You can also query all non-terminal jobs by all users | |
$ gxadmin query jobs-nonterminal | head | |
id | tool_id | state | create_time | runner | ext_id | handler | user_id | |
---------+---------------------+---------+----------------------------+--------+--------+-----------------+--------- | |
4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7 | 599 | |
4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7 | 599 | |
4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8 | 599 | |
4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044 | condor | 197545 | handler_main_10 | 599 | |
4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392 | condor | 197553 | handler_main_2 | 599 | |
4760588 | featurecounts/1.6.3 | new | 2019-01-18 14:11:03.766558 | | | handler_main_9 | 11 | |
4760589 | featurecounts/1.6.3 | new | 2019-01-18 14:11:05.895232 | | | handler_main_1 | 11 | |
4760590 | featurecounts/1.6.3 | new | 2019-01-18 14:11:07.328533 | | | handler_main_2 | 11 | |
By default jobs in the states 'new', 'queued', and 'running' are considered non-terminal, but this can | |
be controlled by passing a comma-separated list to the '--states=' parameter. In addition, by default, | |
all non-terminal jobs are displayed, but you can limit this to only jobs created or updated before a | |
certain time with '--older-than='. This option takes a value in the PostgreSQL date/time interval | |
format, see documentation: https://www.postgresql.org/docs/current/functions-datetime.html | |
Be sure to quote intervals containing spaces. Finally, by default, the column returned (and filtered | |
on, in the case of '--older-than=') is 'job.create_time', but this can be changed to 'job.update_time' | |
with '--update-time'. So to return all queued and running jobs that have not been updated in the past 2 | |
days: | |
$ gxadmin query jobs-nonterminal --states=queued,running --older-than='2 days' --update-time | head -5 | |
id | tool_id | state | update_time | runner | ext_id | handler | user_id | |
--------+----------------------+---------+---------------------+--------------+--------+------------------+--------- | |
335897 | trinity/2.9.1 | queued | 2021-03-10 10:44:09 | bridges | 335897 | main_w3_handler2 | 599 | |
338554 | repeatmasker/4.0.9 | running | 2021-03-09 10:41:30 | jetstream_iu | 338554 | main_w4_handler2 | 11 | |
338699 | hisat2/2.1.0+galaxy7 | queued | 2021-03-10 05:36:26 | jetstream_iu | 338699 | main_w3_handler2 | 42 | |
EOF | |
states='new,queued,running' | |
interval= | |
user_filter='true' | |
time_column='create_time' | |
if (( $# > 0 )); then | |
for args in "$@"; do | |
if [ "$args" = '--update-time' ]; then | |
time_column='update_time' | |
elif [ "${args:0:9}" = '--states=' ]; then | |
states="${args:9}" | |
elif [ "${args:0:13}" = '--older-than=' ]; then | |
interval="${args:13}" | |
elif [ "${args:0:2}" != '==' ]; then | |
user_filter=$(get_user_filter "$1") | |
fi | |
done | |
fi | |
states="'$(echo "$states" | sed "s/,/', '/g")'" | |
if [ -n "$interval" ]; then | |
interval="AND job.$time_column < NOW() - interval '$interval'" | |
fi | |
user_id=$(gdpr_safe job.user_id user_id "anon") | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.id, job.tool_id, job.state, job.$time_column AT TIME ZONE 'UTC' AS $time_column, job.job_runner_name, job.job_runner_external_id, job.handler, $user_id | |
FROM | |
job | |
LEFT OUTER JOIN | |
galaxy_user ON job.user_id = galaxy_user.id | |
WHERE | |
$user_filter AND job.state IN ($states) $interval | |
ORDER BY job.id ASC | |
EOF | |
} | |
query_jobs-per-user() { ##? <user>: Number of jobs run by a specific user | |
handle_help "$@" <<-EOF | |
$ gxadmin query jobs-per-user helena | |
count | user_id | |
-------+--------- | |
999 | 1 | |
(1 row) | |
EOF | |
user_filter=$(get_user_filter "$arg_user") | |
read -r -d '' QUERY <<-EOF | |
SELECT count(*), user_id | |
FROM job | |
WHERE user_id in ( | |
SELECT id | |
FROM galaxy_user | |
WHERE $user_filter | |
) | |
GROUP BY user_id | |
EOF | |
} | |
query_recent-jobs() { ##? <hours>: Jobs run in the past <hours> (in any state) | |
handle_help "$@" <<-EOF | |
$ gxadmin query recent-jobs 2.1 | |
id | create_time | tool_id | state | username | |
---------+---------------------+-----------------------+-------+----------------- | |
4383997 | 2018-10-05 16:07:00 | Filter1 | ok | | |
4383994 | 2018-10-05 16:04:00 | echo_main_condor | ok | | |
4383993 | 2018-10-05 16:04:00 | echo_main_drmaa | error | | |
4383992 | 2018-10-05 16:04:00 | echo_main_handler11 | ok | | |
4383983 | 2018-10-05 16:04:00 | echo_main_handler2 | ok | | |
4383982 | 2018-10-05 16:04:00 | echo_main_handler1 | ok | | |
4383981 | 2018-10-05 16:04:00 | echo_main_handler0 | ok | | |
EOF | |
username=$(gdpr_safe galaxy_user.username username) | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.id, | |
job.create_time AT TIME ZONE 'UTC' as create_time, | |
job.tool_id, | |
job.state, $username | |
FROM job, galaxy_user | |
WHERE job.create_time > (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND job.user_id = galaxy_user.id | |
ORDER BY id desc | |
EOF | |
} | |
query_training-list() { ##? [--all]: List known trainings | |
handle_help "$@" <<-EOF | |
This module is specific to EU's implementation of Training Infrastructure as a Service, but it simply lists all groups whose name starts with the prefix 'training-' | |
$ gxadmin query training-list | |
name | created | |
------------+------------ | |
hogeschool | 2020-01-22 | |
asdf | 2019-08-28 | |
(2 rows) | |
EOF | |
d1="" | |
d2="AND deleted = false" | |
if [[ -n "$arg_all" ]]; then | |
d1=", deleted" | |
d2="" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
substring(name from 10) as name, | |
date_trunc('day', create_time AT TIME ZONE 'UTC')::date as created | |
$d1 | |
FROM galaxy_group | |
WHERE name like 'training-%' $d2 | |
ORDER BY create_time DESC | |
EOF | |
} | |
query_training-members() { ##? <tr_id>: List users in a specific training | |
handle_help "$@" <<-EOF | |
$ gxadmin query training-members hts2018 | |
username | joined | |
--------------------+--------------------- | |
helena-rasche | 2018-09-21 21:42:01 | |
EOF | |
# Remove training- if they used it. | |
ww=$(echo "$arg_tr_id" | sed 's/^training-//g') | |
username=$(gdpr_safe galaxy_user.username username) | |
read -r -d '' QUERY <<-EOF | |
SELECT DISTINCT ON ($username) | |
$username, | |
date_trunc('second', user_group_association.create_time AT TIME ZONE 'UTC') as joined | |
FROM galaxy_user, user_group_association, galaxy_group | |
WHERE galaxy_group.name = 'training-$ww' | |
AND galaxy_group.id = user_group_association.group_id | |
AND user_group_association.user_id = galaxy_user.id | |
EOF | |
} | |
query_training-members-remove() { ##? <training> <username> [--yesdoit]: Remove a user from a training | |
handle_help "$@" <<-EOF | |
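Looks up the user_group_association row for the given training and username; with --yesdoit it prints the DELETE statement for that row instead. Example (training and username are placeholders): | |
$ gxadmin query training-members-remove hts2018 helena-rasche --yesdoit | |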
EOF | |
# TODO: Move to mutate | |
# Remove training- if they used it. | |
ww=$(echo "$arg_training" | sed 's/^training-//g') | |
# Build the lookup query first; both branches below rely on it. | |
read -r -d '' qstr <<-EOF | |
SELECT | |
user_group_association.id, | |
galaxy_user.username, | |
galaxy_group.name | |
FROM | |
user_group_association | |
LEFT JOIN galaxy_user ON user_group_association.user_id = galaxy_user.id | |
LEFT JOIN galaxy_group ON galaxy_group.id = user_group_association.group_id | |
WHERE | |
galaxy_group.name = 'training-$ww' | |
AND galaxy_user.username = '$arg_username' | |
EOF | |
if [[ -n $arg_yesdoit ]]; then | |
# Find the association id, then build the DELETE statement for it. | |
results="$(query_tsv "$qstr")" | |
uga_id=$(echo "$results" | awk -F'\t' '{print $1}') | |
if (( uga_id > -1 )); then | |
qstr="delete from user_group_association where id = $uga_id" | |
fi | |
echo "$qstr" | |
else | |
QUERY="$qstr" | |
fi | |
} | |
query_largest-histories() { ##? [--human]: Largest histories in Galaxy | |
handle_help "$@" <<-EOF | |
Finds all histories and prints them by decreasing size | |
$ gxadmin query largest-histories | |
total_size | id | substring | username | |
------------+----+------------+---------- | |
17215831 | 6 | Unnamed hi | helena | |
45433 | 8 | Unnamed hi | helena | |
42846 | 9 | Unnamed hi | helena | |
1508 | 10 | Circos | helena | |
365 | 2 | Tag Testin | helena | |
158 | 44 | test | helena | |
16 | 45 | Unnamed hi | alice | |
Or you can supply the --human flag, but this should not be used with iquery/InfluxDB | |
$ gxadmin query largest-histories --human | |
total_size | id | substring | userna | |
------------+----+------------+------- | |
16 MB | 6 | Unnamed hi | helena | |
44 kB | 8 | Unnamed hi | helena | |
42 kB | 9 | Unnamed hi | helena | |
1508 bytes | 10 | Circos | helena | |
365 bytes | 2 | Tag Testin | helena | |
158 bytes | 44 | test | helena | |
16 bytes | 45 | Unnamed hi | alice | |
EOF | |
username=$(gdpr_safe galaxy_user.username username) | |
fields="size=0" | |
tags="id=1;name=2;username=3" | |
total_size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as total_size" | |
if [[ -n "$arg_human" ]]; then | |
total_size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as total_size" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
$total_size, | |
history.id, | |
substring(history.name, 1, 10), | |
$username | |
FROM | |
dataset | |
JOIN history_dataset_association on dataset.id = history_dataset_association.dataset_id | |
JOIN history on history_dataset_association.history_id = history.id | |
JOIN galaxy_user on history.user_id = galaxy_user.id | |
GROUP BY history.id, history.name, history.user_id, galaxy_user.username | |
ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC | |
EOF | |
} | |
query_training-queue() { ##? <training_id>: Jobs currently being run by people in a given training | |
handle_help "$@" <<-EOF | |
Finds all jobs run by people in that training (including jobs they are executing that are not part of the training) | |
$ gxadmin query training-queue hts2018 | |
state | id | extid | tool_id | username | created | |
--------+---------+--------+---------+---------------+--------------------- | |
queued | 4350274 | 225743 | upload1 | | 2018-09-26 10:00:00 | |
EOF | |
# Remove training- if they used it. | |
ww=$(echo "$arg_training_id" | sed 's/^training-//g') | |
username=$(gdpr_safe galaxy_user.username username) | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.state, | |
job.id, | |
job.job_runner_external_id AS extid, | |
job.tool_id, | |
$username, | |
job.create_time AT TIME ZONE 'UTC' AS created | |
FROM | |
job, galaxy_user | |
WHERE | |
job.user_id = galaxy_user.id | |
AND job.create_time > (now() AT TIME ZONE 'UTC' - '3 hours'::interval) | |
AND galaxy_user.id | |
IN ( | |
SELECT | |
galaxy_user.id | |
FROM | |
galaxy_user, user_group_association, galaxy_group | |
WHERE | |
galaxy_group.name = 'training-$ww' | |
AND galaxy_group.id = user_group_association.group_id | |
AND user_group_association.user_id = galaxy_user.id | |
) | |
ORDER BY | |
job.create_time ASC | |
EOF | |
} | |
query_disk-usage() { ##? [--human]: Disk usage per object store. | |
handle_help "$@" <<-EOF | |
Query the different object stores referenced in your Galaxy database | |
$ gxadmin query disk-usage | |
object_store_id | sum | |
-----------------+------------ | |
| 1387857517 | |
(1 row) | |
Or you can supply the --human flag, but this should not be used with iquery/InfluxDB | |
$ gxadmin query disk-usage --human | |
object_store_id | sum | |
-----------------+------------ | |
| 1324 MB | |
(1 row) | |
EOF | |
fields="count=1" | |
tags="object_store_id=0" | |
size="sum(coalesce(dataset.total_size, dataset.file_size, 0))" | |
if [[ -n "$arg_human" ]]; then | |
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as sum" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
object_store_id, $size | |
FROM dataset | |
WHERE NOT purged | |
GROUP BY object_store_id | |
ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC | |
EOF | |
} | |
query_users-count() { ## : Shows sums of active/external/deleted/purged accounts | |
handle_help "$@" <<-EOF | |
active | external | deleted | purged | count | |
--------+----------+---------+--------+------- | |
f | f | f | f | 182 | |
t | f | t | t | 2 | |
t | f | t | f | 6 | |
t | f | f | f | 2350 | |
f | f | t | t | 36 | |
EOF | |
fields="count=4" | |
tags="active=0;external=1;deleted=2;purged=3" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
active, external, deleted, purged, count(*) as count | |
FROM | |
galaxy_user | |
GROUP BY | |
active, external, deleted, purged | |
EOF | |
} | |
query_tool-last-used-date() { ## : When was the most recent invocation of every tool | |
handle_help "$@" <<-EOF | |
Example invocation: | |
$ gxadmin query tool-last-used-date | |
max | tool_id | |
---------------------+--------------------------- | |
2019-02-01 00:00:00 | test_history_sanitization | |
2018-12-01 00:00:00 | require_format | |
2018-11-01 00:00:00 | upload1 | |
(3 rows) | |
**WARNING** | |
!> It is not truly every tool; there is no easy way to find the tools which have never been run. | |
EOF | |
read -r -d '' QUERY <<-EOF | |
select max(date_trunc('month', create_time AT TIME ZONE 'UTC')), tool_id from job group by tool_id order by max desc | |
EOF | |
} | |
query_users-total() { ## : Total number of Galaxy users (incl deleted, purged, inactive) | |
handle_help "$@" <<-EOF | |
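This is a plain count of every row in galaxy_user, including deleted, purged, and inactive accounts. | |
$ gxadmin query users-total | |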
EOF | |
fields="count=0" | |
tags="" | |
read -r -d '' QUERY <<-EOF | |
SELECT count(*) FROM galaxy_user | |
EOF | |
} | |
query_groups-list() { ## : List all groups known to Galaxy | |
handle_help "$@" <<-EOF | |
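Lists each group along with its number of members (user_group_association rows), as computed by the query below. | |
$ gxadmin query groups-list | |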
EOF | |
fields="count=1" | |
tags="group_name=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
galaxy_group.name, count(*) | |
FROM | |
galaxy_group, user_group_association | |
WHERE | |
user_group_association.group_id = galaxy_group.id | |
GROUP BY name | |
EOF | |
} | |
query_collection-usage() { ## : Information about how many collections of various types are used | |
handle_help "$@" <<-EOF | |
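Counts history dataset collection associations grouped by collection type (e.g. list, paired, list:paired). | |
$ gxadmin query collection-usage | |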
EOF | |
fields="count=1" | |
tags="group_name=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
dc.collection_type, count(*) | |
FROM | |
history_dataset_collection_association as hdca | |
INNER JOIN | |
dataset_collection as dc | |
ON hdca.collection_id = dc.id | |
GROUP BY | |
dc.collection_type | |
EOF | |
} | |
query_ts-repos() { ## : Counts of toolshed repositories by toolshed and owner. | |
handle_help "$@" <<-EOF | |
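Tallies installed repositories per tool shed and owner, based on the tool_shed_repository table (see the query below). | |
$ gxadmin query ts-repos | |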
EOF | |
fields="count=2" | |
tags="tool_shed=0;owner=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
tool_shed, owner, count(*) | |
FROM | |
tool_shed_repository | |
GROUP BY | |
tool_shed, owner | |
EOF | |
} | |
query_tool-metrics() { ##? <tool_id> <metric_id> [--like]: See values of a specific metric | |
handle_help "$@" <<-EOF | |
A good way to use this is to fetch the memory usage of a tool and then | |
do some aggregations. The following requires [data_hacks](https://github.com/bitly/data_hacks) | |
$ gxadmin tsvquery tool-metrics %rgrnastar/rna_star% memory.max_usage_in_bytes --like | \\ | |
awk '{print \$1 / 1024 / 1024 / 1024}' | \\ | |
histogram.py --percentage | |
# NumSamples = 441; Min = 2.83; Max = 105.88 | |
# Mean = 45.735302; Variance = 422.952289; SD = 20.565804; Median 51.090900 | |
# each ∎ represents a count of 1 | |
2.8277 - 13.1324 [ 15]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (3.40%) | |
13.1324 - 23.4372 [ 78]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (17.69%) | |
23.4372 - 33.7419 [ 47]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (10.66%) | |
33.7419 - 44.0466 [ 31]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (7.03%) | |
44.0466 - 54.3514 [ 98]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (22.22%) | |
54.3514 - 64.6561 [ 102]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (23.13%) | |
64.6561 - 74.9608 [ 55]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (12.47%) | |
74.9608 - 85.2655 [ 11]: ∎∎∎∎∎∎∎∎∎∎∎ (2.49%) | |
85.2655 - 95.5703 [ 3]: ∎∎∎ (0.68%) | |
95.5703 - 105.8750 [ 1]: ∎ (0.23%) | |
EOF | |
tool_subquery="SELECT id FROM job WHERE tool_id = '$arg_tool_id'" | |
if [[ -n "$arg_like" ]]; then | |
tool_subquery="SELECT id FROM job WHERE tool_id like '$arg_tool_id'" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
metric_value | |
FROM job_metric_numeric | |
WHERE | |
metric_name = '$arg_metric_id' | |
and | |
job_id in ( | |
$tool_subquery | |
) | |
EOF | |
} | |
query_tool-available-metrics() { ##? <tool_id>: list all available metrics for a given tool | |
handle_help "$@" <<-EOF | |
Gives a list of available metrics, which can then be used to query. | |
$ gxadmin query tool-available-metrics upload1 | |
metric_name | |
------------------------------------- | |
memory.stat.total_rss | |
memory.stat.total_swap | |
memory.stat.total_unevictable | |
memory.use_hierarchy | |
... | |
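Any of these metric names can then be passed to 'gxadmin query tool-metrics <tool_id> <metric_name>' to see the recorded values. | |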
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
distinct metric_name | |
FROM job_metric_numeric | |
WHERE job_id in ( | |
SELECT id FROM job WHERE tool_id = '$arg_tool_id' | |
) | |
ORDER BY metric_name asc | |
EOF | |
} | |
query_tool-memory-per-inputs() { ##? <tool_id> [--like]: See memory usage and input size data | |
handle_help "$@" <<-EOF | |
Display details about tool input counts and sizes along with memory usage and the relation between them, | |
to aid in determining appropriate memory allocations for tools. | |
$ gxadmin query tool-memory-per-inputs %/unicycler/% --like | |
id | tool_id | input_count | total_input_size_mb | mean_input_size_mb | median_input_size_mb | memory_used_mb | memory_used_per_input_mb | memory_mean_input_ratio | memory_median_input_ratio | |
----------+--------------------------------------------------------------+-------------+---------------------+--------------------+----------------------+----------------+--------------------------+-------------------------+--------------------------- | |
34663027 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 245 | 122 | 122 | 4645 | 19 | 38 | 38 | |
34657045 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 51 | 25 | 25 | 1739 | 34 | 68 | 68 | |
34655863 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 1829 | 915 | 915 | 20635 | 11 | 23 | 23 | |
34650581 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 3 | 235 | 78 | 112 | 30550 | 130 | 391 | 274 | |
34629187 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 2411 | 1206 | 1206 | 50018 | 21 | 41 | 41 | |
A good way to use this is to fetch the data and then do some aggregations. The following requires | |
[data_hacks](https://github.com/bitly/data_hacks): | |
$ gxadmin tsvquery tool-memory-per-inputs %/unicycler/% --like | \\ | |
awk '{print \$10}' | histogram.py --percentage --max=256 | |
# NumSamples = 870; Min = 4.00; Max = 256.00 | |
# 29 values outside of min/max | |
# Mean = 67.804598; Variance = 15461.789404; SD = 124.345444; Median 37.000000 | |
# each ∎ represents a count of 4 | |
4.0000 - 29.2000 [ 368]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (42.30%) | |
29.2000 - 54.4000 [ 226]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (25.98%) | |
54.4000 - 79.6000 [ 133]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (15.29%) | |
79.6000 - 104.8000 [ 45]: ∎∎∎∎∎∎∎∎∎∎∎ (5.17%) | |
104.8000 - 130.0000 [ 28]: ∎∎∎∎∎∎∎ (3.22%) | |
130.0000 - 155.2000 [ 12]: ∎∎∎ (1.38%) | |
155.2000 - 180.4000 [ 9]: ∎∎ (1.03%) | |
180.4000 - 205.6000 [ 6]: ∎ (0.69%) | |
205.6000 - 230.8000 [ 10]: ∎∎ (1.15%) | |
230.8000 - 256.0000 [ 4]: ∎ (0.46%) | |
EOF | |
tool_clause="j.tool_id = '$arg_tool_id'" | |
if [[ -n "$arg_like" ]]; then | |
tool_clause="j.tool_id like '$arg_tool_id'" | |
fi | |
read -r -d '' QUERY <<-EOF | |
WITH job_cte AS ( | |
SELECT | |
j.id, | |
j.tool_id | |
FROM | |
job j | |
WHERE | |
$tool_clause | |
AND | |
j.state = 'ok' | |
), | |
mem_cte AS ( | |
SELECT | |
j.id, | |
jmn.metric_value AS memory_used | |
FROM | |
job_cte j | |
JOIN | |
job_metric_numeric jmn ON j.id = jmn.job_id | |
WHERE | |
jmn.plugin = 'cgroup' | |
AND | |
jmn.metric_name = 'memory.memsw.max_usage_in_bytes' | |
), | |
data_cte AS ( | |
SELECT | |
j.id, | |
count(jtid.id) AS input_count, | |
sum(d.total_size) AS total_input_size, | |
avg(d.total_size) AS mean_input_size, | |
percentile_cont(0.5) WITHIN GROUP (ORDER BY d.total_size) AS median_input_size | |
FROM | |
job_cte j | |
JOIN | |
job_to_input_dataset jtid ON j.id = jtid.job_id | |
JOIN | |
history_dataset_association hda ON jtid.dataset_id = hda.id | |
JOIN | |
dataset d ON hda.dataset_id = d.id | |
GROUP BY | |
j.id | |
) | |
SELECT | |
j.*, | |
d.input_count, | |
(d.total_input_size/1024/1024)::bigint AS total_input_size_mb, | |
(d.mean_input_size/1024/1024)::bigint AS mean_input_size_mb, | |
(d.median_input_size/1024/1024)::bigint AS median_input_size_mb, | |
(m.memory_used/1024/1024)::bigint AS memory_used_mb, | |
(m.memory_used/d.total_input_size)::bigint AS memory_used_per_input_mb, | |
(m.memory_used/d.mean_input_size)::bigint AS memory_mean_input_ratio, | |
(m.memory_used/d.median_input_size)::bigint AS memory_median_input_ratio | |
FROM | |
job_cte j | |
JOIN | |
mem_cte m on j.id = m.id | |
JOIN | |
data_cte d on j.id = d.id | |
ORDER BY | |
j.id DESC | |
EOF | |
} | |
query_monthly-cpu-stats() { ##? [year] : CPU years/hours allocated to tools by month | |
handle_help "$@" <<-EOF | |
This uses the galaxy_slots and runtime_seconds metrics in order to | |
calculate allocated CPU years/hours. This will not be the value of what is | |
actually consumed by your jobs, you should use cgroups. | |
$ gxadmin query monthly-cpu-stats | |
month | cpu_years | cpu_hours | |
------------+-----------+----------- | |
2020-05-01 | 53.55 | 469088.02 | |
2020-04-01 | 59.55 | 521642.60 | |
2020-03-01 | 57.04 | 499658.86 | |
2020-02-01 | 53.93 | 472390.31 | |
2020-01-01 | 56.49 | 494887.37 | |
... | |
EOF | |
	if [[ -n "$arg_year" ]] && date -d "$arg_year" >/dev/null | |
	then | |
		filter_by_year="AND date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date" | |
	fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month, | |
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365 ), 2) as cpu_years, | |
round(sum((a.metric_value * b.metric_value) / 3600 ), 2) as cpu_hours | |
FROM | |
job_metric_numeric a, | |
job_metric_numeric b, | |
job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
$filter_by_year | |
GROUP BY month | |
ORDER BY month DESC | |
EOF | |
} | |
query_monthly-cpu-years() { ## : CPU years allocated to tools by month | |
handle_help "$@" <<-EOF | |
This uses the galaxy_slots and runtime_seconds metrics in order to | |
calculate allocated CPU years. This will not be the value of what is | |
actually consumed by your jobs, you should use cgroups. | |
$ gxadmin query monthly-cpu-years | |
month | cpu_years | |
------------+----------- | |
2019-04-01 | 2.95 | |
2019-03-01 | 12.38 | |
2019-02-01 | 11.47 | |
2019-01-01 | 8.27 | |
2018-12-01 | 11.42 | |
2018-11-01 | 16.99 | |
2018-10-01 | 12.09 | |
2018-09-01 | 6.27 | |
2018-08-01 | 9.06 | |
2018-07-01 | 6.17 | |
2018-06-01 | 5.73 | |
2018-05-01 | 7.36 | |
2018-04-01 | 10.21 | |
2018-03-01 | 5.20 | |
2018-02-01 | 4.53 | |
2018-01-01 | 4.05 | |
2017-12-01 | 2.44 | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', job.create_time)::date as month, | |
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years | |
FROM | |
job_metric_numeric a, | |
job_metric_numeric b, | |
job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
GROUP BY date_trunc('month', job.create_time) | |
ORDER BY date_trunc('month', job.create_time) DESC | |
EOF | |
} | |
query_monthly-data(){ ##? [year] [--human]: Amount of data ingested or created by Galaxy each month | |
handle_help "$@" <<-EOF | |
Find out how much data was ingested or created by Galaxy during the past months. | |
$ gxadmin query monthly-data 2018 | |
month | pg_size_pretty | |
------------+---------------- | |
2018-12-01 | 62 TB | |
2018-11-01 | 50 TB | |
2018-10-01 | 59 TB | |
2018-09-01 | 32 TB | |
2018-08-01 | 26 TB | |
2018-07-01 | 42 TB | |
2018-06-01 | 34 TB | |
2018-05-01 | 33 TB | |
2018-04-01 | 27 TB | |
2018-03-01 | 32 TB | |
2018-02-01 | 18 TB | |
2018-01-01 | 16 TB | |
EOF | |
size="sum(coalesce(dataset.total_size, dataset.file_size, 0))" | |
if [[ -n $arg_human ]]; then | |
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0)))" | |
fi | |
if [[ -n $arg_year ]]; then | |
where="WHERE date_trunc('year', dataset.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', dataset.create_time AT TIME ZONE 'UTC')::date AS month, | |
$size | |
FROM | |
dataset | |
$where | |
GROUP BY | |
month | |
ORDER BY | |
month DESC | |
EOF | |
} | |
query_monthly-gpu-years() { ## : GPU years allocated to tools by month | |
handle_help "$@" <<-EOF | |
This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to | |
calculate allocated GPU years. This will not be the value of what is | |
actually consumed by your jobs, you should use cgroups. Only works if the | |
environment variable 'CUDA_VISIBLE_DEVICES' is recorded as job metric by Galaxy. | |
Requires Nvidia GPUs. | |
$ gxadmin query monthly-gpu-years | |
month | gpu_years | |
------------+----------- | |
2019-04-01 | 2.95 | |
2019-03-01 | 12.38 | |
2019-02-01 | 11.47 | |
2019-01-01 | 8.27 | |
2018-12-01 | 11.42 | |
2018-11-01 | 16.99 | |
2018-10-01 | 12.09 | |
2018-09-01 | 6.27 | |
2018-08-01 | 9.06 | |
2018-07-01 | 6.17 | |
2018-06-01 | 5.73 | |
2018-05-01 | 7.36 | |
2018-04-01 | 10.21 | |
2018-03-01 | 5.20 | |
2018-02-01 | 4.53 | |
2018-01-01 | 4.05 | |
2017-12-01 | 2.44 | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', job.create_time)::date as month, | |
round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years | |
FROM | |
job_metric_numeric a, | |
job_metric_text b, | |
job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'CUDA_VISIBLE_DEVICES' | |
GROUP BY date_trunc('month', job.create_time) | |
ORDER BY date_trunc('month', job.create_time) DESC | |
EOF | |
} | |
query_user-cpu-years() { ## : CPU years allocated to tools by user | |
handle_help "$@" <<-EOF | |
This uses the galaxy_slots and runtime_seconds metrics in order to | |
calculate allocated CPU years. This will not be the value of what is | |
actually consumed by your jobs, you should use cgroups. | |
rank | user_id | username | cpu_years | |
----- | ------- | ----------- | ---------- | |
1 | | 123f911b5f1 | 20.35 | |
2 | | cb0fabc0002 | 14.93 | |
3 | | 7e9e9b00b89 | 14.24 | |
4 | | 42f211e5e87 | 14.06 | |
5 | | 26cdba62c93 | 12.97 | |
6 | | fa87cddfcae | 7.01 | |
7 | | 44d2a648aac | 6.70 | |
8 | | 66c57b41194 | 6.43 | |
9 | | 6b1467ac118 | 5.45 | |
10 | | d755361b59a | 5.19 | |
EOF | |
username=$(gdpr_safe galaxy_user.username username 'Anonymous') | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
row_number() OVER (ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC) as rank, | |
job.user_id, | |
$username, | |
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years | |
FROM | |
job_metric_numeric a, | |
job_metric_numeric b, | |
job | |
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
GROUP BY job.user_id, galaxy_user.username | |
ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC | |
LIMIT 50 | |
EOF | |
} | |
query_user-gpu-years() { ## : GPU years allocated to tools by user | |
handle_help "$@" <<-EOF | |
This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to | |
calculate allocated GPU years. This will not be the value of what is | |
actually consumed by your jobs, you should use cgroups. Only works if the | |
environment variable 'CUDA_VISIBLE_DEVICES' is recorded as job metric by Galaxy. | |
Requires Nvidia GPUs. | |
rank | user_id | username | gpu_years | |
----- | ------- | ----------- | ---------- | |
1 | | 123f911b5f1 | 20.35 | |
2 | | cb0fabc0002 | 14.93 | |
3 | | 7e9e9b00b89 | 14.24 | |
4 | | 42f211e5e87 | 14.06 | |
5 | | 26cdba62c93 | 12.97 | |
6 | | fa87cddfcae | 7.01 | |
7 | | 44d2a648aac | 6.70 | |
8 | | 66c57b41194 | 6.43 | |
9 | | 6b1467ac118 | 5.45 | |
10 | | d755361b59a | 5.19 | |
EOF | |
username=$(gdpr_safe galaxy_user.username username 'Anonymous') | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
row_number() OVER (ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC) as rank, | |
job.user_id, | |
$username, | |
round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years | |
FROM | |
job_metric_numeric a, | |
job_metric_text b, | |
job | |
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'CUDA_VISIBLE_DEVICES' | |
GROUP BY job.user_id, galaxy_user.username | |
ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC | |
LIMIT 50 | |
EOF | |
} | |
query_user-disk-usage() { ##? [--human] [--use-precalc]: Retrieve an approximation of the disk usage for users | |
handle_help "$@" <<-EOF | |
This uses the dataset size and the history association in order to | |
calculate total disk usage for a user. This is currently limited | |
to the 50 users with the highest storage usage. | |
By default it prints the storage usage in bytes but you can use --human: | |
rank | user id | username | email | storage usage | |
----- | -------- | ----------- | ----------- | ------------- | |
1 | 5 | 123f911b5f1 | [email protected] | 20.35 TB | |
2 | 6 | cb0fabc0002 | [email protected] | 14.93 TB | |
3 | 9 | 7e9e9b00b89 | [email protected] | 14.24 TB | |
4 | 11 | 42f211e5e87 | [email protected] | 14.06 GB | |
5 | 2 | 26cdba62c93 | [email protected] | 12.97 GB | |
6 | 1005 | fa87cddfcae | [email protected] | 7.01 GB | |
7 | 2009 | 44d2a648aac | [email protected] | 6.70 GB | |
8 | 432 | 66c57b41194 | [email protected] | 6.43 GB | |
9 | 58945 | 6b1467ac118 | [email protected] | 5.45 MB | |
10 | 10 | d755361b59a | [email protected] | 5.19 KB | |
		The --use-precalc flag instead reads the disk_usage column of the galaxy_user table, i.e. the pre-calculated value that Galaxy itself displays to users. | |
EOF | |
username=$(gdpr_safe galaxy_user.username user_name 'Anonymous') | |
useremail=$(gdpr_safe galaxy_user.email user_email 'Anonymous') | |
fields="size=4" | |
tags="userid=1;username=2" | |
if [[ -n $arg_use_precalc ]]; then | |
size="disk_usage as \"storage usage\"" | |
if [[ -n $arg_human ]]; then | |
size="pg_size_pretty(disk_usage) as \"storage usage\"" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
row_number() OVER (ORDER BY galaxy_user.disk_usage DESC) as rank, | |
galaxy_user.id as "user id", | |
$username, | |
$useremail, | |
$size | |
FROM | |
galaxy_user | |
GROUP BY galaxy_user.id | |
ORDER BY 1 | |
LIMIT 50 | |
EOF | |
else | |
size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as \"storage usage\"" | |
if [[ -n $arg_human ]]; then | |
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as \"storage usage\"" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
row_number() OVER (ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC) as rank, | |
galaxy_user.id as "user id", | |
$username, | |
$useremail, | |
$size | |
FROM | |
dataset, | |
galaxy_user, | |
history_dataset_association, | |
history | |
WHERE | |
NOT dataset.purged | |
AND dataset.id = history_dataset_association.dataset_id | |
AND history_dataset_association.history_id = history.id | |
AND history.user_id = galaxy_user.id | |
GROUP BY galaxy_user.id | |
ORDER BY 1 | |
LIMIT 50 | |
EOF | |
fi | |
} | |
query_user-disk-quota() { ## : Retrieves the 50 users with the largest quotas | |
handle_help "$@" <<-EOF | |
		This calculates the total disk quota assigned to each user. | |
It only displays the top 50 quotas. | |
rank | user_id | username | quota | |
----- | -------- | ------------ | ------------ | |
1 | | 123f911b5f1 | 20.35 | |
2 | | cb0fabc0002 | 14.93 | |
3 | | 7e9e9b00b89 | 14.24 | |
4 | | 42f211e5e87 | 14.06 | |
5 | | 26cdba62c93 | 12.97 | |
6 | | fa87cddfcae | 7.01 | |
7 | | 44d2a648aac | 6.70 | |
8 | | 66c57b41194 | 6.43 | |
9 | | 6b1467ac118 | 5.45 | |
10 | | d755361b59a | 5.19 | |
EOF | |
username=$(gdpr_safe galaxy_user.username username 'Anonymous') | |
read -r -d '' QUERY <<-EOF | |
WITH user_basequota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
basequota.bytes as "quota" | |
FROM galaxy_user, | |
quota basequota, | |
user_quota_association | |
WHERE galaxy_user.id = user_quota_association.user_id | |
AND basequota.id = user_quota_association.quota_id | |
AND basequota.operation = '=' | |
AND NOT basequota.deleted | |
GROUP BY galaxy_user.id, basequota.bytes | |
), | |
user_basequota AS ( | |
SELECT user_basequota_list.user_id, | |
MAX(user_basequota_list.quota) as "quota" | |
FROM user_basequota_list | |
GROUP BY user_basequota_list.user_id | |
), | |
user_addquota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
addquota.bytes as "quota" | |
FROM galaxy_user, | |
quota addquota, | |
user_quota_association | |
WHERE galaxy_user.id = user_quota_association.user_id | |
AND addquota.id = user_quota_association.quota_id | |
AND addquota.operation = '+' | |
AND NOT addquota.deleted | |
GROUP BY galaxy_user.id, addquota.bytes | |
), | |
user_addquota AS ( | |
SELECT user_addquota_list.user_id, | |
sum(user_addquota_list.quota) AS "quota" | |
FROM user_addquota_list | |
GROUP BY user_addquota_list.user_id | |
), | |
user_minquota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
minquota.bytes as "quota" | |
FROM galaxy_user, | |
quota minquota, | |
user_quota_association | |
WHERE galaxy_user.id = user_quota_association.user_id | |
AND minquota.id = user_quota_association.quota_id | |
AND minquota.operation = '-' | |
AND NOT minquota.deleted | |
GROUP BY galaxy_user.id, minquota.bytes | |
), | |
user_minquota AS ( | |
SELECT user_minquota_list.user_id, | |
sum(user_minquota_list.quota) AS "quota" | |
FROM user_minquota_list | |
GROUP BY user_minquota_list.user_id | |
), | |
group_basequota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
galaxy_group.id as "group_id", | |
basequota.bytes as "quota" | |
FROM galaxy_user, | |
galaxy_group, | |
quota basequota, | |
group_quota_association, | |
user_group_association | |
WHERE galaxy_user.id = user_group_association.user_id | |
AND galaxy_group.id = user_group_association.group_id | |
AND basequota.id = group_quota_association.quota_id | |
AND galaxy_group.id = group_quota_association.group_id | |
AND basequota.operation = '=' | |
AND NOT basequota.deleted | |
GROUP BY galaxy_user.id, galaxy_group.id, basequota.bytes | |
), | |
group_basequota AS ( | |
SELECT group_basequota_list.user_id, | |
group_basequota_list.group_id, | |
MAX(group_basequota_list.quota) as "quota" | |
FROM group_basequota_list | |
GROUP BY group_basequota_list.user_id, group_basequota_list.group_id | |
), | |
group_addquota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
addquota.bytes as "quota" | |
FROM galaxy_user, | |
galaxy_group, | |
quota addquota, | |
group_quota_association, | |
user_group_association | |
WHERE galaxy_user.id = user_group_association.user_id | |
AND galaxy_group.id = user_group_association.group_id | |
AND addquota.id = group_quota_association.quota_id | |
AND galaxy_group.id = group_quota_association.group_id | |
AND addquota.operation = '+' | |
AND NOT addquota.deleted | |
GROUP BY galaxy_user.id, addquota.bytes | |
), | |
group_addquota AS ( | |
SELECT group_addquota_list.user_id, | |
sum(group_addquota_list.quota) AS "quota" | |
FROM group_addquota_list | |
GROUP BY group_addquota_list.user_id | |
), | |
group_minquota_list AS ( | |
SELECT galaxy_user.id as "user_id", | |
minquota.bytes as "quota" | |
FROM galaxy_user, | |
galaxy_group, | |
quota minquota, | |
group_quota_association, | |
user_group_association | |
WHERE galaxy_user.id = user_group_association.user_id | |
AND galaxy_group.id = user_group_association.group_id | |
AND minquota.id = group_quota_association.quota_id | |
AND galaxy_group.id = group_quota_association.group_id | |
AND minquota.operation = '-' | |
AND NOT minquota.deleted | |
GROUP BY galaxy_user.id, galaxy_group.id, galaxy_group.name, minquota.bytes | |
), | |
group_minquota AS ( | |
SELECT group_minquota_list.user_id, | |
sum(group_minquota_list.quota) AS "quota" | |
FROM group_minquota_list | |
GROUP BY group_minquota_list.user_id | |
), | |
all_user_default_quota AS ( | |
SELECT galaxy_user.id as "user_id", | |
quota.bytes | |
FROM galaxy_user, | |
quota | |
WHERE quota.id = (SELECT quota_id FROM default_quota_association) | |
), | |
quotas AS ( | |
SELECT all_user_default_quota.user_id as "aud_uid", | |
all_user_default_quota.bytes as "aud_quota", | |
user_basequota.user_id as "ubq_uid", | |
user_basequota.quota as "ubq_quota", | |
user_addquota.user_id as "uaq_uid", | |
user_addquota.quota as "uaq_quota", | |
user_minquota.user_id as "umq_uid", | |
user_minquota.quota as "umq_quota", | |
group_basequota.user_id as "gbq_uid", | |
group_basequota.quota as "gbq_quota", | |
group_addquota.user_id as "gaq_uid", | |
group_addquota.quota as "gaq_quota", | |
group_minquota.user_id as "gmq_uid", | |
group_minquota.quota as "gmq_quota" | |
FROM all_user_default_quota | |
FULL OUTER JOIN user_basequota ON all_user_default_quota.user_id = user_basequota.user_id | |
FULL OUTER JOIN user_addquota ON all_user_default_quota.user_id = user_addquota.user_id | |
FULL OUTER JOIN user_minquota ON all_user_default_quota.user_id = user_minquota.user_id | |
FULL OUTER JOIN group_basequota ON all_user_default_quota.user_id = group_basequota.user_id | |
FULL OUTER JOIN group_addquota ON all_user_default_quota.user_id = group_addquota.user_id | |
FULL OUTER JOIN group_minquota ON all_user_default_quota.user_id = group_minquota.user_id | |
), | |
computed_quotas AS ( | |
SELECT aud_uid as "user_id", | |
COALESCE(GREATEST(ubq_quota, gbq_quota), aud_quota) as "base_quota", | |
(COALESCE(uaq_quota, 0) + COALESCE(gaq_quota, 0)) as "add_quota", | |
(COALESCE(umq_quota, 0) + COALESCE(gmq_quota, 0)) as "min_quota" | |
FROM quotas | |
) | |
SELECT row_number() OVER (ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC) as rank, | |
galaxy_user.id as "user_id", | |
$username, | |
pg_size_pretty(computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) as "quota" | |
FROM computed_quotas, | |
galaxy_user | |
WHERE computed_quotas.user_id = galaxy_user.id | |
GROUP BY galaxy_user.id, galaxy_user.username, computed_quotas.base_quota, computed_quotas.add_quota, computed_quotas.min_quota | |
ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC | |
LIMIT 50 | |
EOF | |
} | |
query_group-cpu-seconds() { ##? [group]: Retrieve an approximation of the CPU time in seconds for group(s) | |
handle_help "$@" <<-EOF | |
This uses the galaxy_slots and runtime_seconds metrics in order to | |
calculate allocated CPU time in seconds. This will not be the value of | |
what is actually consumed by jobs of the group, you should use cgroups instead. | |
rank | group_id | group_name | cpu_seconds | |
----- | -------- | ------------ | ------------ | |
1 | | 123f911b5f1 | 20.35 | |
2 | | cb0fabc0002 | 14.93 | |
3 | | 7e9e9b00b89 | 14.24 | |
4 | | 42f211e5e87 | 14.06 | |
5 | | 26cdba62c93 | 12.97 | |
6 | | fa87cddfcae | 7.01 | |
7 | | 44d2a648aac | 6.70 | |
8 | | 66c57b41194 | 6.43 | |
9 | | 6b1467ac118 | 5.45 | |
10 | | d755361b59a | 5.19 | |
EOF | |
where="" | |
if [[ -n $arg_group ]]; then | |
where="AND galaxy_group.name = '$arg_group'" | |
fi | |
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous') | |
read -r -d '' QUERY <<-EOF | |
WITH jobs_info AS ( | |
SELECT job.user_id, | |
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds | |
FROM job_metric_numeric AS a, | |
job_metric_numeric AS b, | |
job | |
WHERE job.id = a.job_id | |
AND job.id = b.job_id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
GROUP BY job.user_id | |
), user_job_info AS ( | |
SELECT user_id, | |
sum(cpu_seconds) AS cpu_seconds | |
FROM jobs_info | |
GROUP BY user_id | |
) | |
SELECT row_number() OVER (ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC) as rank, | |
galaxy_group.id as group_id, | |
$groupname, | |
round(sum(user_job_info.cpu_seconds), 0) as cpu_seconds | |
FROM user_job_info, | |
galaxy_group, | |
user_group_association | |
WHERE user_job_info.user_id = user_group_association.user_id | |
AND user_group_association.group_id = galaxy_group.id | |
$where | |
GROUP BY galaxy_group.id, galaxy_group.name | |
ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC | |
LIMIT 50 | |
EOF | |
} | |
query_group-gpu-time() { ##? [group]: Retrieve an approximation of the GPU time for group(s) | |
handle_help "$@" <<-EOF | |
		This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to | |
calculate allocated GPU time. This will not be the value of what is | |
actually consumed by jobs of the group, you should use cgroups instead. | |
Only works if the environment variable 'CUDA_VISIBLE_DEVICES' is | |
recorded as job metric by Galaxy. Requires Nvidia GPUs. | |
rank | group_id | group_name | gpu_seconds | |
----- | -------- | ----------- | ----------- | |
1 | | 123f911b5f1 | 20.35 | |
2 | | cb0fabc0002 | 14.93 | |
3 | | 7e9e9b00b89 | 14.24 | |
4 | | 42f211e5e87 | 14.06 | |
5 | | 26cdba62c93 | 12.97 | |
6 | | fa87cddfcae | 7.01 | |
7 | | 44d2a648aac | 6.70 | |
8 | | 66c57b41194 | 6.43 | |
9 | | 6b1467ac118 | 5.45 | |
10 | | d755361b59a | 5.19 | |
EOF | |
where="" | |
if [[ -n $arg_group ]]; then | |
where="AND galaxy_group.name = '$arg_group'" | |
fi | |
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous') | |
read -r -d '' QUERY <<-EOF | |
WITH jobs_info AS ( | |
SELECT job.user_id, | |
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds | |
FROM job_metric_numeric AS a, | |
job_metric_text AS b, | |
job | |
WHERE job.id = a.job_id | |
AND job.id = b.job_id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'CUDA_VISIBLE_DEVICES' | |
GROUP BY job.user_id | |
), user_job_info AS ( | |
SELECT user_id, | |
sum(gpu_seconds) AS gpu_seconds | |
FROM jobs_info | |
GROUP BY user_id | |
) | |
SELECT row_number() OVER (ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC) as rank, | |
galaxy_group.id as group_id, | |
$groupname, | |
round(sum(user_job_info.gpu_seconds), 0) as gpu_seconds | |
FROM user_job_info, | |
galaxy_group, | |
user_group_association | |
WHERE user_job_info.user_id = user_group_association.user_id | |
AND user_group_association.group_id = galaxy_group.id | |
$where | |
GROUP BY galaxy_group.id, galaxy_group.name | |
ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC | |
LIMIT 50 | |
EOF | |
} | |
query_monthly-users-registered(){ ## [year] [--by_group]: Number of users registered each month | |
handle_help "$@" <<-EOF | |
		Number of users that registered each month. **NOTE**: does not include anonymous users; with --by_group, users that are not in any group are also excluded. | |
Parameters: | |
--by_group: Will separate out registrations by galaxy user group as well | |
year: Will return monthly user registrations from the start of [year] till now | |
		$ gxadmin query monthly-users-registered 2020 --by_group | |
month | Group name | count | |
------------+------------+------- | |
2020-08-01 | Group_1 | 1 | |
2020-08-01 | Group_2 | 1 | |
2020-08-01 | Group_3 | 1 | |
2020-08-01 | Group_4 | 3 | |
2020-07-01 | Group_1 | 1 | |
2020-07-01 | Group_2 | 6 | |
2020-07-01 | Group_3 | 2 | |
2020-07-01 | Group_4 | 6 | |
2020-07-01 | Group_5 | 2 | |
2020-07-01 | Group_6 | 1 | |
... | |
EOF | |
if (( $# > 0 )); then | |
for args in "$@"; do | |
if [ "$args" = "--by_group" ]; then | |
where_g="galaxy_user.id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id" | |
select="galaxy_group.name," | |
from="galaxy_group, user_group_association," | |
group=", galaxy_group.name" | |
else | |
where_y="date_trunc('year', galaxy_user.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date" | |
fi | |
done | |
if (( $# > 1 )); then | |
where="WHERE $where_y and $where_g" | |
else | |
where="WHERE $where_y $where_g" | |
fi | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', galaxy_user.create_time)::DATE AS month, | |
$select | |
count(*) | |
FROM | |
$from | |
galaxy_user | |
$where | |
GROUP BY | |
month | |
$group | |
ORDER BY | |
month DESC | |
EOF | |
} | |
query_monthly-users-active(){ ## [year] [--by_group]: Number of active users per month, running jobs | |
handle_help "$@" <<-EOF | |
Number of unique users each month who ran jobs. **NOTE**: does not include anonymous users. | |
Parameters: | |
--by_group: Separate out active users by galaxy user group | |
year: Will return monthly active users from the start of [year] till now | |
$ gxadmin query monthly-users-active 2018 | |
month | active_users | |
------------+-------------- | |
2018-12-01 | 811 | |
2018-11-01 | 658 | |
2018-10-01 | 583 | |
2018-09-01 | 444 | |
2018-08-01 | 342 | |
2018-07-01 | 379 | |
2018-06-01 | 370 | |
2018-05-01 | 330 | |
2018-04-01 | 274 | |
2018-03-01 | 186 | |
2018-02-01 | 168 | |
2018-01-01 | 122 | |
EOF | |
if (( $# > 0 )); then | |
for args in "$@"; do | |
if [ "$args" = "--by_group" ]; then | |
where_g="job.user_id = user_group_association.user_id and user_group_association.group_id = galaxy_group.id" | |
select="galaxy_group.name," | |
from=", user_group_association, galaxy_group" | |
group=", galaxy_group.name" | |
else | |
where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date" | |
fi | |
done | |
if (( $# > 1 )); then | |
where="WHERE $where_y and $where_g" | |
else | |
where="WHERE $where_y $where_g" | |
fi | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month, | |
$select | |
count(distinct job.user_id) as active_users | |
FROM | |
job | |
$from | |
$where | |
GROUP BY | |
month | |
$group | |
ORDER BY | |
month DESC | |
EOF | |
} | |
query_monthly-jobs(){ ## [year] [--by_group]: Number of jobs run each month | |
handle_help "$@" <<-EOF | |
Count jobs run each month | |
Parameters: | |
--by_group: Will separate out job counts for each month by galaxy user group | |
year: Will return number of monthly jobs run from the start of [year] till now | |
$ gxadmin query monthly-jobs 2018 | |
month | count | |
------------+-------- | |
2018-12-01 | 96941 | |
2018-11-01 | 94625 | |
2018-10-01 | 156940 | |
2018-09-01 | 103331 | |
2018-08-01 | 128658 | |
2018-07-01 | 90852 | |
2018-06-01 | 230470 | |
2018-05-01 | 182331 | |
2018-04-01 | 109032 | |
2018-03-01 | 197125 | |
2018-02-01 | 260931 | |
2018-01-01 | 25378 | |
EOF | |
if (( $# > 0 )); then | |
for args in "$@"; do | |
if [ "$args" = "--by_group" ]; then | |
where_g="job.user_id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id" | |
select="galaxy_group.name," | |
from="galaxy_group, user_group_association," | |
group=", galaxy_group.name" | |
else | |
where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date" | |
fi | |
done | |
if (( $# > 1 )); then | |
where="WHERE $where_y and $where_g" | |
else | |
where="WHERE $where_y $where_g" | |
fi | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::DATE AS month, | |
$select | |
count(*) | |
FROM | |
$from | |
job | |
$where | |
GROUP BY | |
month | |
$group | |
ORDER BY | |
month DESC | |
EOF | |
} | |
query_total-jobs(){ ## : Total number of jobs run by the Galaxy instance, grouped by state | |
handle_help "$@" <<-EOF | |
Count total number of jobs | |
$ gxadmin query total-jobs | |
state | count | |
---------+------- | |
deleted | 21 | |
error | 197 | |
ok | 798 | |
(3 rows) | |
EOF | |
fields="count=1" | |
tags="state=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
state, count(*) | |
FROM | |
job | |
GROUP BY | |
state | |
ORDER BY | |
state | |
EOF | |
} | |
query_job-history() { ##? <id>: Job state history for a specific job | |
handle_help "$@" <<-EOF | |
$ gxadmin query job-history 1 | |
time | state | |
-------------------------------+-------- | |
2018-11-20 17:15:09.297907+00 | error | |
2018-11-20 17:15:08.911972+00 | queued | |
2018-11-20 17:15:08.243363+00 | new | |
2018-11-20 17:15:08.198301+00 | upload | |
2018-11-20 17:15:08.19655+00 | new | |
(5 rows) | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
create_time AT TIME ZONE 'UTC' as time, | |
state | |
FROM job_state_history | |
WHERE job_id = $arg_id | |
EOF | |
} | |
query_job-inputs() { ##? <id>: Input datasets to a specific job | |
handle_help "$@" <<-EOF | |
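		Lists the input datasets of a job: both the history dataset association (hda) | |
		and the underlying dataset, with their state/deleted/purged flags and the | |
		object store they live in. The job ID below is illustrative: | |
		    $ gxadmin query job-inputs 12345 | |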
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
hda.id AS hda_id, | |
hda.state AS hda_state, | |
hda.deleted AS hda_deleted, | |
hda.purged AS hda_purged, | |
d.id AS d_id, | |
d.state AS d_state, | |
d.deleted AS d_deleted, | |
d.purged AS d_purged, | |
d.object_store_id AS object_store_id | |
FROM job j | |
JOIN job_to_input_dataset jtid | |
ON j.id = jtid.job_id | |
JOIN history_dataset_association hda | |
ON hda.id = jtid.dataset_id | |
JOIN dataset d | |
ON hda.dataset_id = d.id | |
WHERE j.id = $arg_id | |
EOF | |
} | |
query_job-outputs() { ##? <id>: Output datasets from a specific job | |
handle_help "$@" <<-EOF | |
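		Lists the output datasets of a job, mirroring job-inputs: the hda and the | |
		underlying dataset with their state/deleted/purged flags and object store ID. | |
		The job ID below is illustrative: | |
		    $ gxadmin query job-outputs 12345 | |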
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
hda.id AS hda_id, | |
hda.state AS hda_state, | |
hda.deleted AS hda_deleted, | |
hda.purged AS hda_purged, | |
d.id AS d_id, | |
d.state AS d_state, | |
d.deleted AS d_deleted, | |
d.purged AS d_purged, | |
d.object_store_id AS object_store_id | |
FROM job j | |
JOIN job_to_output_dataset jtod | |
ON j.id = jtod.job_id | |
JOIN history_dataset_association hda | |
ON hda.id = jtod.dataset_id | |
JOIN dataset d | |
ON hda.dataset_id = d.id | |
WHERE j.id = $arg_id | |
EOF | |
} | |
query_job-info() { ## <-|job_id [job_id [...]]> : Retrieve information about jobs given some job IDs | |
handle_help "$@" <<-EOF | |
Retrieves information on a job, like the host it ran on, | |
how long it ran for and the total memory. | |
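		Job IDs can be passed as arguments or, with '-', read from stdin (one ID per | |
		line). The IDs below are illustrative: | |
		    $ gxadmin query job-info 1 2 3 | |
		    $ printf '1\n2\n3\n' | gxadmin query job-info - | |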
id | create_time | update_time | tool_id | hostname | handler | runtime_seconds | memtotal | |
----- | ------------ | ----------- | ------------ | ------------ | -------- | --------------- | -------- | |
1 | | | 123f911b5f1 | 123f911b5f1 | handler0 | 20.35 | 20.35 GB | |
2 | | | cb0fabc0002 | cb0fabc0002 | handler1 | 14.93 | 5.96 GB | |
3 | | | 7e9e9b00b89 | 7e9e9b00b89 | handler1 | 14.24 | 3.53 GB | |
4 | | | 42f211e5e87 | 42f211e5e87 | handler4 | 14.06 | 1.79 GB | |
5 | | | 26cdba62c93 | 26cdba62c93 | handler0 | 12.97 | 1.21 GB | |
6 | | | fa87cddfcae | fa87cddfcae | handler1 | 7.01 | 987 MB | |
7 | | | 44d2a648aac | 44d2a648aac | handler3 | 6.70 | 900 MB | |
8 | | | 66c57b41194 | 66c57b41194 | handler1 | 6.43 | 500 MB | |
9 | | | 6b1467ac118 | 6b1467ac118 | handler0 | 5.45 | 250 MB | |
10 | | | d755361b59a | d755361b59a | handler2 | 5.19 | 100 MB | |
EOF | |
assert_count_ge $# 1 "Missing job IDs" | |
if [[ "$1" == "-" ]]; then | |
# read jobs from stdin | |
job_ids=$(cat | paste -s -d' ') | |
else | |
# read from $@ | |
job_ids=$@; | |
fi | |
job_ids_string=$(join_by ',' ${job_ids[@]}) | |
read -r -d '' QUERY <<-EOF | |
WITH hostname_query AS ( | |
SELECT job_id, | |
metric_value as hostname | |
FROM job_metric_text | |
WHERE job_id IN ($job_ids_string) | |
AND metric_name='HOSTNAME' | |
), | |
metric_num_query AS ( | |
SELECT job_id, | |
SUM(CASE WHEN metric_name='runtime_seconds' THEN metric_value END) runtime_seconds, | |
pg_size_pretty(SUM(CASE WHEN metric_name='memtotal' THEN metric_value END)) memtotal | |
FROM job_metric_numeric | |
WHERE job_id IN ($job_ids_string) | |
AND metric_name IN ('runtime_seconds', 'memtotal') | |
GROUP BY job_id | |
) | |
SELECT job.id, | |
job.create_time, | |
job.update_time, | |
job.tool_id, | |
job.handler, | |
hostname_query.hostname, | |
metric_num_query.runtime_seconds, | |
metric_num_query.memtotal | |
FROM job | |
FULL OUTER JOIN hostname_query ON hostname_query.job_id = job.id | |
FULL OUTER JOIN metric_num_query ON metric_num_query.job_id = job.id | |
WHERE job.id IN ($job_ids_string) | |
EOF | |
} | |
query_old-histories(){ ##? [weeks|15]: Lists histories that haven't been updated (used) for the given number of weeks | |
handle_help "$@" <<-EOF | |
		Histories (and their owners) that haven't been updated for the specified number of weeks. Default number of weeks is 15. | |
		$ gxadmin query old-histories 52 | |
id | update_time | user_id | email | name | published | deleted | purged | hid_counter | |
-------+----------------------------+---------+---------+--------------------+-----------+---------+--------+------------- | |
39903 | 2017-06-13 12:35:07.174749 | 834 | xxx@xxx | Unnamed history | f | f | f | 23 | |
1674 | 2017-06-13 14:08:30.017574 | 9 | xxx@xxx | SAHA project | f | f | f | 47 | |
40088 | 2017-06-15 04:10:48.879122 | 986 | xxx@xxx | Unnamed history | f | f | f | 3 | |
39023 | 2017-06-15 09:33:12.007002 | 849 | xxx@xxx | prac 4 new final | f | f | f | 297 | |
35437 | 2017-06-16 04:41:13.15785 | 731 | xxx@xxx | Unnamed history | f | f | f | 98 | |
40123 | 2017-06-16 13:43:24.948344 | 987 | xxx@xxx | Unnamed history | f | f | f | 22 | |
40050 | 2017-06-19 00:46:29.032462 | 193 | xxx@xxx | Telmatactis | f | f | f | 74 | |
12212 | 2017-06-20 14:41:03.762881 | 169 | xxx@xxx | Unnamed history | f | f | f | 24 | |
39523 | 2017-06-21 01:34:52.226653 | 9 | xxx@xxx | OSCC Cell Lines | f | f | f | 139 | |
EOF | |
email=$(gdpr_safe galaxy_user.email 'email') | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
history.id, | |
history.update_time AT TIME ZONE 'UTC' as update_time, | |
history.user_id, | |
$email, | |
history.name, | |
history.published, | |
history.deleted, | |
history.purged, | |
history.hid_counter | |
FROM | |
history, | |
galaxy_user | |
WHERE | |
history.update_time < (now() AT TIME ZONE 'UTC' - '$arg_weeks weeks'::interval) AND | |
history.user_id = galaxy_user.id AND | |
history.deleted = FALSE AND | |
history.published = FALSE | |
ORDER BY | |
history.update_time desc | |
EOF | |
} | |
# TODO(hxr): generic summation by metric? Leave math to consumer? | |
query_jobs-max-by-cpu-hours() { ## : Top jobs by CPU hours consumed (requires CGroups metrics) | |
handle_help "$@" <<-EOF | |
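		Reports the top jobs by allocated CPU time, based on the cgroup 'cpuacct.usage' | |
		job metric (CPU time in nanoseconds), converted to CPU days. Instances that do | |
		not record cgroup job metrics will return no rows. | |
		    $ gxadmin query jobs-max-by-cpu-hours | |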
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.id, | |
job.tool_id, | |
job.create_time, | |
metric_value/1000000000/3600/24 as cpu_days | |
FROM job, job_metric_numeric | |
WHERE | |
job.id = job_metric_numeric.job_id | |
AND metric_name = 'cpuacct.usage' | |
		ORDER BY cpu_days desc | |
LIMIT 30 | |
EOF | |
} | |
query_errored-jobs(){ ##? [hours|24] [--details]: Lists jobs that errored in the last N hours. | |
handle_help "$@" <<-EOF | |
		Lists details of jobs created in the last N hours that are in the 'error' state. Default = 24 hours | |
$ gxadmin query errored-jobs 2 | |
id | create_time | tool_id | tool_version | handler | destination_id | job_runner_external_id | email | |
----+-------------+---------+--------------+----------+----------------+------------------------+------------------ | |
1 | | upload1 | 1.1.0 | handler2 | slurm_normal | 42 | [email protected] | |
2 | | cut1 | 1.1.1 | handler1 | slurm_normal | 43 | [email protected] | |
3 | | bwa | 0.7.17.1 | handler0 | slurm_multi | 44 | [email protected] | |
4 | | trinity | 2.9.1 | handler1 | pulsar_bigmem | 4 | [email protected] | |
EOF | |
email=$(gdpr_safe galaxy_user.email 'email') | |
details= | |
if [[ -n "$arg_details" ]]; then | |
details="job.job_stderr," | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.id, | |
job.create_time AT TIME ZONE 'UTC' as create_time, | |
job.tool_id, | |
job.tool_version, | |
job.handler, | |
job.destination_id, | |
job.job_runner_external_id, | |
			$details | |
			$email | |
FROM | |
job, | |
galaxy_user | |
WHERE | |
job.create_time >= (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND | |
job.state = 'error' AND | |
job.user_id = galaxy_user.id | |
ORDER BY | |
job.id | |
EOF | |
} | |
query_workflow-invocation-status() { ## : Report on how many workflow invocations are in 'new'/'ready' state, by scheduler and handler | |
handle_help "$@" <<-EOF | |
Really only intended to be used in influx queries. | |
EOF | |
fields="count=3" | |
tags="scheduler=0;handler=1;state=2" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(scheduler, 'none'), | |
COALESCE(handler, 'none'), | |
state, | |
count(*) | |
FROM | |
workflow_invocation | |
WHERE state in ('new', 'ready') | |
GROUP BY handler, scheduler, state | |
EOF | |
} | |
query_workflow-invocation-totals() { ## : Report on overall workflow counts, to ensure throughput | |
handle_help "$@" <<-EOF | |
Really only intended to be used in influx queries. | |
EOF | |
fields="count=1" | |
tags="state=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(state, 'unknown'), count(*) | |
FROM | |
workflow_invocation | |
GROUP BY state | |
EOF | |
} | |
query_tool-new-errors() { ##? [weeks|4] [--short-tool-id]: Summarize percent of tool runs in error over the past weeks for "new tools" | |
handle_help "$@" <<-EOF | |
See jobs-in-error summary for recent tools (tools whose first execution is in recent weeks). | |
		$ gxadmin query tool-new-errors --short-tool-id 1 | |
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler | |
-----------------------------------+-----------+-------------------+----------------+---------------+--------------+----------------- | |
rnateam/graphclust_align_cluster/ | 55 | 0.145454545454545 | 0 | 8 | 0 | handler_main_10 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 46 | 0.347826086956522 | 0 | 16 | 0 | handler_main_3 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 43 | 0.186046511627907 | 0 | 8 | 0 | handler_main_0 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 41 | 0.390243902439024 | 0 | 16 | 0 | handler_main_4 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.325 | 0 | 13 | 0 | handler_main_6 | |
Filter1 | 40 | 0.125 | 0 | 5 | 0 | handler_main_0 | |
devteam/bowtie2/bowtie2/2.3.4.3 | 40 | 0.125 | 0 | 5 | 0 | handler_main_7 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.3 | 0 | 12 | 0 | handler_main_2 | |
EOF | |
tool_id="j.tool_id" | |
if [[ -n $arg_short_tool_id ]]; then | |
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id" | |
fi | |
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5" | |
tags="tool_id=0;handler=6" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
$tool_id, | |
count(*) AS tool_runs, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed, | |
j.handler | |
FROM job AS j | |
WHERE | |
j.tool_id | |
IN ( | |
SELECT tool_id | |
FROM job AS j | |
				GROUP BY j.tool_id | |
				HAVING min(j.create_time) > (now() - '$arg_weeks weeks'::INTERVAL) | |
) | |
GROUP BY j.tool_id, j.handler | |
		ORDER BY percent_errored DESC | |
EOF | |
} | |
query_tool-errors() { ##? [--short-tool-id] [weeks|4]: Summarize percent of tool runs in error over the past weeks for all tools that have failed (most popular tools first) | |
handle_help "$@" <<-EOF | |
See jobs-in-error summary for recently executed tools that have failed at least 10% of the time. | |
$ gxadmin query tool-errors --short-tool-id 1 | |
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler | |
-----------------------------------+-----------+-------------------+----------------+---------------+--------------+----------------- | |
rnateam/graphclust_align_cluster/ | 55 | 0.145454545454545 | 0 | 8 | 0 | handler_main_10 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 46 | 0.347826086956522 | 0 | 16 | 0 | handler_main_3 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 43 | 0.186046511627907 | 0 | 8 | 0 | handler_main_0 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 41 | 0.390243902439024 | 0 | 16 | 0 | handler_main_4 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.325 | 0 | 13 | 0 | handler_main_6 | |
Filter1 | 40 | 0.125 | 0 | 5 | 0 | handler_main_0 | |
devteam/bowtie2/bowtie2/2.3.4.3 | 40 | 0.125 | 0 | 5 | 0 | handler_main_7 | |
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.3 | 0 | 12 | 0 | handler_main_2 | |
EOF | |
# TODO: Fix this nonsense for proper args | |
tool_id="j.tool_id" | |
if [[ -n $arg_short_tool_id ]]; then | |
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id" | |
fi | |
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5" | |
tags="tool_id=0;handler=6" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
$tool_id, | |
count(*) AS tool_runs, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed, | |
j.handler | |
FROM | |
job AS j | |
WHERE | |
j.create_time > (now() - '$arg_weeks weeks'::INTERVAL) | |
GROUP BY | |
j.tool_id, j.handler | |
HAVING | |
sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 10.0 | |
ORDER BY | |
tool_runs DESC | |
EOF | |
} | |
query_tool-likely-broken() { ##? [--short-tool-id] [weeks|4]: Find tools executed in recent weeks that appear to be substantially broken (based on their job failure rate) | |
handle_help "$@" <<-EOF | |
This runs an identical query to tool-errors, except filtering for tools | |
which were run more than 4 times, and have a failure rate over 95%. | |
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler | |
-------------------------------------------------------+-----------+-----------------+----------------+---------------+--------------+----------------- | |
simon-gladman/velvetoptimiser/velvetoptimiser/2.2.6 | 14 | 1 | 0 | 14 | 0 | handler_main_7 | |
bgruening/hicexplorer_hicplottads/hicexplorer_hicplott| 9 | 1 | 0 | 9 | 0 | handler_main_0 | |
bgruening/text_processing/tp_replace_in_column/1.1.3 | 8 | 1 | 0 | 8 | 0 | handler_main_3 | |
bgruening/text_processing/tp_awk_tool/1.1.1 | 7 | 1 | 0 | 7 | 0 | handler_main_5 | |
rnateam/dorina/dorina_search/1.0.0 | 7 | 1 | 0 | 7 | 0 | handler_main_2 | |
bgruening/text_processing/tp_replace_in_column/1.1.3 | 6 | 1 | 0 | 6 | 0 | handler_main_9 | |
rnateam/dorina/dorina_search/1.0.0 | 6 | 1 | 0 | 6 | 0 | handler_main_11 | |
rnateam/dorina/dorina_search/1.0.0 | 6 | 1 | 0 | 6 | 0 | handler_main_8 | |
EOF | |
# TODO: Fix this nonsense for proper args | |
tool_id="j.tool_id" | |
if [[ -n $arg_short_tool_id ]]; then | |
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id" | |
fi | |
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5" | |
tags="tool_id=0;handler=6" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
$tool_id, | |
count(*) AS tool_runs, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed, | |
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored, | |
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed, | |
j.handler | |
FROM | |
job AS j | |
WHERE | |
j.create_time > (now() - '$arg_weeks weeks'::INTERVAL) | |
GROUP BY | |
j.tool_id, j.handler | |
HAVING | |
sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 95.0 | |
AND count(*) > 4 | |
ORDER BY | |
tool_runs DESC | |
EOF | |
} | |
query_user-recent-aggregate-jobs() { ##? <user> [days|7]: Show aggregate information for jobs in past N days for user (by email/id/username) | |
handle_help "$@" <<-EOF | |
Obtain an overview of tools that a user has run in the past N days | |
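		The user may be given by email, numeric ID, or username; the values below are illustrative: | |
		    $ gxadmin query user-recent-aggregate-jobs [email protected] 30 | |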
EOF | |
# args | |
user_filter=$(get_user_filter "$arg_user") | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
date_trunc('day', create_time), tool_id, state, count(*) | |
FROM | |
job | |
JOIN | |
galaxy_user on galaxy_user.id = job.user_id | |
WHERE | |
$user_filter AND create_time > (now() - '$arg_days days'::INTERVAL) | |
GROUP BY | |
date_trunc, tool_id, state | |
ORDER BY | |
date_trunc DESC | |
EOF | |
} | |
query_user-history-list() { ##? <user> [--size]: List a user's (by email/id/username) histories. | |
handle_help "$@" <<-EOF | |
		Obtain an overview of a user's histories. By default histories are ordered by their last update time. | |
		Passing '--size' orders them by total size instead. | |
$ gxadmin query user-history-list <username|id|email> | |
ID | Name | Last Updated | Size | |
-------+--------------------------------------+----------------------------+----------- | |
52 | Unnamed history | 2019-08-08 15:15:32.284678 | 293 MB | |
30906 | Unnamed history | 2019-07-23 16:25:36.084019 | 13 kB | |
EOF | |
# args | |
user_filter=$(get_user_filter "$arg_user") | |
order_col="uh.update_time" | |
if [[ -n "$arg_size" ]]; then | |
order_col="hs.hist_size" | |
fi | |
read -r -d '' QUERY <<-EOF | |
WITH user_histories AS ( | |
SELECT id, | |
name, | |
update_time | |
FROM history | |
WHERE user_id IN ( | |
SELECT id | |
FROM galaxy_user | |
WHERE $user_filter | |
) AND NOT purged | |
), history_sizes AS ( | |
SELECT history_id, | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "hist_size" | |
FROM history_dataset_association, | |
dataset | |
WHERE history_id IN ( | |
SELECT id | |
FROM user_histories | |
) AND history_dataset_association.dataset_id = dataset.id | |
GROUP BY history_id | |
) | |
SELECT uh.id as "ID", | |
uh.name as "Name", | |
uh.update_time as "Last Updated", | |
pg_size_pretty(hs.hist_size) as "Size" | |
FROM user_histories uh, | |
history_sizes hs | |
WHERE uh.id = hs.history_id | |
ORDER BY $order_col DESC | |
EOF | |
} | |
query_history-contents() { ## <history_id> [--dataset|--collection]: List datasets and/or collections in a history | |
handle_help "$@" <<-EOF | |
		List the datasets and/or dataset collections in a given history. | |
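		The history ID below is illustrative: | |
		    $ gxadmin query history-contents 42 | |
		    $ gxadmin query history-contents 42 --collections | |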
EOF | |
local dsq clq q | |
dsq="select dataset_id, name, hid, visible, deleted, copied_from_history_dataset_association_id as copied_from from history_dataset_association where history_id = $1" | |
clq="select collection_id, name, hid, visible, deleted, copied_from_history_dataset_collection_association_id as copied_from from history_dataset_collection_association where history_id = $1;" | |
if [[ $2 == "--dataset" ]] || [[ $2 == "--datasets" ]]; then | |
q="$dsq" | |
elif [[ $2 == "--collection" ]] || [[ $2 == "--collections" ]]; then | |
q="$clq" | |
else | |
q="$dsq;$clq" | |
fi | |
read -r -d '' QUERY <<-EOF | |
$q | |
EOF | |
} | |
query_hdca-info() { ##? <hdca_id>: Information on a dataset collection | |
handle_help "$@" <<-EOF | |
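		Dumps the raw dataset_collection row (SELECT *) for a given collection ID, | |
		e.g. to check its collection type. The ID below is illustrative: | |
		    $ gxadmin query hdca-info 12345 | |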
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT * | |
FROM dataset_collection | |
WHERE id = $arg_hdca_id | |
EOF | |
} | |
query_hdca-datasets() { ##? <hdca_id>: List of files in a dataset collection | |
handle_help "$@" <<-EOF | |
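		Lists the elements of a dataset collection: their index, identifier, and the | |
		HDA/LDDA or child collection they point to. The ID below is illustrative: | |
		    $ gxadmin query hdca-datasets 12345 | |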
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT element_index, hda_id, ldda_id, child_collection_id, element_identifier | |
FROM dataset_collection_element | |
WHERE dataset_collection_id = $arg_hdca_id | |
ORDER by element_index asc | |
EOF | |
} | |
query_jobs-queued-internal-by-handler() { ## : How many queued jobs do not have external IDs, by handler | |
handle_help "$@" <<-EOF | |
Identify which handlers have a backlog of jobs which should be | |
receiving external cluster IDs but have not yet. | |
handler | count | |
---------------- + ------ | |
handler_main_0 | 14 | |
handler_main_1 | 4 | |
handler_main_10 | 13 | |
handler_main_2 | 11 | |
handler_main_3 | 14 | |
handler_main_4 | 12 | |
handler_main_5 | 9 | |
handler_main_6 | 7 | |
handler_main_7 | 13 | |
handler_main_8 | 9 | |
handler_main_9 | 14 | |
EOF | |
fields="count=1" | |
tags="handler=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
handler, | |
count(handler) | |
FROM | |
job | |
WHERE | |
state = 'queued' | |
AND job_runner_external_id IS null | |
GROUP BY | |
handler | |
EOF | |
} | |
query_jobs-queued() { ## : How many queued jobs have external cluster IDs | |
handle_help "$@" <<-EOF | |
Shows the distribution of jobs in queued state, whether or not they have received an external ID. | |
n | count | |
------------ | ------ | |
unprocessed | 118 | |
processed | 37 | |
EOF | |
fields="count=1" | |
tags="group=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
CASE WHEN job_runner_external_id IS NOT null THEN 'processed' ELSE 'unprocessed' END as n, | |
count(*) | |
FROM | |
job | |
WHERE | |
state = 'queued' | |
GROUP BY n | |
EOF | |
} | |
query_users-with-oidc() { ## : How many users logged in with OIDC | |
handle_help "$@" <<-EOF | |
provider | count | |
-------- | ------ | |
elixir | 5 | |
EOF | |
fields="count=1" | |
tags="provider=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT provider, count(distinct user_id) FROM oidc_user_authnz_tokens GROUP BY provider | |
EOF | |
} | |
query_history-runtime-system() { ##? <history_id>: Sum of runtimes by all jobs in a history | |
handle_help "$@" <<-EOF | |
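		Sums the 'runtime_seconds' job metric over every job in the history and | |
		reports it as an interval. The history ID below is illustrative: | |
		    $ gxadmin query history-runtime-system 42 | |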
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
(sum(job_metric_numeric.metric_value)::INT8 || 'seconds')::INTERVAL | |
FROM | |
job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id | |
WHERE | |
job.history_id = $arg_history_id AND metric_name = 'runtime_seconds' | |
EOF | |
} | |
query_history-runtime-wallclock() { ##? <history_id>: Time as elapsed by a clock on the wall | |
handle_help "$@" <<-EOF | |
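		Reports the wallclock span of a history: the time between the creation of its | |
		first job and the last update of its most recent job. The history ID below is | |
		illustrative: | |
		    $ gxadmin query history-runtime-wallclock 42 | |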
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
max(job.update_time) - min(job.create_time) | |
FROM | |
job | |
WHERE | |
job.history_id = $arg_history_id | |
EOF | |
} | |
query_history-runtime-system-by-tool() { ##? <history_id>: Sum of runtimes by all jobs in a history, split by tool | |
handle_help "$@" <<-EOF | |
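		Like history-runtime-system, but broken down per tool and sorted by the | |
		largest total runtime. The history ID below is illustrative: | |
		    $ gxadmin query history-runtime-system-by-tool 42 | |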
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.tool_id, | |
(sum(job_metric_numeric.metric_value)::INT || 'seconds')::INTERVAL | |
FROM | |
job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id | |
WHERE | |
job.history_id = $arg_history_id AND metric_name = 'runtime_seconds' | |
GROUP BY | |
job.tool_id | |
ORDER BY | |
"interval" DESC | |
EOF | |
} | |
query_upload-gb-in-past-hour() { ##? [hours|1]: Sum in bytes of files uploaded in the past N hours (default: 1) | |
handle_help "$@" <<-EOF | |
		Quick output, mostly useful for graphing how heavily people are currently uploading. | |
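		For example, for the past 3 hours (the figure reported is in bytes): | |
		    $ gxadmin query upload-gb-in-past-hour 3 | |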
EOF | |
fields="count=0" | |
tags="hours=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
coalesce(sum(coalesce(dataset.total_size, coalesce(dataset.file_size, 0))), 0), | |
$arg_hours as hours | |
FROM | |
job | |
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id | |
LEFT JOIN history_dataset_association ON | |
job_to_output_dataset.dataset_id = history_dataset_association.id | |
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id | |
WHERE | |
job.tool_id = 'upload1' | |
AND job.create_time AT TIME ZONE 'UTC' > (now() - '$arg_hours hours'::INTERVAL) | |
EOF | |
} | |
query_queue-detail-by-handler() { ##? <handler_id>: List jobs for a specific handler | |
handle_help "$@" <<-EOF | |
List the jobs currently being processed by a specific handler | |
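		The handler name below is illustrative; use one of the handler IDs configured for your instance: | |
		    $ gxadmin query queue-detail-by-handler handler_main_0 | |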
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, | |
create_time, | |
state, | |
regexp_replace(tool_id, '.*toolshed.*/repos/', ''), | |
job_runner_name, | |
job_runner_external_id, | |
destination_id | |
FROM | |
job | |
WHERE | |
handler = '$arg_handler_id' AND state IN ('new', 'queued', 'running') | |
EOF | |
} | |
query_pg-cache-hit() { ## : Check postgres in-memory cache hit ratio | |
handle_help "$@" <<-EOF | |
Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/ | |
		Reports the cache hit ratio: is Postgres managing to serve commonly | |
		requested blocks from memory, or is it having to read them from disk | |
		every time? | |
heap_read | heap_hit | ratio | |
		-----------+----------+------------------------ | |
29 | 64445 | 0.99955020628470391165 | |
EOF | |
fields="read=0;hit=1;ratio=2" | |
tags="" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
sum(heap_blks_read) as heap_read, | |
sum(heap_blks_hit) as heap_hit, | |
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio | |
FROM | |
pg_statio_user_tables | |
EOF | |
} | |
query_pg-table-bloat() { ##? [--human]: show table and index bloat in your database ordered by most wasteful | |
handle_help "$@" <<-EOF | |
Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/ | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
if [[ -n "$arg_human" ]]; then | |
waste_query="pg_size_pretty(raw_waste)" | |
else | |
waste_query="raw_waste" | |
fi | |
fields="bloat=3;ratio=4" | |
tags="type=0;schema=1;object=2" | |
read -r -d '' QUERY <<-EOF | |
WITH constants AS ( | |
SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 4 AS ma | |
), bloat_info AS ( | |
SELECT | |
ma,bs,schemaname,tablename, | |
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr, | |
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2 | |
FROM ( | |
SELECT | |
schemaname, tablename, hdr, ma, bs, | |
SUM((1-null_frac)*avg_width) AS datawidth, | |
MAX(null_frac) AS maxfracsum, | |
hdr+( | |
SELECT 1+count(*)/8 | |
FROM pg_stats s2 | |
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename | |
) AS nullhdr | |
FROM pg_stats s, constants | |
GROUP BY 1,2,3,4,5 | |
) AS foo | |
), table_bloat AS ( | |
SELECT | |
schemaname, tablename, cc.relpages, bs, | |
CEIL((cc.reltuples*((datahdr+ma- | |
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta | |
FROM bloat_info | |
JOIN pg_class cc ON cc.relname = bloat_info.tablename | |
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema' | |
), index_bloat AS ( | |
SELECT | |
schemaname, tablename, bs, | |
coalesce(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, c2.relpages,0 AS ipages, | |
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols | |
FROM bloat_info | |
JOIN pg_class cc ON cc.relname = bloat_info.tablename | |
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema' | |
JOIN pg_index i ON indrelid = cc.oid | |
JOIN pg_class c2 ON c2.oid = i.indexrelid | |
) | |
SELECT | |
type, schemaname, object_name, bloat, $waste_query as waste | |
FROM | |
(SELECT | |
'table' as type, | |
schemaname, | |
tablename as object_name, | |
ROUND(CASE WHEN otta=0 THEN 0.0 ELSE table_bloat.relpages/otta::numeric END,1) AS bloat, | |
CASE WHEN relpages < otta THEN '0' ELSE (bs*(table_bloat.relpages-otta)::bigint)::bigint END AS raw_waste | |
FROM | |
table_bloat | |
UNION | |
SELECT | |
'index' as type, | |
schemaname, | |
tablename || '::' || iname as object_name, | |
ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat, | |
CASE WHEN ipages < iotta THEN '0' ELSE (bs*(ipages-iotta))::bigint END AS raw_waste | |
FROM | |
index_bloat) bloat_summary | |
ORDER BY raw_waste DESC, bloat DESC | |
EOF | |
} | |
query_pg-mandelbrot() { ## : show the Mandelbrot set | |
handle_help "$@" <<-EOF | |
Copied from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH RECURSIVE Z(IX, IY, CX, CY, X, Y, I) AS ( | |
SELECT IX, IY, X::float, Y::float, X::float, Y::float, 0 | |
FROM (select -2.2 + 0.031 * i, i from generate_series(0,101) as i) as xgen(x,ix), | |
(select -1.5 + 0.031 * i, i from generate_series(0,101) as i) as ygen(y,iy) | |
UNION ALL | |
SELECT IX, IY, CX, CY, X * X - Y * Y + CX AS X, Y * X * 2 + CY, I + 1 | |
FROM Z | |
WHERE X * X + Y * Y < 16::float | |
AND I < 100 | |
) | |
SELECT array_to_string(array_agg(SUBSTRING(' .,,,-----++++%%%%@@@@#### ', LEAST(GREATEST(I,1),27), 1)),'') | |
FROM ( | |
SELECT IX, IY, MAX(I) AS I | |
FROM Z | |
GROUP BY IY, IX | |
ORDER BY IY, IX | |
) AS ZT | |
GROUP BY IY | |
ORDER BY IY | |
EOF | |
} | |
query_pg-index-usage() { ## : calculates your index hit rate (effective databases are at 99% and up) | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
-1 means "Insufficient Data", this was changed to a numeric value to be acceptable to InfluxDB | |
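Example (per the description above, 99 and up is healthy; -1 means too little data): | |
$ gxadmin query pg-index-usage | |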
EOF | |
fields="index_usage=1;rows=2" | |
tags="relname=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT relname, | |
CASE COALESCE(idx_scan, 0) | |
WHEN 0 THEN -1 | |
ELSE (100 * idx_scan / (seq_scan + idx_scan)) | |
END percent_of_times_index_used, | |
n_live_tup rows_in_table | |
FROM | |
pg_stat_user_tables | |
ORDER BY | |
n_live_tup DESC | |
EOF | |
} | |
query_pg-index-size() { ##? [--human]: show the size of each index, descending by size | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
if [[ -n "$arg_human" ]]; then | |
human_size="pg_size_pretty(sum(c.relpages::bigint*8192)::bigint)" | |
else | |
human_size="sum(c.relpages::bigint*8192)::bigint" | |
fi | |
fields="size=1" | |
tags="relname=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
c.relname AS name, | |
$human_size AS size | |
FROM pg_class c | |
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) | |
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema') | |
AND n.nspname !~ '^pg_toast' | |
AND c.relkind='i' | |
GROUP BY c.relname | |
ORDER BY sum(c.relpages) DESC | |
EOF | |
} | |
query_pg-long-running-queries() { ## : show all queries longer than five minutes by descending duration | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
pid, | |
now() - pg_stat_activity.query_start AS duration, | |
query AS query | |
FROM | |
pg_stat_activity | |
WHERE | |
pg_stat_activity.query <> ''::text | |
AND state <> 'idle' | |
AND now() - pg_stat_activity.query_start > interval '5 minutes' | |
ORDER BY | |
now() - pg_stat_activity.query_start DESC | |
EOF | |
} | |
query_pg-table-size() { ##? [--human]: show the size of the tables (excluding indexes), descending by size | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
if [[ -n "$arg_human" ]]; then | |
# TODO: there has got to be a less ugly way to do this | |
human_size="pg_size_pretty(" | |
human_after=")" | |
else | |
human_size="" | |
human_after="" | |
fi | |
fields="table_size=1;index_size=2" | |
tags="relname=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
c.relname AS name, | |
${human_size}pg_table_size(c.oid)${human_after} AS size, | |
${human_size}pg_indexes_size(c.oid)${human_after} AS index_size | |
FROM pg_class c | |
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) | |
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema') | |
AND n.nspname !~ '^pg_toast' | |
AND c.relkind='r' | |
ORDER BY pg_table_size(c.oid) DESC | |
EOF | |
} | |
query_pg-unused-indexes() { ##? [--human]: show unused and almost unused indexes | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
From their documentation: | |
> "Ordered by their size relative to the number of index scans. | |
> Exclude indexes of very small tables (less than 5 pages), | |
> where the planner will almost invariably select a sequential scan, | |
> but may not in the future as the table grows" | |
EOF | |
if [[ -n "$arg_human" ]]; then | |
# TODO: there has got to be a less ugly way to do this | |
human_size="pg_size_pretty(" | |
human_after=")" | |
else | |
human_size="" | |
human_after="" | |
fi | |
fields="index_size=2;index_scans=3" | |
tags="table=0;index=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
schemaname || '.' || relname AS table, | |
indexrelname AS index, | |
${human_size}pg_relation_size(i.indexrelid)${human_after} AS index_size, | |
COALESCE(idx_scan, 0) as index_scans | |
FROM pg_stat_user_indexes ui | |
JOIN pg_index i ON ui.indexrelid = i.indexrelid | |
WHERE NOT indisunique AND idx_scan < 50 AND pg_relation_size(relid) > 5 * 8192 | |
ORDER BY | |
pg_relation_size(i.indexrelid) / nullif(idx_scan, 0) DESC NULLS FIRST, | |
pg_relation_size(i.indexrelid) DESC | |
EOF | |
} | |
query_pg-vacuum-stats() { ## : show dead rows and whether an automatic vacuum is expected to be triggered | |
handle_help "$@" <<-EOF | |
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH table_opts AS ( | |
SELECT | |
pg_class.oid, relname, nspname, array_to_string(reloptions, '') AS relopts | |
FROM | |
pg_class INNER JOIN pg_namespace ns ON relnamespace = ns.oid | |
), vacuum_settings AS ( | |
SELECT | |
oid, relname, nspname, | |
CASE | |
WHEN relopts LIKE '%autovacuum_vacuum_threshold%' | |
THEN substring(relopts, '.*autovacuum_vacuum_threshold=([0-9.]+).*')::integer | |
ELSE current_setting('autovacuum_vacuum_threshold')::integer | |
END AS autovacuum_vacuum_threshold, | |
CASE | |
WHEN relopts LIKE '%autovacuum_vacuum_scale_factor%' | |
THEN substring(relopts, '.*autovacuum_vacuum_scale_factor=([0-9.]+).*')::real | |
ELSE current_setting('autovacuum_vacuum_scale_factor')::real | |
END AS autovacuum_vacuum_scale_factor | |
FROM | |
table_opts | |
) | |
SELECT | |
vacuum_settings.nspname AS schema, | |
vacuum_settings.relname AS table, | |
to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_vacuum, | |
to_char(psut.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum, | |
to_char(pg_class.reltuples, '9G999G999G999') AS rowcount, | |
to_char(psut.n_dead_tup, '9G999G999G999') AS dead_rowcount, | |
to_char(autovacuum_vacuum_threshold | |
+ (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples), '9G999G999G999') AS autovacuum_threshold, | |
CASE | |
WHEN autovacuum_vacuum_threshold + (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples) < psut.n_dead_tup | |
THEN 'yes' | |
END AS expect_autovacuum | |
FROM | |
pg_stat_user_tables psut INNER JOIN pg_class ON psut.relid = pg_class.oid | |
INNER JOIN vacuum_settings ON pg_class.oid = vacuum_settings.oid | |
ORDER BY 1 | |
EOF | |
} | |
query_pg-stat-bgwriter() { ## : Stats about the behaviour of the bgwriter, checkpoints, buffers, etc. | |
handle_help "$@" <<-EOF | |
EOF | |
fields="checkpoints_timed=0;checkpoints_req=1;checkpoint_write_time=2;checkpoint_sync_time=3;buffers_checkpoint=4;buffers_clean=5;maxwritten_clean=6;buffers_backend=7;buffers_backend_fsync=8;buffers_alloc=9" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
checkpoints_timed, | |
checkpoints_req, | |
checkpoint_write_time, | |
checkpoint_sync_time, | |
buffers_checkpoint, | |
buffers_clean, | |
maxwritten_clean, | |
buffers_backend, | |
buffers_backend_fsync, | |
buffers_alloc | |
FROM | |
pg_stat_bgwriter | |
EOF | |
} | |
query_pg-stat-user-tables() { ## : stats about tables (tuples, index scans, vacuums, analyzes) | |
handle_help "$@" <<-EOF | |
EOF | |
tags="schemaname=0;relname=1" | |
fields="seq_scan=2;seq_tup_read=3;idx_scan=4;idx_tup_fetch=5;n_tup_ins=6;n_tup_upd=7;n_tup_del=8;n_tup_hot_upd=9;n_live_tup=10;n_dead_tup=11;vacuum_count=12;autovacuum_count=13;analyze_count=14;autoanalyze_count=15" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
schemaname, | |
relname, | |
seq_scan, | |
seq_tup_read, | |
COALESCE(idx_scan, 0), | |
COALESCE(idx_tup_fetch, 0), | |
n_tup_ins, | |
n_tup_upd, | |
n_tup_del, | |
n_tup_hot_upd, | |
n_live_tup, | |
n_dead_tup, | |
vacuum_count, | |
autovacuum_count, | |
analyze_count, | |
autoanalyze_count | |
FROM | |
pg_stat_user_tables | |
EOF | |
} | |
query_data-origin-distribution-merged() { | |
summary="$(summary_statistics data $human)" | |
username=$(gdpr_safe job.user_id galaxy_user) | |
read -r -d '' QUERY <<-EOF | |
WITH asdf AS ( | |
SELECT | |
'total' as origin, | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data, | |
date_trunc('month', dataset.create_time) as created, | |
$username | |
FROM job | |
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id | |
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id | |
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id | |
GROUP BY | |
origin, job.user_id, created, galaxy_user | |
) | |
SELECT | |
origin, | |
round(data, 2 - length(data::text)), | |
created, | |
galaxy_user | |
FROM asdf | |
ORDER BY galaxy_user desc | |
EOF | |
} | |
query_data-origin-distribution() { ## : data sources (uploaded vs derived) | |
handle_help "$@" <<-EOF | |
Break down the source of data in the server, uploaded data vs derived (created as output from a tool) | |
Recommendation is to run with GDPR_MODE so you can safely share this information: | |
GDPR_MODE=\$(openssl rand -hex 24 2>/dev/null) gxadmin tsvquery data-origin-distribution | gzip > data-origin.tsv.gz | |
Output looks like: | |
derived 130000000000 2019-07-01 00:00:00 fff4f423d06 | |
derived 61000000000 2019-08-01 00:00:00 fff4f423d06 | |
created 340000000 2019-08-01 00:00:00 fff4f423d06 | |
created 19000000000 2019-07-01 00:00:00 fff4f423d06 | |
derived 180000000000 2019-04-01 00:00:00 ffd28c0cf8c | |
created 21000000000 2019-04-01 00:00:00 ffd28c0cf8c | |
derived 1700000000 2019-06-01 00:00:00 ffd28c0cf8c | |
derived 120000000 2019-06-01 00:00:00 ffcb567a837 | |
created 62000000 2019-05-01 00:00:00 ffcb567a837 | |
created 52000000 2019-06-01 00:00:00 ffcb567a837 | |
derived 34000000 2019-07-01 00:00:00 ffcb567a837 | |
EOF | |
username=$(gdpr_safe job.user_id galaxy_user) | |
echo "$username" | |
read -r -d '' QUERY <<-EOF | |
WITH asdf AS ( | |
SELECT | |
case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin, | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data, | |
date_trunc('month', dataset.create_time) as created, | |
$username | |
FROM job | |
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id | |
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id | |
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id | |
GROUP BY | |
origin, job.user_id, created, galaxy_user | |
) | |
SELECT | |
origin, | |
round(data, 2 - length(data::text)), | |
created, | |
galaxy_user | |
FROM asdf | |
ORDER BY galaxy_user, created desc | |
EOF | |
} | |
query_data-origin-distribution-summary() { ##? [--human]: breakdown of data sources (uploaded vs derived) | |
handle_help "$@" <<-EOF | |
Break down the source of data in the server, uploaded data vs derived (created as output from a tool) | |
This query builds a table with the volume of derived and uploaded data per user, and then summarizes this: | |
origin | min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | stddev | |
------- | ------- | --------- | ------- | ------ | --------- | ------- | ------- | ----- | -------- | |
created | 0 bytes | 17 MB | 458 MB | 36 GB | 11 GB | 130 GB | 568 GB | 11 TB | 257 GB | |
derived | 0 bytes | 39 MB | 1751 MB | 200 GB | 28 GB | 478 GB | 2699 GB | 90 TB | 2279 GB | |
EOF | |
tags="dataorigin=0" | |
fields="min=1;q1=2;median=3;mean=4;q3=5;p95=6;p99=7;max=8;sum=9;stddev=10" | |
summary="$(summary_statistics data $arg_human)" | |
read -r -d '' QUERY <<-EOF | |
WITH user_job_data AS ( | |
SELECT | |
case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin, | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data, | |
job.user_id | |
FROM job | |
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id | |
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id | |
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id | |
GROUP BY | |
origin, job.user_id | |
) | |
SELECT | |
origin, | |
$summary | |
FROM user_job_data | |
GROUP BY origin | |
EOF | |
} | |
query_aq() { ## <table> <column> <-|job_id [job_id [...]]>: Given a list of IDs from a table (e.g. 'job'), access a specific column from that table | |
handle_help "$@" <<-EOF | |
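Example usage (table, column, and IDs here are illustrative); IDs can come from arguments or stdin: | |
$ gxadmin query aq job state 123 456 789 | |
$ cat job_ids.txt | gxadmin query aq job tool_id - | |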
EOF | |
table=$1; shift | |
column=$1; shift | |
if [[ "$1" == "-" ]]; then | |
# read jobs from stdin | |
ids=$(cat | paste -s -d' ') | |
else | |
# read from $@ | |
ids=$@; | |
fi | |
ids_string=$(join_by ',' ${ids[@]}) | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
$column | |
FROM $table | |
WHERE id in ($ids_string) | |
EOF | |
} | |
query_q() { ## <query>: Passes a raw SQL query directly through to the database | |
handle_help "$@" <<-EOF | |
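Example (the SQL is passed through verbatim): | |
$ gxadmin query q "select count(*) from job" | |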
EOF | |
QUERY="$@" | |
} | |
query_good-for-pulsar() { ## : Look for jobs EU would like to send to pulsar | |
handle_help "$@" <<-EOF | |
This selects all jobs and finds two things: | |
- sum of input sizes | |
- runtime | |
and then returns a simple /score/ of (input/runtime) and sorts on that | |
hopefully identifying things with small inputs and long runtimes. | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH job_data AS ( | |
SELECT | |
regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id, | |
SUM(d.total_size) AS size, | |
MIN(jmn.metric_value) AS runtime, | |
SUM(d.total_size) / min(jmn.metric_value) AS score | |
FROM job j | |
LEFT JOIN job_to_input_dataset jtid ON j.id = jtid.job_id | |
LEFT JOIN history_dataset_association hda ON jtid.dataset_id = hda.id | |
LEFT JOIN dataset d ON hda.dataset_id = d.id | |
LEFT JOIN job_metric_numeric jmn ON j.id = jmn.job_id | |
WHERE jmn.metric_name = 'runtime_seconds' | |
AND d.total_size IS NOT NULL | |
GROUP BY j.id | |
) | |
SELECT | |
tool_id, | |
percentile_cont(0.50) WITHIN GROUP (ORDER BY score) ::bigint AS median_score, | |
percentile_cont(0.50) WITHIN GROUP (ORDER BY runtime) ::bigint AS median_runtime, | |
pg_size_pretty(percentile_cont(0.50) WITHIN GROUP (ORDER BY size) ::bigint) AS median_size, | |
count(*) | |
FROM job_data | |
GROUP BY tool_id | |
ORDER BY median_score ASC | |
EOF | |
} | |
query_jobs-ready-to-run() { ## : Find jobs ready to run (Mostly a performance test) | |
handle_help "$@" <<-EOF | |
Mostly a performance test | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
EXISTS( | |
SELECT | |
history_dataset_association.id, | |
history_dataset_association.history_id, | |
history_dataset_association.dataset_id, | |
history_dataset_association.create_time, | |
history_dataset_association.update_time, | |
history_dataset_association.state, | |
history_dataset_association.copied_from_history_dataset_association_id, | |
history_dataset_association.copied_from_library_dataset_dataset_association_id, | |
history_dataset_association.name, | |
history_dataset_association.info, | |
history_dataset_association.blurb, | |
history_dataset_association.peek, | |
history_dataset_association.tool_version, | |
history_dataset_association.extension, | |
history_dataset_association.metadata, | |
history_dataset_association.parent_id, | |
history_dataset_association.designation, | |
history_dataset_association.deleted, | |
history_dataset_association.visible, | |
history_dataset_association.extended_metadata_id, | |
history_dataset_association.version, | |
history_dataset_association.hid, | |
history_dataset_association.purged, | |
history_dataset_association.hidden_beneath_collection_instance_id | |
FROM | |
history_dataset_association, | |
job_to_output_dataset | |
WHERE | |
job.id = job_to_output_dataset.job_id | |
AND history_dataset_association.id | |
= job_to_output_dataset.dataset_id | |
AND history_dataset_association.deleted = true | |
) | |
AS anon_1, | |
EXISTS( | |
SELECT | |
history_dataset_collection_association.id | |
FROM | |
history_dataset_collection_association, | |
job_to_output_dataset_collection | |
WHERE | |
job.id = job_to_output_dataset_collection.job_id | |
AND history_dataset_collection_association.id | |
= job_to_output_dataset_collection.dataset_collection_id | |
AND history_dataset_collection_association.deleted | |
= true | |
) | |
AS anon_2, | |
job.id AS job_id, | |
job.create_time AS job_create_time, | |
job.update_time AS job_update_time, | |
job.history_id AS job_history_id, | |
job.library_folder_id AS job_library_folder_id, | |
job.tool_id AS job_tool_id, | |
job.tool_version AS job_tool_version, | |
job.state AS job_state, | |
job.info AS job_info, | |
job.copied_from_job_id AS job_copied_from_job_id, | |
job.command_line AS job_command_line, | |
job.dependencies AS job_dependencies, | |
job.param_filename AS job_param_filename, | |
job.runner_name AS job_runner_name_1, | |
job.stdout AS job_stdout, | |
job.stderr AS job_stderr, | |
job.exit_code AS job_exit_code, | |
job.traceback AS job_traceback, | |
job.session_id AS job_session_id, | |
job.user_id AS job_user_id, | |
job.job_runner_name AS job_job_runner_name, | |
job.job_runner_external_id | |
AS job_job_runner_external_id, | |
job.destination_id AS job_destination_id, | |
job.destination_params AS job_destination_params, | |
job.object_store_id AS job_object_store_id, | |
job.imported AS job_imported, | |
job.params AS job_params, | |
job.handler AS job_handler | |
FROM | |
job | |
WHERE | |
job.state = 'new' | |
AND job.handler IS NULL | |
AND job.handler = 'handler0' | |
EOF | |
} | |
query_workers() { ## : Retrieve a list of Galaxy worker processes | |
handle_help "$@" <<-EOF | |
This retrieves a list of Galaxy worker processes. | |
This functionality is only available on Galaxy | |
20.01 or later. | |
server_name | hostname | pid | |
------------------- | -------- | --- | |
main.web.1 | server1 | 123 | |
main.job-handlers.1 | server2 | 456 | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
server_name, | |
hostname, | |
pid | |
FROM | |
worker_process | |
WHERE | |
pid IS NOT NULL | |
EOF | |
} | |
query_pg-rows-per-table() { ## : Print rows per table | |
handle_help "$@" <<-EOF | |
This retrieves a list of tables in the database and their size | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
n.nspname AS table_schema, | |
c.relname AS table_name, | |
c.reltuples AS rows | |
FROM | |
pg_class AS c | |
JOIN pg_namespace AS n ON | |
n.oid = c.relnamespace | |
WHERE | |
c.relkind = 'r' | |
AND n.nspname | |
NOT IN ( | |
'information_schema', | |
'pg_catalog' | |
) | |
ORDER BY | |
c.reltuples DESC | |
EOF | |
} | |
query_dump-users() { ##? [--apikey] [--email] : Dump the list of users and their emails | |
handle_help "$@" <<-EOF | |
This retrieves a list of all users | |
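Example (flags are optional; the email column goes through gdpr_safe, so consider setting GDPR_MODE before sharing output): | |
$ gxadmin query dump-users --email --apikey | |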
EOF | |
if [[ -n "$arg_email" ]]; then | |
email=",$(gdpr_safe email email)" | |
else | |
email="" | |
fi | |
if [[ -n "$arg_apikey" ]]; then | |
# NOTE: assumes the standard Galaxy api_keys table (user_id, key); users with multiple keys will produce multiple rows | |
apikey=", api_keys.key AS apikey" | |
apikeyjoin="LEFT JOIN api_keys ON api_keys.user_id = galaxy_user.id" | |
else | |
apikey="" | |
apikeyjoin="" | |
fi | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
username | |
$email | |
$apikey | |
FROM | |
galaxy_user | |
$apikeyjoin | |
ORDER BY | |
id desc | |
EOF | |
} | |
query_job-metrics() { ## : Retrieves input size, runtime, memory for all executed jobs | |
handle_help "$@" <<-EOF | |
Dump runtime stats for ALL jobs: | |
$ gxadmin query job-metrics | |
job_id | tool_id | state | total_filesize | num_files | runtime_seconds | slots | memory_bytes | create_time | |
--------+--------------------------------------+---------+----------------+-----------+-----------------+-----------+--------------+---------------------------- | |
19 | require_format | ok | 5098 | 1 | 4.0000000 | 1.0000000 | | 2018-12-04 17:17:02.148239 | |
48 | __SET_METADATA__ | ok | | 0 | 4.0000000 | 1.0000000 | | 2019-02-05 22:46:33.848141 | |
49 | upload1 | ok | | | 6.0000000 | 1.0000000 | | 2019-02-05 22:58:41.610146 | |
50 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:30:11.645826 | |
51 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:30:12.18259 | |
52 | upload1 | ok | | | 7.0000000 | 1.0000000 | | 2019-02-07 21:31:15.304868 | |
54 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:31:16.116164 | |
53 | upload1 | ok | | | 7.0000000 | 1.0000000 | | 2019-02-07 21:31:15.665948 | |
... | |
989 | circos | error | 671523 | 12 | 14.0000000 | 1.0000000 | | 2020-04-30 10:13:33.872872 | |
990 | circos | error | 671523 | 12 | 10.0000000 | 1.0000000 | | 2020-04-30 10:19:36.72646 | |
991 | circos | error | 671523 | 12 | 10.0000000 | 1.0000000 | | 2020-04-30 10:21:00.460471 | |
992 | circos | ok | 671523 | 12 | 21.0000000 | 1.0000000 | | 2020-04-30 10:31:35.366913 | |
993 | circos | error | 588747 | 6 | 8.0000000 | 1.0000000 | | 2020-04-30 11:12:17.340591 | |
994 | circos | error | 588747 | 6 | 9.0000000 | 1.0000000 | | 2020-04-30 11:15:27.076502 | |
995 | circos | error | 588747 | 6 | 42.0000000 | 1.0000000 | | 2020-04-30 11:16:41.19449 | |
996 | circos | ok | 588747 | 6 | 48.0000000 | 1.0000000 | | 2020-04-30 11:21:51.49684 | |
997 | circos | ok | 588747 | 6 | 46.0000000 | 1.0000000 | | 2020-04-30 11:23:52.455536 | |
**WARNING** | |
!> This can be very slow for large databases and there is no tool filtering; every job and dataset record is scanned. | |
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH dataset_filesizes AS ( | |
SELECT | |
job_to_input_dataset.job_id, sum(file_size) AS total_filesize, | |
count(file_size) AS num_files FROM dataset | |
LEFT JOIN job_to_input_dataset ON dataset.id = job_to_input_dataset.dataset_id | |
GROUP BY job_to_input_dataset.job_id | |
) | |
SELECT | |
job.id AS job_id, | |
job.tool_id, | |
job.state, | |
dataset_filesizes.total_filesize, | |
dataset_filesizes.num_files, | |
jmn1.metric_value AS runtime_seconds, | |
jmn2.metric_value AS slots, | |
jmn3.metric_value AS memory_bytes, | |
job.create_time AS create_time | |
FROM job | |
LEFT JOIN dataset_filesizes ON job.id = dataset_filesizes.job_id | |
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'runtime_seconds') jmn1 ON jmn1.job_id = job.id | |
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'galaxy_slots') jmn2 ON jmn2.job_id = job.id | |
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'memory.memsw.max_usage_in_bytes') jmn3 ON jmn3.job_id = job.id | |
EOF | |
} | |
query_history-core-hours() { ##? [history-name-ilike]: Produces the median core hour count for histories matching a name filter | |
handle_help "$@" <<-EOF | |
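Example (the filter is a case-insensitive substring match on history names): | |
$ gxadmin query history-core-hours rnaseq | |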
EOF | |
read -r -d '' QUERY <<-EOF | |
WITH | |
toolavg | |
AS ( | |
SELECT | |
tool_id, history_id, round(sum(a.metric_value * b.metric_value / 3600), 2) AS cpu_hours | |
FROM | |
job_metric_numeric AS a, job_metric_numeric AS b, job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
AND history_id in (select id from history where name ilike '%$arg_history_name%') | |
GROUP BY | |
tool_id, history_id | |
), | |
toolmedian | |
AS ( | |
SELECT | |
toolavg.tool_id, percentile_cont(0.5) WITHIN GROUP (ORDER BY cpu_hours) | |
FROM | |
toolavg | |
GROUP BY | |
toolavg.tool_id | |
) | |
SELECT | |
sum(toolmedian.percentile_cont) | |
FROM | |
toolmedian | |
EOF | |
} | |
query_pulsar-gb-transferred() { ##? [--bymonth] [--byrunner] [--human]: Counts up datasets transferred and output file size produced by jobs running on destinations like pulsar_* | |
handle_help "$@" <<-EOF | |
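Example (flags can be combined; --human prints sizes via pg_size_pretty): | |
$ gxadmin query pulsar-gb-transferred --bymonth --byrunner --human | |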
EOF | |
orderby="" | |
declare -a ordering | |
if [[ -n "$arg_bymonth" ]]; then | |
orderby="ORDER BY sent.month ASC" | |
ordering+=("sent.month") | |
fi | |
if [[ -n "$arg_byrunner" ]]; then | |
if [[ ! -n "$arg_bymonth" ]]; then | |
orderby="ORDER BY sent.runner ASC" | |
fi | |
ordering+=("sent.runner") | |
fi | |
if [[ -n "$arg_human" ]]; then | |
pg_size_pretty_a="pg_size_pretty(" | |
pg_size_pretty_b=")" | |
fi | |
groupby="" | |
data_string="${ordering[*]}" | |
csvcols="${data_string//${IFS:0:1}/,}" | |
if (( ${#ordering[@]} > 0 )); then | |
groupby="GROUP BY $csvcols" | |
csvcols="$csvcols," | |
fi | |
read -r -d '' QUERY <<-EOF | |
WITH | |
sent | |
AS ( | |
SELECT | |
job.id AS job, | |
date_trunc('month', job.create_time)::DATE AS month, | |
job.job_runner_name AS runner, | |
ds_in.total_size AS size | |
FROM | |
job | |
LEFT JOIN job_to_input_dataset AS jtid ON job.id = jtid.job_id | |
LEFT JOIN history_dataset_association AS hda_in ON jtid.dataset_id = hda_in.id | |
LEFT JOIN dataset AS ds_in ON hda_in.dataset_id = ds_in.id | |
WHERE | |
job_runner_name LIKE 'pulsar%' | |
ORDER BY | |
job.id DESC | |
), | |
recv | |
AS ( | |
SELECT | |
job.id AS job, | |
date_trunc('month', job.create_time)::DATE AS month, | |
job.job_runner_name AS runner, | |
ds_out.total_size AS size | |
FROM | |
job | |
LEFT JOIN job_to_output_dataset AS jtid ON job.id = jtid.job_id | |
LEFT JOIN history_dataset_association AS hda_out ON jtid.dataset_id = hda_out.id | |
LEFT JOIN dataset AS ds_out ON hda_out.dataset_id = ds_out.id | |
WHERE | |
job_runner_name LIKE 'pulsar%' | |
ORDER BY | |
job.id DESC | |
) | |
SELECT | |
$csvcols ${pg_size_pretty_a}sum(sent.size)${pg_size_pretty_b} AS sent, ${pg_size_pretty_a}sum(recv.size)${pg_size_pretty_b} AS recv, count(sent.size) as job_count | |
FROM | |
sent FULL JOIN recv ON sent.job = recv.job | |
$groupby | |
$orderby | |
EOF | |
} | |
registered_subcommands="$registered_subcommands filter" | |
_filter_short_help="Some text filtering and processing commands" | |
filter_pg2md() { ## : Convert postgres table format outputs to something that can be pasted as markdown | |
handle_help "$@" <<-EOF | |
Imagine doing something like: | |
$ gxadmin query active-users 2018 | gxadmin filter pg2md | |
unique_users | month | |
------------- | -------------------- | |
811 | 2018-12-01 00:00:00 | |
658 | 2018-11-01 00:00:00 | |
583 | 2018-10-01 00:00:00 | |
444 | 2018-09-01 00:00:00 | |
342 | 2018-08-01 00:00:00 | |
379 | 2018-07-01 00:00:00 | |
370 | 2018-06-01 00:00:00 | |
330 | 2018-05-01 00:00:00 | |
274 | 2018-04-01 00:00:00 | |
186 | 2018-03-01 00:00:00 | |
168 | 2018-02-01 00:00:00 | |
122 | 2018-01-01 00:00:00 | |
and it should produce a nicely formatted table | |
EOF | |
cat | sed 's/--+--/- | -/g;s/^\(\s\+\)\([^|]\+\) |/\2 \1|/g' | head -n -2 | |
} | |
filter_identicon(){ ## : Convert an input data stream into an identicon (e.g. with hostname) | |
handle_help "$@" <<-EOF | |
Given an input data stream, digest it, and colour it using the same logic as digest-color | |
$ echo test | ./gxadmin filter identicon | |
██████ | |
██ ██ | |
██ ██ ██ | |
██████ | |
██ ██ ██ | |
(Imagine that it is a nice pink/blue colour scheme) | |
EOF | |
cat | $GXADMIN_PYTHON -c "$identicon_script" | |
} | |
filter_digest-color() { ## : Color an input stream based on the contents (e.g. hostname) | |
handle_help "$@" <<-EOF | |
Colors entire input stream based on digest of entire input's contents. | |
Mostly useful for colouring a hostname or some similar value. | |
$ echo test | ./gxadmin filter digest-color | |
test | |
(Imagine that it is light blue text on a pink background) | |
**NOTE** If the output isn't coloured properly, try: | |
export TERM=screen-256color | |
EOF | |
data="$(cat)" | |
fg_color=$((16#$(echo "$data" | perl -pe "chomp if eof" | sha256sum | cut -c1-2))) | |
bg_color=$((fg_color + 15)) | |
echo "$(tput setaf $fg_color)$(tput setab $bg_color)${data}$(tput sgr0)" | |
} | |
registered_subcommands="$registered_subcommands uwsgi" | |
_uwsgi_short_help="Galaxy Handler/Zergling management (SystemD only)" | |
uwsgi_stats-influx(){ ## <addr>: InfluxDB formatted output for the current stats | |
handle_help "$@" <<-EOF | |
Contact a specific uWSGI stats address (requires uwsgi binary on path) | |
and requests the current stats + formats them for InfluxDB. For some | |
reason it has trouble with localhost vs IP address, so recommend that | |
you use IP. | |
$ gxadmin uwsgi stats-influx 127.0.0.1:9191 | |
uwsgi.locks,addr=127.0.0.1:9191,group=user_0 count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=signal count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=filemon count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=timer count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=rbtimer count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=cron count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=thunder count=2006859 | |
uwsgi.locks,addr=127.0.0.1:9191,group=rpc count=0 | |
uwsgi.locks,addr=127.0.0.1:9191,group=snmp count=0 | |
uwsgi.general,addr=127.0.0.1:9191 listen_queue=0,listen_queue_errors=0,load=0,signal_queue=0 | |
uwsgi.sockets,addr=127.0.0.1:9191,name=127.0.0.1:4001,proto=uwsgi queue=0,max_queue=100,shared=0,can_offload=0 | |
uwsgi.worker,addr=127.0.0.1:9191,id=1 accepting=1,requests=65312,exceptions=526,harakiri_count=26,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=17433008661,respawn_count=27,tx=15850829410,avg_rt=71724 | |
uwsgi.worker,addr=127.0.0.1:9191,id=2 accepting=1,requests=67495,exceptions=472,harakiri_count=51,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15467746010,respawn_count=52,tx=15830867066,avg_rt=65380 | |
uwsgi.worker,addr=127.0.0.1:9191,id=3 accepting=1,requests=67270,exceptions=520,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14162158015,respawn_count=36,tx=15799661545,avg_rt=73366 | |
uwsgi.worker,addr=127.0.0.1:9191,id=4 accepting=1,requests=66434,exceptions=540,harakiri_count=34,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15740205807,respawn_count=35,tx=16231969649,avg_rt=75468 | |
uwsgi.worker,addr=127.0.0.1:9191,id=5 accepting=1,requests=67021,exceptions=534,harakiri_count=38,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14573155758,respawn_count=39,tx=16517287963,avg_rt=140855 | |
uwsgi.worker,addr=127.0.0.1:9191,id=6 accepting=1,requests=66810,exceptions=483,harakiri_count=24,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=19107513635,respawn_count=25,tx=15945313469,avg_rt=64032 | |
uwsgi.worker,addr=127.0.0.1:9191,id=7 accepting=1,requests=66544,exceptions=460,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14240478391,respawn_count=36,tx=15499531841,avg_rt=114981 | |
uwsgi.worker,addr=127.0.0.1:9191,id=8 accepting=1,requests=67577,exceptions=517,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14767971195,respawn_count=36,tx=15780639229,avg_rt=201275 | |
For multiple zerglings you can run this for each and just 2>/dev/null | |
PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9190 2>/dev/null | |
PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9191 2>/dev/null | |
exit 0 | |
And it will fetch only data for responding uwsgis. | |
EOF | |
address="$1"; shift | |
# fetch data | |
uwsgi=$(which uwsgi) | |
data="$($uwsgi --connect-and-read "$address" 2>&1)" | |
echo "$data" | \ | |
jq -r '.locks[] | to_entries[] | "uwsgi.locks,addr='"$address"',group=\(.key) count=\(.value)"' | \ | |
sed 's/group=user 0/group=user_0/g' | |
echo "$data" | \ | |
jq -r '. | "uwsgi.general,addr='"$address"' listen_queue=\(.listen_queue),listen_queue_errors=\(.listen_queue_errors),load=\(.load),signal_queue=\(.signal_queue)"' | |
echo "$data" | \ | |
jq -r '.sockets[] | "uwsgi.sockets,addr='"$address"',name=\(.name),proto=\(.proto) queue=\(.queue),max_queue=\(.max_queue),shared=\(.shared),can_offload=\(.can_offload)"' | |
echo "$data" | \ | |
jq -r '.workers[] | "uwsgi.worker,addr='"$address"',id=\(.id) accepting=\(.accepting),requests=\(.requests),exceptions=\(.exceptions),harakiri_count=\(.harakiri_count),signals=\(.signals),signal_queue=\(.signal_queue),status=\"\(.status)\",rss=\(.rss),vsz=\(.vsz),running_time=\(.running_time),respawn_count=\(.respawn_count),tx=\(.tx),avg_rt=\(.avg_rt)"' | \ | |
sed 's/"busy"/1/g;s/"idle"/0/g;' | |
} | |
uwsgi_status() { ## : Current system status | |
handle_help "$@" <<-EOF | |
Current status of all uwsgi processes | |
EOF | |
echo "galaxy-zergpool: $(systemctl status galaxy-zergpool | grep Active:)" | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do | |
service=$(basename "$folder") | |
echo "$service $(systemctl status $service | grep Active:)" | |
done | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do | |
service=$(basename "$folder") | |
echo "$service $(systemctl status $service | grep Active:)" | |
done | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do | |
service=$(basename "$folder") | |
echo "$service $(systemctl status $service | grep Active:)" | |
done | |
} | |
uwsgi_memory() { ## : Current system memory usage | |
handle_help "$@" <<-EOF | |
Obtain memory usage of the various Galaxy processes | |
Also consider using systemd-cgtop | |
EOF | |
echo "galaxy-zergpool.service $(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)" | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do | |
service=$(basename "$folder") | |
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")" | |
done | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do | |
service=$(basename "$folder") | |
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")" | |
done | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do | |
service=$(basename "$folder") | |
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")" | |
done | |
} | |
uwsgi_pids() { ## : Galaxy process PIDs | |
handle_help "$@" <<-EOF | |
Obtain memory usage of the various Galaxy processes | |
EOF | |
echo "galaxy-zergpool: $(systemctl status galaxy-zergpool | grep PID: | cut -d' ' -f 4)" | |
for i in {0..3}; do | |
echo "galaxy-zergling@$i: $(systemctl status galaxy-zergling@$i | grep PID: | cut -d' ' -f 4)" | |
done | |
for i in {0..11}; do | |
echo "galaxy-handler@$i: $(systemctl status galaxy-handler@$i | grep PID: | cut -d' ' -f 4)" | |
done | |
} | |
uwsgi_stats() { ## : uwsgi stats | |
handle_help "$@" <<-EOF | |
EOF | |
uwsgi_stats-influx 127.0.0.1:4010 2>/dev/null || true | |
uwsgi_stats-influx 127.0.0.1:4011 2>/dev/null || true | |
uwsgi_stats-influx 127.0.0.1:4012 2>/dev/null || true | |
uwsgi_stats-influx 127.0.0.1:4013 2>/dev/null || true | |
echo "systemd service=galaxy-zergpool memory=$(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)" | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do | |
service=$(basename "$folder") | |
echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")" | |
done | |
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do | |
service=$(basename "$folder") | |
echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")" | |
done | |
} | |
uwsgi_zerg-swap() { ## : Swap zerglings in order (unintelligent version) | |
handle_help "$@" <<-EOF | |
This is the "dumb" version which loops across the zerglings and restarts them in series | |
EOF | |
active="$(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1 | wc -l)" | |
if (( active == 1 )); then | |
error "Won't restart when only a single zergling is active as this will trigger downtime" | |
exit 1 | |
fi | |
# Obtain current running ones | |
for zergling in $(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1); do | |
echo "Restarting $zergling" | |
systemctl restart "$zergling" | |
number=$(echo "$zergling" | sed 's/.*@//g') | |
stats="127.0.0.1:401$number" | |
wait_for_url "$stats" | |
success "$zergling is alive" | |
done | |
} | |
uwsgi_zerg-scale-up() { ## : Add another zergling to deal with high load | |
handle_help "$@" <<-EOF | |
EOF | |
# Obtain current running ones | |
an_inactive=$(uwsgi_status | grep inactive | grep zergling | head -n1 | cut -d: -f1) | |
echo "$an_inactive is inactive, starting" | |
systemctl start "$an_inactive" | |
} | |
#uwsgi_zerg-scale-down() { ## : Remove an extraneous zergling | |
#handle_help "$@" <<-EOF | |
#EOF | |
## Obtain current running ones | |
#active="$(status | grep -v inactive | grep zergling | cut -d: -f1)" | |
#num_active=$(echo "$active" | wc -l) | |
#if (( num_active <= 2 )); then | |
#error "We do not permit scaling below 2 zerglings" | |
#exit 1 | |
#fi | |
## Kill highest numbered one | |
#warning "$to_kill will be stopped" | |
#systemctl stop "$to_kill" | |
#} | |
uwsgi_zerg-strace() { ## [number]: Strace a zergling | |
handle_help "$@" <<-EOF | |
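Example (needs strace installed and permission to attach to the zergling PIDs): | |
$ gxadmin uwsgi zerg-strace 2 | |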
EOF | |
if (( $# > 0 )); then | |
number=$1; | |
fi | |
procs=$(uwsgi_pids | grep "zergling@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g') | |
strace -e open,openat -p "$procs" | |
} | |
uwsgi_handler-strace() { ## [number]: Strace a handler | |
handle_help "$@" <<-EOF | |
EOF | |
if (( $# > 0 )); then | |
number=$1; | |
fi | |
procs=$(uwsgi_pids | grep "handler@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g') | |
strace -e open,openat -p "$procs" | |
} | |
uwsgi_handler-restart() { ## : Restart all handlers | |
handle_help "$@" <<-EOF | |
EOF | |
for i in {0..11}; do | |
systemctl restart galaxy-handler@$i & | |
done | |
} | |
uwsgi_lastlog(){ ## : Fetch the number of seconds since the last log message was written | |
handle_help "$@" <<-EOF | |
Lets you know if any of your workers or handlers have maybe stopped processing jobs. | |
$ gxadmin uwsgi lastlog | |
journalctl.lastlog,service=galaxy-handler@0 seconds=8 | |
journalctl.lastlog,service=galaxy-handler@1 seconds=2 | |
journalctl.lastlog,service=galaxy-handler@2 seconds=186 | |
journalctl.lastlog,service=galaxy-handler@3 seconds=19 | |
journalctl.lastlog,service=galaxy-handler@4 seconds=6 | |
journalctl.lastlog,service=galaxy-handler@5 seconds=80 | |
journalctl.lastlog,service=galaxy-handler@6 seconds=52 | |
journalctl.lastlog,service=galaxy-handler@7 seconds=1 | |
journalctl.lastlog,service=galaxy-handler@8 seconds=79 | |
journalctl.lastlog,service=galaxy-handler@9 seconds=40 | |
journalctl.lastlog,service=galaxy-handler@10 seconds=123 | |
journalctl.lastlog,service=galaxy-handler@11 seconds=13 | |
journalctl.lastlog,service=galaxy-zergling@0 seconds=0 | |
journalctl.lastlog,service=galaxy-zergling@1 seconds=0 | |
journalctl.lastlog,service=galaxy-zergling@2 seconds=2866 | |
EOF | |
NOW=$(date +%s) | |
for i in {0..11}; do | |
lines=$(journalctl -u galaxy-handler@$i -n 1 --no-pager) | |
if [[ ! $lines == *"No entries"* ]]; then | |
timestamp=$(journalctl -u galaxy-handler@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}'); | |
unix=$(date -d "$timestamp" +%s) | |
date_diff=$((NOW - unix)); | |
echo "journalctl.lastlog,service=galaxy-handler@$i seconds=$date_diff"; | |
fi | |
done | |
for i in {0..4}; do | |
lines=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager) | |
if [[ ! $lines == *"No entries"* ]]; then | |
timestamp=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}'); | |
unix=$(date -d "$timestamp" +%s) | |
date_diff=$((NOW - unix)); | |
echo "journalctl.lastlog,service=galaxy-zergling@$i seconds=$date_diff"; | |
fi | |
done | |
} | |
uwsgi_active-users() { ## : Count active users | |
handle_help "$@" <<-EOF | |
Count active users and return an influx compatible measurement | |
EOF | |
users=$(journalctl -u galaxy-zerg* --since '10 minutes ago' | grep pid: | awk '{print $10}' | sort -u | wc -l) | |
echo "journalctl.activeusers,service=galaxy count=${users}" | |
} | |
registered_subcommands="$registered_subcommands galaxy" | |
_galaxy_short_help="Galaxy Administration" | |
galaxy_cleanup() { ## [days]: Cleanup histories/hdas/etc for past N days (default=30) | |
handle_help "$@" <<-EOF | |
Cleanup histories/hdas/etc for the past N days using Galaxy's scripts/cleanup_datasets/pgcleanup.py | |
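Example (both env vars are required; the paths here are placeholders): | |
$ GALAXY_ROOT=/srv/galaxy/server GALAXY_LOG_DIR=/var/log/galaxy gxadmin galaxy cleanup 60 | |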
EOF | |
days=30 | |
if (( $# > 0 )); then | |
days=$1 | |
fi | |
assert_set_env GALAXY_ROOT | |
assert_set_env GALAXY_LOG_DIR | |
mkdir -p $GALAXY_LOG_DIR | |
run_date=$(date --rfc-3339=seconds) | |
for action in {delete_userless_histories,delete_exported_histories,purge_deleted_users,purge_deleted_histories,purge_deleted_hdas,purge_historyless_hdas,purge_hdas_of_purged_histories,delete_datasets,purge_datasets}; do | |
start_time=$(date +%s) | |
cd "$GALAXY_ROOT"; | |
$GXADMIN_PYTHON \ | |
"$GALAXY_ROOT/scripts/cleanup_datasets/pgcleanup.py" \ | |
-U \ | |
-o "$days" \ | |
-l "$GALAXY_LOG_DIR" \ | |
-s $action \ | |
-w 128MB \ | |
>> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.log" \ | |
2>> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.err"; | |
ec=$? # capture pgcleanup.py's exit code before anything else overwrites \$? | |
finish_time=$(date +%s) | |
runtime=$(( finish_time - start_time )) | |
# Something that telegraf can consume | |
if (( ec == 0 )); then | |
echo "cleanup_datasets,group=$action success=1,runtime=$runtime" | |
else | |
echo "cleanup_datasets,group=$action success=0,runtime=$runtime" | |
fi | |
done | |
} | |
galaxy_encode() { ## <decoded-id>: Encode an ID | |
handle_help "$@" <<-EOF | |
Encode an unencoded ID | |
$ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy encode 123 | |
6fe4eea8c591a9c4 | |
EOF | |
assert_set_env VIRTUAL_ENV | |
assert_set_env GALAXY_ROOT | |
assert_set_env GALAXY_CONFIG_FILE | |
assert_count $# 1 "Missing ID" | |
python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" encode "$1" | |
} | |
galaxy_decode() { ## <encoded-id>: Decode an encoded ID | |
handle_help "$@" <<-EOF | |
Decode an encoded ID | |
$ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy decode 6fe4eea8c591a9c4 | |
123 | |
EOF | |
assert_set_env VIRTUAL_ENV | |
assert_set_env GALAXY_ROOT | |
assert_set_env GALAXY_CONFIG_FILE | |
assert_count $# 1 "Missing ID" | |
python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" decode "$1" | |
} | |
galaxy_migrate-tool-install-to-sqlite() { ## : Converts normal postgres toolshed repository tables into the SQLite version | |
handle_help "$@" <<-EOF | |
$ gxadmin galaxy migrate-tool-install-to-sqlite | |
Creating new sqlite database: galaxy_install.sqlite | |
Migrating tables | |
export: tool_shed_repository | |
import: tool_shed_repository | |
... | |
export: repository_repository_dependency_association | |
import: repository_repository_dependency_association | |
Complete | |
EOF | |
# Export tables | |
if [[ -f galaxy_install.sqlite ]]; then | |
error "galaxy_install.sqlite exists, not overwriting" | |
exit 1 | |
fi | |
success "Creating new sqlite database: galaxy_install.sqlite" | |
empty_schema=$(mktemp) | |
echo " | |
PRAGMA foreign_keys=OFF; | |
BEGIN TRANSACTION; | |
CREATE TABLE migrate_version ( | |
repository_id VARCHAR(250) NOT NULL, | |
repository_path TEXT, | |
version INTEGER, | |
PRIMARY KEY (repository_id) | |
); | |
CREATE TABLE tool_shed_repository ( | |
id INTEGER NOT NULL, | |
create_time DATETIME, | |
update_time DATETIME, | |
tool_shed VARCHAR(255), | |
name VARCHAR(255), | |
description TEXT, | |
owner VARCHAR(255), | |
changeset_revision VARCHAR(255), | |
deleted BOOLEAN, | |
metadata BLOB, | |
includes_datatypes BOOLEAN, | |
installed_changeset_revision VARCHAR(255), | |
uninstalled BOOLEAN, | |
dist_to_shed BOOLEAN, | |
ctx_rev VARCHAR(10), | |
status VARCHAR(255), | |
error_message TEXT, | |
tool_shed_status BLOB, | |
PRIMARY KEY (id), | |
CHECK (deleted IN (0, 1)) | |
); | |
CREATE TABLE tool_version ( | |
id INTEGER NOT NULL, | |
create_time DATETIME, | |
update_time DATETIME, | |
tool_id VARCHAR(255), | |
tool_shed_repository_id INTEGER, | |
PRIMARY KEY (id), | |
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id) | |
); | |
CREATE TABLE tool_version_association ( | |
id INTEGER NOT NULL, | |
tool_id INTEGER NOT NULL, | |
parent_id INTEGER NOT NULL, | |
PRIMARY KEY (id), | |
FOREIGN KEY(tool_id) REFERENCES tool_version (id), | |
FOREIGN KEY(parent_id) REFERENCES tool_version (id) | |
); | |
CREATE TABLE migrate_tools ( | |
repository_id VARCHAR(255), | |
repository_path TEXT, | |
version INTEGER | |
); | |
CREATE TABLE tool_dependency ( | |
id INTEGER NOT NULL, | |
create_time DATETIME, | |
update_time DATETIME, | |
tool_shed_repository_id INTEGER NOT NULL, | |
name VARCHAR(255), | |
version VARCHAR(40), | |
type VARCHAR(40), | |
status VARCHAR(255), | |
error_message TEXT, | |
PRIMARY KEY (id), | |
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id) | |
); | |
CREATE TABLE repository_dependency ( | |
id INTEGER NOT NULL, | |
create_time DATETIME, | |
update_time DATETIME, | |
tool_shed_repository_id INTEGER NOT NULL, | |
PRIMARY KEY (id), | |
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id) | |
); | |
CREATE TABLE repository_repository_dependency_association ( | |
id INTEGER NOT NULL, | |
create_time DATETIME, | |
update_time DATETIME, | |
tool_shed_repository_id INTEGER, | |
repository_dependency_id INTEGER, | |
PRIMARY KEY (id), | |
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id), | |
FOREIGN KEY(repository_dependency_id) REFERENCES repository_dependency (id) | |
); | |
CREATE INDEX ix_tool_shed_repository_name ON tool_shed_repository (name); | |
CREATE INDEX ix_tool_shed_repository_deleted ON tool_shed_repository (deleted); | |
CREATE INDEX ix_tool_shed_repository_tool_shed ON tool_shed_repository (tool_shed); | |
CREATE INDEX ix_tool_shed_repository_changeset_revision ON tool_shed_repository (changeset_revision); | |
CREATE INDEX ix_tool_shed_repository_owner ON tool_shed_repository (owner); | |
CREATE INDEX ix_tool_shed_repository_includes_datatypes ON tool_shed_repository (includes_datatypes); | |
CREATE INDEX ix_tool_version_tool_shed_repository_id ON tool_version (tool_shed_repository_id); | |
CREATE INDEX ix_tool_version_association_tool_id ON tool_version_association (tool_id); | |
CREATE INDEX ix_tool_version_association_parent_id ON tool_version_association (parent_id); | |
CREATE INDEX ix_tool_dependency_tool_shed_repository_id ON tool_dependency (tool_shed_repository_id); | |
CREATE INDEX ix_repository_dependency_tool_shed_repository_id ON repository_dependency (tool_shed_repository_id); | |
CREATE INDEX ix_repository_repository_dependency_association_tool_shed_repository_id ON repository_repository_dependency_association (tool_shed_repository_id); | |
CREATE INDEX ix_repository_repository_dependency_association_repository_dependency_id ON repository_repository_dependency_association (repository_dependency_id); | |
COMMIT; | |
" > "${empty_schema}" | |
sqlite3 galaxy_install.sqlite < "${empty_schema}" | |
rm "${empty_schema}" | |
success "Migrating tables" | |
# tool_shed_repository is special :( | |
table=tool_shed_repository | |
success " export: ${table}" | |
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX) | |
psql -c "COPY (select | |
id, create_time, update_time, tool_shed, name, description, owner, changeset_revision, case when deleted then 1 else 0 end, metadata, includes_datatypes, installed_changeset_revision, uninstalled, dist_to_shed, ctx_rev, status, error_message, tool_shed_status from $table) to STDOUT with CSV" > "$export_csv"; | |
success " import: ${table}" | |
echo ".mode csv | |
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite | |
ec=$? | |
if (( ec == 0 )); then | |
rm "${export_csv}"; | |
else | |
error " sql: ${export_csv}" | |
fi | |
sqlite3 galaxy_install.sqlite "insert into migrate_version values ('ToolShedInstall', 'lib/galaxy/model/tool_shed_install/migrate', 17)" | |
# the rest are sane! | |
for table in {tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do | |
success " export: ${table}" | |
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX) | |
psql -c "COPY (select * from $table) to STDOUT with CSV" > "$export_csv"; | |
success " import: ${table}" | |
echo ".mode csv | |
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite | |
ec=$? | |
if (( ec == 0 )); then | |
rm "${export_csv}" | |
else | |
error " sql: ${export_csv}" | |
fi | |
done | |
success "Complete" | |
} | |
galaxy_migrate-tool-install-from-sqlite() { ## [sqlite-db]: Converts SQLite version into normal postgres toolshed repository tables | |
handle_help "$@" <<-EOF | |
$ gxadmin galaxy migrate-tool-install-from-sqlite db.sqlite | |
Migrating tables | |
export: tool_shed_repository | |
import: tool_shed_repository | |
... | |
export: repository_repository_dependency_association | |
import: repository_repository_dependency_association | |
Complete | |
EOF | |
assert_count $# 1 "Must provide database" | |
success "Migrating tables" | |
# Truncate first, since need to be removed in a specific ordering (probably | |
# cascade would work but cascade is SCARY) | |
psql -c "TRUNCATE TABLE repository_repository_dependency_association, repository_dependency, tool_dependency, migrate_tools, tool_version_association, tool_version, tool_shed_repository" | |
ec1=$? | |
# If you truncate this one, then it also removes galaxy codebase version, | |
# breaking everything. | |
psql -c "delete from migrate_version where repository_id = 'ToolShedInstall'" | |
ec2=$? | |
if (( ec1 == 0 && ec2 == 0 )); then | |
success " Cleaned" | |
else | |
error " Failed to clean" | |
fi | |
# Then load data in same 'safe' order as in sqlite version | |
for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do | |
success " export: ${table}" | |
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX) | |
if [[ "$table" == "tool_shed_repository" ]]; then | |
# Might have json instead of hexencoded json | |
sqlite3 -csv "$1" "select * from $table" | python -c "$hexencodefield9" > "$export_csv"; | |
elif [[ "$table" == "tool_version" ]]; then | |
# Might have null values quoted as empty string | |
sqlite3 -csv "$1" "select * from $table" | sed 's/""$//' > "$export_csv"; | |
else | |
sqlite3 -csv "$1" "select * from $table" > "$export_csv"; | |
fi | |
psql -c "COPY $table FROM STDIN with CSV" < "$export_csv"; | |
ec=$? | |
if (( ec == 0 )); then | |
rm "${export_csv}" | |
success " import: ${table}" | |
else | |
error " csv: ${export_csv}" | |
break | |
fi | |
done | |
# Update sequences | |
success "Updating sequences" | |
for table in {tool_shed_repository,tool_version,tool_version_association,tool_dependency,repository_dependency,repository_repository_dependency_association}; do | |
psql -c "SELECT setval('${table}_id_seq', (SELECT MAX(id) FROM ${table}));" | |
done | |
success "Comparing table counts" | |
for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do | |
postgres=$(psql -c "COPY (select count(*) from $table) to STDOUT with CSV") | |
sqlite=$(sqlite3 -csv "$1" "select count(*) from $table") | |
if (( postgres == sqlite )); then | |
success " $table: $postgres == $sqlite" | |
else | |
error " $table: $postgres != $sqlite" | |
fi | |
done | |
success "Complete" | |
} | |
galaxy_amqp-test() { ## <amqp_url>: Test a given AMQP URL for connectivity | |
handle_help "$@" <<-EOF | |
**Note**: must be run in Galaxy Virtualenv | |
Simple script to test an AMQP URL. If connection works, it will | |
immediately exit with a python object: | |
$ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost | |
<kombu.transport.pyamqp.Channel object at 0x7fe56a836290> | |
$ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost?ssl=1 | |
<kombu.transport.pyamqp.Channel object at 0x7fe56a836290> | |
Some errors look weird: | |
*wrong password*: | |
$ gxadmin galaxy amqp-test ... | |
Traceback | |
... | |
amqp.exceptions.AccessRefused: (0, 0): (403) ACCESS_REFUSED - Login was refused using authentication mechanism AMQPLAIN. For details see the broker logfile. | |
*wrong host*, *inaccessible host*, basically any other problem: | |
$ gxadmin galaxy amqp-test ... | |
[hangs forever] | |
Basically any error results in a hang forever. It is recommended you run it with a timeout: | |
$ timeout 1 gxadmin galaxy amqp-test | |
$ | |
EOF | |
assert_count $# 1 "Must provide URL" | |
URL="$1" | |
script=$(cat <<EOF | |
from kombu import Connection | |
from kombu import Exchange | |
with Connection('$URL') as conn: | |
print(conn.default_channel) | |
EOF | |
) | |
python -c "$script" | |
} | |
galaxy_cleanup-jwd() { ## <working_dir> [1|months ago]: (NEW) Cleanup job working directories | |
handle_help "$@" <<-EOF | |
Scans a provided job working directory root (e.g. job_working_directory/, | |
not an individual 005 subdir) for job folders that have not been modified | |
in the last N months. | |
It then takes the first 1000 entries and cleans them up. This is a bit of a | |
hack to cope with the fact that the list produced by find is really long, | |
and the for loop hangs until find is done generating it. | |
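Example (the path is a placeholder; 3 keeps anything touched in the last 3 months): | |
$ gxadmin galaxy cleanup-jwd /data/jwd/ 3 | |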
EOF | |
assert_count_ge $# 1 "Must supply at least working dir" | |
jwd=$1 | |
months=${2:-1} | |
# scan a given directory for jwds. | |
for possible_dir in $(find "$jwd" -maxdepth 3 -mindepth 3 -not -newermt "$months month ago" | grep -v _cleared_contents | head -n 1000); do | |
job_id=$(basename "$possible_dir") | |
if [[ "$job_id" =~ ^[0-9]{3,}$ ]]; then | |
state=$(psql -c "COPY (select state from job where id = $job_id) to STDOUT with CSV") | |
if [[ "$state" == "error" ]] || [[ "$state" == "ok" ]] || [[ "$state" == "deleted" ]] || [[ "$state" == "paused" ]] || [[ "$state" == "new_manually_dropped" ]]; then | |
echo "- $possible_dir $job_id $state" | |
rm -rf "$possible_dir" | |
else | |
echo "? $possible_dir $job_id $state" | |
fi | |
fi | |
done | |
} | |
galaxy_fix-conda-env() { ## <conda_dir/envs/>: Fix broken conda environments | |
handle_help "$@" <<-EOF | |
Fixes any broken conda environments which are missing the activate scripts in their correct locations. | |
MUST end in /envs/ | |
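Example (the path is a placeholder): | |
$ gxadmin galaxy fix-conda-env /srv/galaxy/conda/envs/ | |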
EOF | |
conda_dir=$(dirname $1) | |
for i in $(find ${conda_dir}/envs/ -mindepth 1 -maxdepth 1 -type d); | |
do | |
if [ ! -f $i/bin/conda ]; then | |
${conda_dir}/bin/conda ..checkenv bash $i | |
fi | |
done | |
} | |
galaxy_fav_tools() { ## : Favourite tools in Galaxy DB | |
handle_help "$@" <<-EOF | |
What are people's fav tools | |
EOF | |
psql -qAt -c "select value::json->'tools' from user_preference where name = 'favorites'" | jq -s add | jq '. | @tsv' -r | sed 's/\t/\n/g' | sort | uniq -c | sort -n | |
} | |
galaxy_ie-list() { ## : List GIEs | |
handle_help "$@" <<-EOF | |
List running IEs (based on output of queue-detail) | |
$ gxadmin local ie-list | |
running 6134209 jupyter_notebook helena-rasche | |
EOF | |
gxadmin query queue-detail | grep interactive | awk -F'|' '{print $1" "$2" "$4" "$5}' | sed 's/interactive_tool_//g' | column -t | |
} | |
galaxy_ie-show() { ## [gie-galaxy-job-id]: Report on a GIE [HTCondor Only!] | |
handle_help "$@" <<-EOF | |
The new versions of IEs are IMPOSSIBLE to track down, so here's a handy | |
function for you to make you hate life a lil bit less. | |
$ gxadmin local ie-show 6134209 | |
Galaxy ID: 6134209 | |
Condor ID: 1489026 | |
Container: jupyter_notebook | |
Running on: cn032.bi.uni-freiburg.de | |
Job script: /data/dnb01/galaxy_db/pbs/galaxy_6134209.sh | |
Working dir: /data/dnb02/galaxy_db/job_working_directory/006/134/6134209 | |
Runtime: | |
{ | |
8888: { | |
host: 132.230.68.65, | |
port: 1035, | |
protocol: tcp, | |
tool_port: 8888 | |
} | |
} | |
Mapping | |
Key: aaaabbbbccccdddd | |
KeyType: interactivetoolentrypoint | |
Token: 00001111222233334444555566667777 | |
URL: https://aaaabbbbccccdddd-00001111222233334444555566667777.interactivetoolentrypoint.interactivetool.usegalaxy.eu | |
It's probably only going to work for us? Sorry :) | |
EOF | |
assert_set_env GALAXY_CONFIG_FILE | |
id=$1 | |
cluster_id=$(gxadmin jsonquery queue-detail | jq ".[] | select(.id == $1) | .extid" -r) | |
cluster_id_c=$(echo -n "$cluster_id" | wc -c) | |
echo "Galaxy ID: $id" | |
echo "Condor ID: $cluster_id" | |
if (( cluster_id_c == 0 )); then | |
exit 1 | |
fi | |
container=$(condor_q $cluster_id -autoformat JobDescription | sed 's/interactive_tool_//g') | |
echo "Container: $container" | |
running_on=$(condor_q $cluster_id -autoformat RemoteHost | sed 's/.*@//g') | |
echo "Running on: $running_on" | |
job_script=$(condor_q $cluster_id -autoformat Cmd) | |
echo "Job script: $job_script" | |
working_dir=$(grep ^cd $job_script | grep working_directory | sed 's/^cd //g') | |
echo "Working dir: $working_dir" | |
echo "Runtime:" | |
cat $working_dir/container_runtime.json | jq -S | |
port=$(cat $working_dir/container_runtime.json | jq .[].port -r) | |
host=$(cat $working_dir/container_runtime.json | jq .[].host -r) | |
echo | |
echo "Mapping" | |
token=$(gxadmin tsvquery q "select token from interactivetool_entry_point where job_id = $id") | |
data_dir=$(cat "$GALAXY_CONFIG_FILE" | grep interactivetools_map |cut -f2 -d'='| tr -d " ") | |
key=$(sqlite3 $data_dir 'select key from gxitproxy where token="'$token'"') | |
key_type=$(sqlite3 $data_dir 'select key_type from gxitproxy where token="'$token'"') | |
token=$(sqlite3 $data_dir 'select token from gxitproxy where token="'$token'"') | |
echo "Key: $key" | |
echo "KeyType: $key_type" | |
echo "Token: $token" | |
echo "URL: https://${key}-${token}.${key_type}.interactivetool.usegalaxy.eu" | |
} | |
registered_subcommands="$registered_subcommands report" | |
_report_short_help="Various (rich) reports. Consider https://github.com/ttscoff/mdless for easier reading in the terminal! Or | pandoc | lynx -stdin" | |
align_cols() { | |
#cat | sed 's/\t/\t | /g' | column -t -s' ' | |
cat | sed 's/\t/ | /g' | |
} | |
report_user-info(){ ## <user_id|username|email>: Quick overview of a Galaxy user in your system | |
handle_help "$@" <<-EOF | |
This command lets you quickly find out information about a user. The output is formatted as markdown by default. | |
Consider [mdless](https://github.com/ttscoff/mdless) for easier reading in the terminal! | |
$ gxadmin report user-info helena-rasche | |
# Galaxy User 580 | |
Property | Value | |
---------- | ----- | |
ID | helena-rasche (id=580) [email protected] | |
Created | 2017-07-26 14:47:37.575484 | |
Properties | ext=f deleted=f purged=f active=t | |
Disk Usage | 137 GB | |
## Groups/Roles | |
Groups: training-freiburg-rnaseq-2018, training-emc2018 | |
Roles: admin, Backofen | |
## Recent Jobs | |
Tool ID | Status | Created | Exit Code | Runtime | |
---- | ---- | ---- | --- | ---- | |
Grep1 | ok | 2019-01-21 07:27:24.472706 | 0 | 00:01:19 | |
CONVERTER_fasta_to_tabular | ok | 2019-01-21 07:27:24.339862 | 0 | 00:03:34 | |
secure_hash_message_digest | ok | 2019-01-18 16:43:44.262265 | 0 | 00:00:08 | |
CONVERTER_gz_to_uncompressed | ok | 2019-01-18 10:18:23.99171 | 0 | 00:10:02 | |
upload1 | ok | 2019-01-18 08:44:24.955622 | 0 | 01:11:07 | |
echo_main_env | ok | 2019-01-17 16:45:04.019233 | 0 | 00:00:29 | |
secure_hash_message_digest | ok | 2019-01-17 16:03:21.33665 | 0 | 00:00:07 | |
secure_hash_message_digest | ok | 2019-01-17 16:03:20.937433 | 0 | 00:00:09 | |
## Largest Histories | |
History ID | Name | Size | |
---- | ---- | ---- | |
20467 | imported: RNASEQ | 52 GB | |
94748 | imported: ChIP-seq | 49 GB | |
94761 | reduced history-export problem | 49 GB | |
102448 | Wheat Genome | 42 GB | |
38923 | imported: Zooplankton | 29 GB | |
64974 | imported: 65991-A | 17 GB | |
20488 | Unnamed history | 15 GB | |
19414 | Unnamed history | 12 GB | |
92407 | Testing | 11 GB | |
60522 | example1/wf3-shed-tools.ga | 5923 MB | |
EOF | |
# Metadata | |
read -r -d '' qstr <<-EOF | |
SELECT | |
username, id, email, create_time AT TIME ZONE 'UTC' as create_time, external, deleted, purged, active, pg_size_pretty(disk_usage) | |
FROM | |
galaxy_user | |
WHERE | |
(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST(REGEXP_REPLACE(COALESCE('$1','0'), '[^0-9]+', '0', 'g') AS INTEGER)) | |
EOF | |
results=$(query_tsv "$qstr") | |
user_id=$(echo "$results" | awk '{print $2}') | |
if [[ -z "$user_id" ]]; then | |
error "Unknown user" | |
exit 1 | |
fi | |
# Groups | |
read -r -d '' qstr <<-EOF | |
select string_agg(galaxy_group.name, ', ') from user_group_association, galaxy_group where user_id = $user_id and user_group_association.group_id = galaxy_group.id | |
EOF | |
group_membership=$(query_tsv "$qstr") | |
read -r -d '' qstr <<-EOF | |
select string_agg(role.name, ', ') from user_role_association, role where user_id = $user_id and user_role_association.role_id = role.id and role.type not in ('private', 'sharing') | |
EOF | |
role_membership=$(query_tsv "$qstr") | |
# Recent jobs | |
read -r -d '' qstr <<-EOF | |
SELECT | |
job.id, tool_id, state, create_time AT TIME ZONE 'UTC' as create_time, exit_code, metric_value::text::interval | |
FROM | |
job, job_metric_numeric | |
WHERE | |
job.user_id = $user_id and job.id = job_metric_numeric.job_id and metric_name = 'runtime_seconds' order by job.id desc limit 10 | |
EOF | |
recent_jobs=$(query_tsv "$qstr") | |
recent_jobs2=$(printf "ID\tTool ID\tStatus\tCreated\tExit Code\tRuntime\n----\t----\t----\t----\t---\t----\n%s" "$recent_jobs" | align_cols) | |
# running jobs | |
read -r -d '' qstr <<-EOF | |
SELECT | |
id, tool_id, tool_version, handler, destination_id, state, create_time AT TIME ZONE 'UTC' as create_time, now() AT TIME ZONE 'UTC' - create_time AT TIME ZONE 'UTC' as runtime | |
FROM | |
job | |
WHERE | |
job.user_id = $user_id and job.state in ('running', 'queued', 'new') order by job.id desc | |
EOF | |
running_jobs=$(query_tsv "$qstr") | |
running_jobs2=$(printf "ID\tTool ID\tTool Version\tHandler\tDestination\tState\tCreated\tRuntime\n----\t----\t----\t----\t----\t----\t---\t----\n%s" "$running_jobs" | align_cols) | |
# Recent workflows | |
read -r -d '' qstr <<-EOF | |
SELECT | |
workflow_invocation.id, | |
workflow_invocation.create_time, | |
state, | |
scheduler, | |
handler, | |
substring(workflow.name, 0, 20) AS workflow, | |
substring(history.name, 0, 20) AS history | |
FROM | |
workflow_invocation | |
JOIN workflow ON workflow_invocation.workflow_id = workflow.id | |
JOIN history ON workflow_invocation.history_id = history.id | |
WHERE | |
history.user_id = $user_id | |
ORDER BY | |
create_time DESC | |
LIMIT | |
25 | |
EOF | |
recent_wf=$(query_tsv "$qstr") | |
recent_wf=$(printf "ID\tCreated\tState\tScheduler\tHandler\tWorkflow\tHistory\n----\t----\t----\t----\t----\t----\t----\n%s" "$recent_wf" | align_cols) | |
# Largest Histories | |
read -r -d '' qstr <<-EOF | |
SELECT | |
history.id, history.name, pg_size_pretty(sum(COALESCE(dataset.total_size, 0))) AS size, history.deleted, history.purged | |
FROM | |
history | |
LEFT JOIN history_dataset_association ON history.id = history_dataset_association.history_id | |
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id | |
WHERE | |
history.user_id = $user_id | |
GROUP BY | |
history.id, history.name | |
ORDER BY | |
sum(COALESCE(dataset.total_size, 0)) DESC | |
LIMIT | |
10 | |
EOF | |
largest_histories=$(query_tsv "$qstr") | |
largest_histories=$(printf "History ID\tName\tSize\tDeleted\tPurged\n----\t----\t----\t----\t----\n%s" "$largest_histories" | align_cols) | |
read -r -d '' template <<EOF | |
# Galaxy User $user_id | |
Property | Value | |
---------- | ----- | |
ID | %s (id=%s) %s | |
Created | %s %s | |
Properties | ext=%s deleted=%s purged=%s active=%s | |
Disk Usage | %s %s | |
## Groups/Roles | |
Groups: %s | |
Roles: %s | |
## Recent Jobs | |
%s | |
## Running Jobs | |
%s | |
## Recent Workflow Invocations | |
%s | |
## Largest Histories | |
%s | |
\n | |
EOF | |
# shellcheck disable=SC2086 | |
# shellcheck disable=SC2059 | |
printf "$template" $results "$group_membership" "$role_membership" "$recent_jobs2" "$running_jobs2" "$recent_wf" "$largest_histories" | |
} | |
report_job-info(){ ## <id>: Information about a specific job | |
handle_help "$@" <<-EOF | |
$ gxadmin report job-info 1 | |
tool_id | state | username | create_time | job_runner_name | job_runner_external_id | |
---------+-------+----------+----------------------------+-----------------+------------------------ | |
upload1 | ok | admin | 2012-12-06 16:34:27.492711 | local:/// | 9347 | |
EOF | |
assert_count $# 1 "Missing Job ID" | |
username=$(gdpr_safe galaxy_user.username username) | |
job_id=$1 | |
### ### | |
# JOB INFO # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT | |
job.tool_id, | |
job.state, | |
job.handler, | |
job.create_time AT TIME ZONE 'UTC' as create_time, | |
COALESCE(job.job_runner_name, '?'), | |
COALESCE(job.job_runner_external_id, '?') | |
FROM job | |
WHERE job.id = $job_id | |
EOF | |
results=$(query_tsv "$qstr") | |
read -r -d '' template <<EOF | |
# Galaxy Job $job_id | |
Property | Value | |
------------- | ----- | |
Tool | %s | |
State | %s | |
Handler | %s | |
Created | %s %s | |
Job Runner/ID | %s / %s | |
EOF | |
# shellcheck disable=SC2059 | |
# shellcheck disable=SC2086 | |
printf "$template" $results | |
### ### | |
# USER INFO # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT | |
$username, | |
job.user_id | |
FROM job, galaxy_user | |
WHERE job.id = $job_id AND job.user_id = galaxy_user.id | |
EOF | |
job_owner=$(query_tsv "$qstr") | |
# shellcheck disable=SC2183 | |
# shellcheck disable=SC2086 | |
printf "\n Owner | %s (id=%s)\n\n" $job_owner | |
### ### | |
# DEST PARAMS # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT convert_from(destination_params::bytea, 'UTF8') | |
FROM job | |
WHERE job.id = $job_id | |
EOF | |
results=$(query_tsv_json "$qstr") | |
printf "## Destination Parameters\n\n" | |
# shellcheck disable=SC2016 | |
tbl=$(echo "$results" | jq -S '. | to_entries[] | [.key, .value] | @tsv' -r | sed 's/\t/\t`/g;s/$/`/g') | |
# shellcheck disable=SC2059 | |
printf "Key\tValue\n---\t---\n$tbl" | align_cols | |
### ### | |
# DEPENDENCIES # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT convert_from(dependencies::bytea, 'UTF8') | |
FROM job | |
WHERE job.id = $job_id | |
EOF | |
results=$(query_tsv_json "$qstr") | |
printf "\n## Dependencies\n\n" | |
tbl=$(echo "$results" | jq -S '.[] | [.name, .version, .dependency_type, .cacheable, .exact, .environment_path, .model_class] | @tsv' -r) | |
# shellcheck disable=SC2059 | |
printf "Name\tVersion\tDependency Type\tCacheable\tExact\tEnvironment Path\tModel Class\n----\t-------\t---------------\t---------\t-----\t----------------\t-----------\n$tbl\n" | align_cols | |
### ### | |
# JOB PARAMS # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT j.name, j.value | |
FROM job_parameter j | |
WHERE j.job_id = $job_id | |
EOF | |
results=$(query_tsv "$qstr") | |
printf "\n## Tool Parameters\n\n" | |
# shellcheck disable=SC2059 | |
printf "Name\tSettings\n-----\t------------\n$results\n\n" | sed 's/[\"\`]//g' | align_cols | |
### ### | |
# INPUTS # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT | |
jtod.job_id, | |
hda.name, | |
hda.extension, | |
hda.id, | |
hda.state, | |
hda.deleted, | |
hda.purged, | |
ds.id, | |
ds.state, | |
ds.deleted, | |
ds.purged, | |
pg_size_pretty(ds.total_size) | |
FROM | |
job_to_input_dataset AS jtid, | |
history_dataset_association AS hda, | |
dataset AS ds, | |
job_to_output_dataset as jtod | |
WHERE | |
jtid.job_id = $1 | |
AND jtid.dataset_id = hda.id | |
AND hda.dataset_id = ds.id | |
AND jtod.dataset_id = ds.id | |
EOF | |
input_ds=$(query_tsv "$qstr") | |
input_ds_tbl=$(printf "\nJob ID\tName\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$input_ds" | align_cols) | |
### ### | |
# OUTPUTS # | |
### ### | |
read -r -d '' qstr <<-EOF | |
SELECT | |
hda.name, | |
hda.extension, | |
hda.id, | |
hda.state, | |
hda.deleted, | |
hda.purged, | |
ds.id, | |
ds.state, | |
ds.deleted, | |
ds.purged, | |
pg_size_pretty(ds.total_size) | |
FROM | |
job_to_output_dataset AS jtod, | |
history_dataset_association AS hda, | |
dataset AS ds | |
WHERE | |
jtod.job_id = $1 | |
AND jtod.dataset_id = hda.id | |
AND hda.dataset_id = ds.id | |
EOF | |
output_ds=$(query_tsv "$qstr") | |
output_ds_tbl=$(printf "Name\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols) | |
read -r -d '' template3 <<EOF | |
## Inputs | |
%s | |
## Outputs | |
%s | |
\n | |
EOF | |
# shellcheck disable=SC2059 | |
printf "$template3" "$input_ds_tbl" "$output_ds_tbl" | |
} | |
report_assigned-to-handler(){ ## <handler>: Report what items are assigned to a handler currently. | |
handle_help "$@" <<-EOF | |
EOF | |
assert_count $# 1 "Missing Handler ID" | |
printf "# Handler %s\n\n" "$1" | |
# Jerbs | |
printf "## Jobs\n\n" | |
read -r -d '' qstr <<-EOF | |
SELECT | |
id, create_time, tool_id, state, object_store_id, user_id | |
FROM job | |
WHERE handler = '$1' and state in ('new', 'queued', 'running') | |
ORDER BY create_time DESC | |
EOF | |
output_ds=$(query_tsv "$qstr") | |
printf "ID\tCreate Time\tTool ID\tState\tObject Store\tUser ID\n----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols | |
# Workflows | |
printf "## Workflows\n\n" | |
read -r -d '' qstr <<-EOF | |
SELECT | |
id, create_time, workflow_id, history_id, state, scheduler, uuid | |
FROM workflow_invocation | |
WHERE handler = '$1' and state = 'new' | |
EOF | |
output_ds=$(query_tsv "$qstr") | |
printf "ID\tCreate Time\tWorkflow ID\tHistory ID\tState\tScheduler\tUUID\n----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols | |
} | |
# Some unlisted functions that are probably not useful as things people might | |
# want to query (and thus have cluttering their query menu), but should be | |
# shared because that's the purpose of gxadmin, sharing all of these weird | |
# queries that we have :) | |
query_memory-and-cpu-on-same-node() { | |
handle_help "$@" <<-EOF | |
Helena needed to check the reported values of memory/cpu over a series | |
of jobs on a specific host, to debug what went wrong with the cgroup | |
metrics. | |
TODO(hxr): find some way to note "unlisted" functions and let people do the help for them? | |
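Example invocation (the hostname is illustrative): | |
$ gxadmin query memory-and-cpu-on-same-node cn032.example.org | |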
EOF | |
assert_count $# 1 "Missing host name" | |
host="$1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
jmn_a.metric_value AS "memory.memsw.max_usage_in_bytes", | |
jmn_b.metric_value AS "cpuacct.usage", | |
job.update_time | |
FROM | |
job_metric_numeric AS jmn_a, job_metric_numeric AS jmn_b, job | |
WHERE | |
jmn_a.job_id | |
IN ( | |
SELECT | |
job_id | |
FROM | |
job_metric_text | |
WHERE | |
metric_value = '$host' | |
ORDER BY | |
job_id DESC | |
LIMIT | |
200 | |
) | |
AND jmn_a.metric_name = 'memory.memsw.max_usage_in_bytes' | |
AND jmn_b.job_id | |
IN ( | |
SELECT | |
job_id | |
FROM | |
job_metric_text | |
WHERE | |
metric_value = '$host' | |
ORDER BY | |
job_id DESC | |
LIMIT | |
200 | |
) | |
AND jmn_b.metric_name = 'cpuacct.usage' | |
AND jmn_a.job_id = job.id | |
AND jmn_b.job_id = job.id | |
ORDER BY | |
job.create_time DESC | |
EOF | |
} | |
registered_subcommands="$registered_subcommands cluster" | |
_cluster_short_help="WIP" | |
cluster_slurm-stats() { ## : Collect stats on slurm | |
handle_help "$@" <<-EOF | |
EOF | |
error "Not yet implemented" | |
} | |
registered_subcommands="$registered_subcommands server" | |
_server_short_help="Various overall statistics" | |
_server_long_help=" | |
'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants. | |
In some cases 'iquery' is supported for InfluxDB compatible output. | |
In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/ | |
" | |
server_users() { ## : Count of different classifications of users | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=4" | |
tags="active=0;external=1;deleted=2;purged=3" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
active, external, deleted, purged, count(*) as count | |
FROM | |
galaxy_user | |
$date_filter | |
GROUP BY | |
active, external, deleted, purged | |
EOF | |
} | |
server_groups() { ## : Counts of group memberships | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND galaxy_group.create_time AT TIME ZONE 'UTC' <= '$1'::date AND date_trunc('day', user_group_association.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=1" | |
tags="name=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
galaxy_group.name, count(*) | |
FROM | |
galaxy_group, user_group_association | |
WHERE | |
user_group_association.group_id = galaxy_group.id | |
$date_filter | |
GROUP BY name | |
EOF | |
} | |
server_datasets() { ## : Counts of datasets | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=4;size=5" | |
tags="state=0;deleted=1;purged=2;object_store_id=3" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(state, '__unknown__'), | |
deleted, | |
purged, | |
COALESCE(object_store_id, 'none'), | |
count(*), | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) | |
FROM | |
dataset | |
$date_filter | |
GROUP BY | |
state, deleted, purged, object_store_id | |
EOF | |
} | |
server_hda() { ## : Counts of HDAs | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', history_dataset_association.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="sum=2;avg=3;min=4;max=5;count=6" | |
tags="extension=0;deleted=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(history_dataset_association.extension, '__unknown__'), | |
history_dataset_association.deleted, | |
COALESCE(sum(dataset.file_size), 0), | |
COALESCE(avg(dataset.file_size), 0)::bigint, | |
COALESCE(min(dataset.file_size), 0), | |
COALESCE(max(dataset.file_size), 0), | |
count(*) | |
FROM | |
history_dataset_association, dataset | |
WHERE | |
history_dataset_association.dataset_id = dataset.id | |
${date_filter} | |
GROUP BY | |
history_dataset_association.extension, history_dataset_association.deleted | |
EOF | |
} | |
server_ts-repos() { ## : Counts of TS repos | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=2" | |
tags="tool_shed=0;owner=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
tool_shed, owner, count(*) | |
FROM | |
tool_shed_repository | |
$date_filter | |
GROUP BY | |
tool_shed, owner | |
ORDER BY count desc | |
EOF | |
} | |
server_histories() { ## : Counts of histories and sharing | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=6" | |
tags="deleted=0;purged=1;published=2;importable=3;importing=4;genome_build=5" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
deleted, purged, published, importable, importing, COALESCE(genome_build, '__unknown__'), count(*) | |
FROM history | |
${date_filter} | |
GROUP BY | |
deleted, purged, published, importable, importing, genome_build | |
EOF | |
} | |
server_jobs() { ## : Counts of jobs | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=3" | |
tags="state=0;job_runner_name=1;destination_id=2" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(state, '__unknown__'), | |
COALESCE(job_runner_name, '__unknown__'), | |
COALESCE(destination_id, '__unknown__'), count(*) | |
FROM | |
job | |
${date_filter} | |
GROUP BY | |
state, job_runner_name, destination_id | |
EOF | |
} | |
server_allocated-cpu() { ## : CPU time per job runner | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
# TODO: here we select the hostname, don't do that. | |
# Hack for EU's test vs main separation, both submitting job stats. | |
# Solve by either running IN telegraf or doing it in the meta functions | |
fields="cpu_seconds=1" | |
tags="job_runner_name=0;host=2" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.job_runner_name, | |
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds, | |
'$HOSTNAME' as host | |
FROM | |
job_metric_numeric AS a, | |
job_metric_numeric AS b, | |
job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
${date_filter} | |
GROUP BY | |
job.job_runner_name | |
EOF | |
} | |
server_allocated-gpu() { ## : GPU time per job runner | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="gpu_seconds=1" | |
tags="job_runner_name=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
job.job_runner_name, | |
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds | |
FROM | |
job_metric_numeric AS a, | |
job_metric_text AS b, | |
job | |
WHERE | |
b.job_id = a.job_id | |
AND a.job_id = job.id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'CUDA_VISIBLE_DEVICES' | |
${date_filter} | |
GROUP BY | |
job.job_runner_name | |
EOF | |
} | |
server_workflows() { ## : Counts of workflows | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=3" | |
tags="deleted=0;importable=1;published=2" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
deleted, importable, published, count(*) | |
FROM | |
stored_workflow | |
${date_filter} | |
GROUP BY | |
deleted, importable, published | |
EOF | |
} | |
server_workflow-invocations() { ## : Counts of workflow invocations | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
fields="count=2" | |
tags="scheduler=0;handler=1" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
COALESCE(scheduler, '__unknown__'), | |
COALESCE(handler, '__unknown__'), | |
count(*) | |
FROM | |
workflow_invocation | |
${date_filter} | |
GROUP BY | |
scheduler, handler | |
EOF | |
} | |
server_groups-disk-usage() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the disk usage for groups | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', dataset.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous') | |
fields="storage_usage=1" | |
tags="group_name=0" | |
read -r -d '' QUERY <<-EOF | |
SELECT $groupname, | |
sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "storage_usage" | |
FROM dataset, | |
galaxy_group, | |
user_group_association, | |
history_dataset_association, | |
history | |
WHERE NOT dataset.purged | |
AND dataset.id = history_dataset_association.dataset_id | |
AND history_dataset_association.history_id = history.id | |
AND history.user_id = user_group_association.user_id | |
AND user_group_association.group_id = galaxy_group.id | |
$date_filter | |
GROUP BY galaxy_group.name | |
EOF | |
} | |
server_groups-allocated-cpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the CPU allocation for groups | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous') | |
fields="cpu_seconds=1" | |
tags="group_name=0" | |
read -r -d '' QUERY <<-EOF | |
WITH jobs_info AS ( | |
SELECT job.user_id, | |
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds | |
FROM job_metric_numeric AS a, | |
job_metric_numeric AS b, | |
job | |
WHERE job.id = a.job_id | |
AND job.id = b.job_id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'galaxy_slots' | |
$date_filter | |
GROUP BY job.user_id | |
), user_job_info AS ( | |
SELECT user_id, | |
sum(cpu_seconds) AS cpu_seconds | |
FROM jobs_info | |
GROUP BY user_id | |
) | |
SELECT $groupname, | |
round(sum(user_job_info.cpu_seconds), 2) as cpu_seconds | |
FROM user_job_info, | |
galaxy_group, | |
user_group_association | |
WHERE user_job_info.user_id = user_group_association.user_id | |
AND user_group_association.group_id = galaxy_group.id | |
GROUP BY galaxy_group.name | |
EOF | |
} | |
server_groups-allocated-gpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the GPU allocation for groups | |
handle_help "$@" <<-EOF | |
EOF | |
op="=" | |
if (( $# > 1 )); then | |
op="$2" | |
fi | |
date_filter="" | |
if (( $# > 0 )); then | |
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date" | |
fi | |
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous') | |
fields="gpu_seconds=1" | |
tags="group_name=0" | |
read -r -d '' QUERY <<-EOF | |
WITH jobs_info AS ( | |
SELECT job.user_id, | |
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds | |
FROM job_metric_numeric AS a, | |
job_metric_text AS b, | |
job | |
WHERE job.id = a.job_id | |
AND job.id = b.job_id | |
AND a.metric_name = 'runtime_seconds' | |
AND b.metric_name = 'CUDA_VISIBLE_DEVICES' | |
$date_filter | |
GROUP BY job.user_id | |
), user_job_info AS ( | |
SELECT user_id, | |
sum(gpu_seconds) AS gpu_seconds | |
FROM jobs_info | |
GROUP BY user_id | |
) | |
SELECT $groupname, | |
round(sum(user_job_info.gpu_seconds), 2) as gpu_seconds | |
FROM user_job_info, | |
galaxy_group, | |
user_group_association | |
WHERE user_job_info.user_id = user_group_association.user_id | |
AND user_group_association.group_id = galaxy_group.id | |
GROUP BY galaxy_group.name | |
EOF | |
} | |
server_workflow-trace-archive-metrics() { ## : [Workflow Trace Archive] Export numeric metrics | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT * FROM job_metric_numeric | |
EOF | |
} | |
server_workflow-trace-archive-jobs() { ## : [Workflow Trace Archive] Export jobs | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, | |
create_time, | |
update_time, | |
tool_id, | |
tool_version, | |
state, | |
runner_name, | |
job_runner_name, | |
imported, | |
object_store_id, | |
handler, | |
exit_code, | |
destination_id, | |
copied_from_job_id, | |
dynamic_tool_id, | |
galaxy_version | |
FROM | |
job | |
EOF | |
} | |
server_workflow-trace-archive-workflows() { ## : [Workflow Trace Archive] Export workflows | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, | |
create_time, | |
update_time, | |
stored_workflow_id, | |
has_cycles, | |
has_errors, | |
parent_workflow_id, | |
uuid | |
FROM workflow | |
EOF | |
} | |
server_workflow-trace-archive-workflow-invocations() { ## : [Workflow Trace Archive] Export workflow invocations | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, | |
create_time, | |
update_time, | |
workflow_id, | |
state, | |
scheduler, | |
handler | |
FROM workflow_invocation | |
EOF | |
} | |
server_workflow-trace-archive-workflow-steps() { ## : [Workflow Trace Archive] Export workflow steps | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT id, | |
create_time, | |
update_time, | |
workflow_id, | |
type, | |
tool_id, | |
tool_version, | |
order_index, | |
subworkflow_id, | |
dynamic_tool_id | |
FROM workflow_step | |
EOF | |
} | |
server_workflow-trace-archive-workflow-invocation-steps() { ## : [Workflow Trace Archive] Export workflow invocation steps | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, create_time, update_time, workflow_invocation_id, | |
workflow_step_id, job_id, state | |
FROM workflow_invocation_step | |
EOF | |
} | |
server_workflow-trace-archive-workflow-connections() { ## : [Workflow Trace Archive] Export workflow connections | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, output_step_id, input_step_input_id, output_name, input_subworkflow_step_id | |
FROM workflow_step_connection | |
EOF | |
} | |
server_workflow-trace-archive-workflow-step-input() { ## : [Workflow Trace Archive] Export workflow step-input | |
handle_help "$@" <<-EOF | |
Helper for WTA | |
EOF | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
id, workflow_step_id, name | |
FROM workflow_step_input | |
EOF | |
} | |
registered_subcommands="$registered_subcommands meta" | |
_meta_short_help="Miscellaneous" | |
meta_find_slurpable() { | |
grep -s -h -o '^server_[a-z-]*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v 'workflow-trace-archive' | sort | sed 's/server_//g' | |
} | |
meta_update() { ## : Update the script | |
handle_help "$@" <<-EOF | |
EOF | |
tmp=$(mktemp); | |
curl https://raw.githubusercontent.com/galaxyproject/gxadmin/main/gxadmin > "$tmp"; | |
chmod ugo+rx "$tmp"; | |
mv "$tmp" "$0"; | |
exit 0; | |
} | |
meta_cmdlist() { | |
handle_help "$@" <<-EOF | |
EOF | |
IFS=$'\n' | |
# TOC | |
for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do | |
# Progress | |
echo $section | |
# contents | |
echo "# $section" > "docs/README.${section}.md" | |
echo >> "docs/README.${section}.md" | |
echo "Command | Description" >> "docs/README.${section}.md" | |
echo "------- | -----------" >> "docs/README.${section}.md" | |
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do | |
cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//')" | |
desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g')" | |
key_part="$(echo "$cmd_part" | sed 's/ /-/g')" | |
if [[ "$command" != *"Deprecated"* ]]; then | |
# Main ToC | |
echo "[\`${cmd_part}\`](#${key_part}) | $desc_part" >> "docs/README.${section}.md" | |
else | |
echo "\`${cmd_part}\` | $desc_part" >> "docs/README.${section}.md" | |
fi | |
done | |
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do | |
cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//')" | |
desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g;s/\s*$//')" | |
key_part="$(echo "$cmd_part" | sed 's/ /-/g')" | |
url_part="$(echo "$cmd_part" | sed 's/ /_/g')" | |
if [[ "$command" != *"Deprecated"* ]]; then | |
# Subsec documentation | |
echo >> "docs/README.${section}.md" | |
echo "## $cmd_part" >> "docs/README.${section}.md" | |
echo >> "docs/README.${section}.md" | |
echo "([*source*](https://github.com/galaxyproject/gxadmin/search?q=${url_part}&type=Code))" >> "docs/README.${section}.md" | |
bash -c "$0 $cmd_part --help" >> "docs/README.${section}.md" | |
fi | |
done | |
done | |
} | |
meta_cmdlist2() { | |
handle_help "$@" <<-EOF | |
EOF | |
IFS=$'\n' | |
# TOC | |
for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do | |
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do | |
echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//' | |
done | |
done | |
} | |
meta_slurp-current() { ## [--date] [slurp-name [2nd-slurp-name [...]]]: Executes what used to be "Galaxy Slurp" | |
handle_help "$@" <<-EOF | |
Obtain influx compatible metrics regarding the current state of the | |
server. UseGalaxy.EU uses this to display things like "Current user | |
count" and "Current dataset size/distribution/etc." | |
It is expected that you are passing this straight to telegraf, there is | |
no non-influx output supported currently. | |
Supplying --date will include the current timestamp at the end of the | |
line, making it compatible for "gxadmin meta influx-post" usage | |
You can add your own functions which are included in this output, using | |
the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix | |
"server_", e.g. "server_mymetric". | |
$ gxadmin meta slurp-current | |
server-allocated-cpu,job_runner_name=condor cpu_years=102.00 | |
server-allocated-cpu,job_runner_name=local cpu_years=1346.00 | |
server-datasets,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528 | |
server-datasets,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510 | |
server-datasets,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0 | |
server-hda,deleted=f,extension=gff3 max=2682565,sum=2682565,avg=2682565,min=2682565 | |
server-hda,deleted=t,extension=tabular max=468549,sum=597843,avg=22142,min=2 | |
server-hda,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3 | |
server-hda,deleted=f,extension=tabular max=2819293,sum=3270268,avg=155727,min=3 | |
server-hda,deleted=f,extension=png max=800459,sum=7047695,avg=503407,min=142863 | |
server-hda,deleted=t,extension=auto max=9571,sum=9571,avg=9571,min=9571 | |
server-hda,deleted=t,extension=data max=2,sum=2,avg=2,min=2 | |
server-hda,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16 | |
server-hda,deleted=t,extension=bigwig max=2972569,sum=5857063,avg=1464266,min=0 | |
server-hda,deleted=t,extension=tar.gz max=209034,sum=1318690,avg=188384,min=91580 | |
server-hda,deleted=f,extension=vcf_bgzip max=3965889,sum=3965889,avg=3965889,min=3965889 | |
server-hda,deleted=t,extension=png max=2969711,sum=6584812,avg=1097469,min=183 | |
server-hda,deleted=f,extension=txt max=9584828,sum=132124407,avg=4556014,min=0 | |
server-hda,deleted=f,extension=bigwig max=14722,sum=17407,avg=8704,min=2685 | |
server-hda,deleted=f,extension=tar.gz max=209025,sum=390421,avg=195211,min=181396 | |
server-histories,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=14 | |
server-jobs,state=ok,destination_id=slurm_singularity,job_runner_name=local count=26 | |
server-jobs,state=error,destination_id=condor_resub,job_runner_name=condor count=1 | |
server-jobs,state=deleted,destination_id=local,job_runner_name=local count=1 | |
server-jobs,state=error,destination_id=condor,job_runner_name=condor count=8 | |
server-jobs,state=ok,destination_id=local,job_runner_name=local count=13 | |
server-jobs,state=ok,destination_id=condor,job_runner_name=condor count=2 | |
server-jobs,state=error,destination_id=local,job_runner_name=local count=3 | |
server-users,active=t,deleted=f,external=f,purged=f count=3 | |
server-workflows,deleted=f,importable=f,published=f count=3 | |
EOF | |
append="" | |
if [[ $1 == "--date" ]]; then | |
append=" "$(date +%s%N) | |
shift; | |
fi | |
specific_slurp=($@) | |
# shellcheck disable=SC2013 | |
for func in $(meta_find_slurpable); do | |
# To allow only slurping the one that was requested, if this was done. | |
if (( ${#specific_slurp[@]} > 0 )); then | |
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then | |
continue | |
fi | |
fi | |
obtain_func "server" "$func" | |
$wrapper query_influx "$QUERY" "server-$query_name" "$fields" "$tags" | sed "s/$/$append/" | |
done | |
} | |
meta_slurp-upto() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data up to a specific date. | |
handle_help "$@" <<-EOF | |
Obtain influx compatible metrics regarding the summed state of the | |
server up to a specific date. UseGalaxy.EU uses this to display things | |
like charts of "how many users were registered as of date X". | |
This calls all of the same functions as 'gxadmin meta slurp-current', | |
but with date filters for the entries' creation times. | |
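Example, limited to the 'users' and 'jobs' slurp queries: | |
$ gxadmin meta slurp-upto 2019-01-01 users jobs | |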
EOF | |
date=$1; shift | |
specific_slurp=($@) | |
# shellcheck disable=SC2013 | |
for func in $(meta_find_slurpable); do | |
# To allow only slurping the one that was requested, if this was done. | |
if (( ${#specific_slurp[@]} > 0 )); then | |
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then | |
continue | |
fi | |
fi | |
obtain_func server "$func" "$date" "<=" | |
$wrapper query_influx "$QUERY" "server-$query_name.upto" "$fields" "$tags" | \ | |
sed "s/$/ $(date -d "$date" +%s%N)/" | |
done | |
} | |
meta_slurp-day() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data on a specific date. | |
handle_help "$@" <<-EOF | |
Obtain influx compatible metrics regarding the state of the | |
server on a specific date. UseGalaxy.EU uses this to display things | |
like charts of "how many users were registered as of date X". You can | |
backfill data from your server by running a for loop like: | |
#!/bin/bash | |
for i in {1..365}; do | |
gxadmin meta slurp-day \$(date -d "\$i days ago" "+%Y-%m-%d") | <get into influx somehow> | |
done | |
It is expected that you are passing this straight to telegraf, there is | |
no non-influx output supported currently. | |
This calls all of the same functions as 'gxadmin meta slurp-current', | |
but with date filters for the entries' creation times. | |
You can add your own functions which are included in this output, using | |
the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix | |
"server_", e.g. "server_mymetric". They should include a date filter as | |
well, or the metrics reported here will be less useful. | |
$ gxadmin meta slurp-day 2019-01-01 | |
server-allocated-cpu.daily,job_runner_name=condor cpu_years=102.00 | |
server-datasets.daily,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528 | |
server-datasets.daily,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510 | |
server-datasets.daily,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0 | |
server-hda.daily,deleted=t,extension=data max=2,sum=2,avg=2,min=2 | |
server-hda.daily,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16 | |
server-hda.daily,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3 | |
server-hda.daily,deleted=f,extension=tabular max=3,sum=51,avg=3,min=3 | |
server-hda.daily,deleted=t,extension=tabular max=468549,sum=552788,avg=21261,min=2 | |
server-histories.daily,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=5 | |
server-jobs.daily,state=error,destination_id=condor,job_runner_name=condor count=8 | |
server-jobs.daily,state=error,destination_id=condor_resub,job_runner_name=condor count=1 | |
server-jobs.daily,state=error,destination_id=condor_a,job_runner_name=condor count=11 | |
server-jobs.daily,state=ok,destination_id=condor,job_runner_name=condor count=2 | |
server-jobs.daily,state=ok,destination_id=condor_a,job_runner_name=condor count=23 | |
server-users.daily,active=t,deleted=f,external=f,purged=f count=1 | |
server-workflows.daily,deleted=f,importable=f,published=f count=1 | |
EOF | |
date=$1; shift; | |
specific_slurp=($@) | |
# shellcheck disable=SC2013 | |
for func in $(meta_find_slurpable); do | |
# To allow only slurping the one that was requested, if this was done. | |
if (( ${#specific_slurp[@]} > 0 )); then | |
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then | |
continue | |
fi | |
fi | |
obtain_func server "$func" "$date" | |
$wrapper query_influx "$QUERY" "server-$query_name.daily" "$fields" "$tags" | \ | |
sed "s/$/ $(date -d "$date" +%s%N)/" | |
done | |
} | |
meta_slurp-initial() { ## <yyyy-mm-dd> <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data starting at the first date until the second date. | |
handle_help "$@" <<-EOF | |
Obtains influx compatible metrics between two dates and posts them to InfluxDB. | |
This function calls 'gxadmin meta slurp-upto' and 'gxadmin meta slurp-day' for each day in the range. | |
It requires a start and an end date, and optionally a list of specific slurp queries to run. | |
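Requires INFLUX_DB, INFLUX_URL, INFLUX_USER and INFLUX_PASS to be set. | |
Example, backfilling only the 'users' and 'jobs' metrics for the first half | |
of 2019 (the end date is exclusive): | |
$ gxadmin meta slurp-initial 2019-01-01 2019-07-01 users jobs | |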
EOF | |
# Variables | |
begindate=$1; shift | |
enddate=$1; shift | |
specific_slurp=($@) | |
specific_slurp_string="" | |
if (( ${#specific_slurp[@]} > 0 )); then | |
for specificslurppiece in "${specific_slurp[@]}"; do | |
if (( ${#specific_slurp_string} == 0 )); then | |
specific_slurp_string="$specificslurppiece" | |
else | |
specific_slurp_string="$specific_slurp_string $specificslurppiece" | |
fi | |
done | |
fi | |
tmpfile=/tmp/gxadmin-meta-slurp-initial-$(date +%s) | |
# Create temporary file | |
echo "" > $tmpfile | |
# Validate environment | |
assert_set_env INFLUX_DB | |
assert_set_env INFLUX_URL | |
assert_set_env INFLUX_PASS | |
assert_set_env INFLUX_USER | |
# Loop for the data | |
d=$begindate | |
while [ "$d" != $enddate ]; do | |
echo "Slurping for $d" | |
# Slurp the data | |
meta_slurp-upto $d $specific_slurp_string > $tmpfile | |
meta_slurp-day $d $specific_slurp_string >> $tmpfile | |
# Post to influxdb | |
meta_influx-post $INFLUX_DB $tmpfile | |
d=$(date -I -d "$d + 1 day") | |
done | |
rm $tmpfile | |
} | |
meta_error() { | |
error "$@" | |
} | |
meta_warning() { | |
warning "$@" | |
} | |
meta_success() { | |
success "$@" | |
} | |
meta_influx-post() { ## <db> <file>: Post contents of file (in influx line protocol) to influx | |
handle_help "$@" <<-EOF | |
Post data to InfluxDB. Must be [influx line protocol formatted](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/) | |
Posting data from a file: | |
$ gxadmin meta influx-post galaxy file.inflx | |
Posting data from the output of a command | |
$ gxadmin meta influx-post galaxy <(echo "weather,location=us-midwest temperature=\$RANDOM \$(date +%s%N)") | |
Posting data from the output of a gxadmin command | |
$ gxadmin meta influx-post galaxy <(gxadmin meta slurp-current --date) | |
**WARNING** | |
!> If you are sending a LOT of data points, consider splitting | |
!> them. Influx recommends 5-10k lines: | |
!> | |
!> $ split --lines=5000 data.iflx PREFIX | |
!> $ for file in PREFIX*; do gxadmin meta influx-post galaxy $file; done | |
EOF | |
assert_set_env INFLUX_URL | |
assert_set_env INFLUX_PASS | |
assert_set_env INFLUX_USER | |
assert_count_ge $# 2 "Must supply DB and then data file path" | |
DB="$1" | |
FILE="$2" | |
# If the user is reading the output of a command then it'll be a transient | |
# FD and might fail this check? Or it has for me. So if proc is in there, | |
# don't bother asserting that it exists. | |
if [[ "$FILE" != "/proc/"* ]]; then | |
assert_file "$FILE" | |
fi | |
curl --silent -XPOST "${INFLUX_URL}/write?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-binary @"${FILE}" | |
} | |
meta_influx-query() { ## <db> "<query>": Query an influx DB | |
handle_help "$@" <<-EOF | |
Query an InfluxDB | |
Query percentage of memory used over last hour. | |
$ gxadmin meta influx-query galaxy "select mean(used_percent) from mem where host='X' AND time > now() - 1h group by time(10m)" | \\ | |
jq '.results[0].series[0].values[] | @tsv' -r | |
2019-04-26T09:30:00Z 64.83119975586277 | |
2019-04-26T09:40:00Z 64.58284600472675 | |
2019-04-26T09:50:00Z 64.62714491344244 | |
2019-04-26T10:00:00Z 64.62339148181154 | |
2019-04-26T10:10:00Z 63.95268353798708 | |
2019-04-26T10:20:00Z 64.66849537282599 | |
2019-04-26T10:30:00Z 65.06069941790024 | |
EOF | |
assert_set_env INFLUX_URL | |
assert_set_env INFLUX_PASS | |
assert_set_env INFLUX_USER | |
assert_count_ge $# 2 "Must supply DB and then query" | |
DB="$1" | |
QUERY="$2" | |
curl --silent "${INFLUX_URL}/query?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-urlencode "q=${QUERY}" | |
} | |
meta_iquery-grt-export() { ## : Export data from a GRT database for sending to influx | |
handle_help "$@" <<-EOF | |
**WARNING**: | |
!> GRT database specific query, will not work with a galaxy database! | |
EOF | |
fields="count=4" | |
timestamp="3" | |
tags="tool_id=0;tool_version=1;instance=2" | |
read -r -d '' QUERY <<-EOF | |
SELECT | |
api_job.tool_id, | |
api_job.tool_version, | |
api_galaxyinstance.title, | |
extract(epoch from date_trunc('week', api_job.create_time)) || '000000000' as date, | |
count(*) | |
FROM | |
api_job, api_galaxyinstance | |
WHERE | |
api_job.instance_id = api_galaxyinstance.id | |
GROUP BY | |
api_job.tool_id, | |
api_job.tool_version, | |
api_galaxyinstance.title, | |
date | |
EOF | |
} | |
meta_whatsnew() { ## : What's new in this version of gxadmin | |
handle_help "$@" <<-EOF | |
Informs users of what's new in the changelog since their version | |
EOF | |
current_version=$(version) | |
prev_version=$(( current_version - 1 )) | |
#sed -n '1,/^# 12/d;/^# 11/q;p' | |
echo "$CHANGELOG" | sed -n "/^# ${prev_version}/q;p" | |
} | |
meta_export-grafana-dashboards() { ## [grafana_db|/var/lib/grafana/grafana.db]: Export all dashboards from a Grafana database to CWD and commit them to git. | |
handle_help "$@" <<-EOF | |
Given a grafana database, use sqlite3 to access all of the dashboards within, and then write them out to the current working directory. Next, commit them and update a README. | |
This script forms the basis of https://github.com/usegalaxy-eu/grafana-dashboards | |
**WARNING** | |
!> This script will silently remove all json files from CWD | |
!> as a first step. Additionally it will commit and push at the end, so it | |
!> should be run in a directory that has a git repo initialised, where you | |
!> are not concerned about accidentally pushing to wrong remote. | |
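Example, run from inside a git checkout dedicated to the dashboards (the | |
checkout path below is illustrative): | |
$ cd ~/grafana-dashboards && gxadmin meta export-grafana-dashboards /var/lib/grafana/grafana.db | |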
EOF | |
db_path=${1:-/var/lib/grafana/grafana.db} | |
rm -f *.json | |
sqlite3 --csv -separator "$(printf '\t')" $db_path \ | |
'select title,data from dashboard;' | \ | |
awk -F'\t' '{gsub("\"", "", $1); print $2 > $1".json" }' | |
for i in *.json; do | |
q=$(mktemp) | |
cat "$i" | sed 's/^"//;s/"$//g;s/""/"/g' | jq -S . > "$q" | |
mv "$q" "$i"; | |
lines=$(wc -l "$i" | awk '{print $1}') | |
if (( lines < 10 )); then | |
rm "$i" | |
fi | |
done; | |
cat > README.md <<-EOF | |
# Grafana Dashboards | |
Name | Tags | Version | Live | JSON | |
--- | --- | --- | --- | --- | |
EOF | |
sqlite3 --csv -separator "$(printf '\t')" $db_path \ | |
'SELECT title,uid,title,version,GROUP_CONCAT(dashboard_tag.term) FROM dashboard left outer join dashboard_tag on dashboard.id = dashboard_tag.dashboard_id WHERE dashboard.is_folder = 0 GROUP BY title, uid, title order by title asc' | \ | |
awk -F'\t' '{gsub("\"", "", $1); gsub("\"", "", $3); gsub(" ", "%20", $3); print $1" | "$5" | "$4" | [Live](https://stats.galaxyproject.eu/d/"$2") | [File](./"$3".json)"}' \ | |
>> README.md | |
git add *.json README.md | |
git commit -a -m "Automated commit for $(date)" | |
git push --quiet | |
} | |
meta_wta-report() { ## : Export all workflow trace archive queries | |
handle_help "$@" <<-EOF | |
Run through several WTA commands and export those to CSV | |
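Prints the path of the resulting tarball, e.g. | |
$ gxadmin meta wta-report | |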
EOF | |
tmpdir=$(mktemp -d) | |
obtain_func "server" "workflow-trace-archive-metrics" | |
query_csv "$QUERY" > "$tmpdir/job_metrics_numeric.csv" | |
obtain_func "server" "workflow-trace-archive-jobs" | |
query_csv "$QUERY" > "$tmpdir/jobs.csv" | |
obtain_func "server" "workflow-trace-archive-workflows" | |
query_csv "$QUERY" > "$tmpdir/workflows.csv" | |
obtain_func "server" "workflow-trace-archive-workflow-invocations" | |
query_csv "$QUERY" > "$tmpdir/workflow-invocations.csv" | |
obtain_func "server" "workflow-trace-archive-workflow-steps" | |
query_csv "$QUERY" > "$tmpdir/workflow-steps.csv" | |
obtain_func "server" "workflow-trace-archive-workflow-invocation-steps" | |
query_csv "$QUERY" > "$tmpdir/workflow-invoke-steps.csv" | |
obtain_func "server" "workflow-trace-archive-workflow-connections" | |
query_csv "$QUERY" > "$tmpdir/workflow-connections.csv" | |
obtain_func "server" "workflow-trace-archive-workflow-step-input" | |
query_csv "$QUERY" > "$tmpdir/workflow-step-input.csv" | |
tar cfz $tmpdir.tar.gz $tmpdir/* | |
echo $tmpdir.tar.gz | |
} | |
meta_complete-bash() { ## : export bash autocompletion | |
handle_help "$@" <<-EOF | |
Produces the bash autocompletion for gxadmin | |
Write the output of this somewhere useful, and source it in your bash profile. | |
$ gxadmin meta complete-bash > ~/.cache/gxadmin-autocomplete.sh | |
$ . ~/.cache/gxadmin-autocomplete.sh | |
$ gxadmin<TAB> | |
EOF | |
IFS=$'\n' | |
commands=$(locate_cmds_nolocal | correct_cmd | sed 's/://g') | |
leading=$(echo "$commands" | awk '{print $1}' | sort -u | paste -s -d' ') | |
subcommands="" | |
for cmd in $(echo "$commands" | awk '{print $1}' | sort -u); do | |
subcmds=$(echo "$commands" | awk '($1 == "'"$cmd"'"){ print $2 }' | sort -u | paste -s -d' ') | |
if (( ${#subcmds} > 0 )); then | |
subcommands="${subcommands}\n\n\t\t$cmd)\n\t\t\tCOMPREPLY=( \$(compgen -W \"${subcmds}\" \${cur}) )\n\t\t\treturn 0\n\t\t;;" | |
fi | |
done; | |
# Fix newlines | |
subcommands="$(echo -e "$subcommands")" | |
# Template | |
BASH_COMPLETION_TEMPLATE=$(cat <<EOF | |
_gxadmin() { | |
local cur prev opts | |
COMPREPLY=() | |
cur="\${COMP_WORDS[COMP_CWORD]}" | |
prev="\${COMP_WORDS[COMP_CWORD-1]}" | |
local -r cmds="%s" | |
case "\${prev}" in | |
%s | |
esac | |
COMPREPLY=( \$(compgen -W "\${cmds}" -- \${cur}) ) | |
return 0 | |
} | |
complete -F _gxadmin gxadmin | |
EOF | |
) | |
# shellcheck disable=SC2059 | |
printf "${BASH_COMPLETION_TEMPLATE}\n" "$leading" "$subcommands" | |
} | |
obtain_func() { | |
category="$1"; shift | |
query_name="$1"; shift | |
if [[ "$category" != "" ]]; then | |
category="${category}_" | |
fi | |
fn="${category}${query_name}" | |
LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function' | |
ec=$? | |
# ##? enables fancy argument parsing. | |
grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | fgrep --quiet -o '##?' | |
# Nothing needs to be done, just let that function handle and auto-parse | |
# and auto-export | |
enable_wap=$? | |
if (( enable_wap == 0 )); then | |
wonderful_argument_parser "$fn" "$@" | |
fi | |
if (( ec == 0 )); then | |
$fn "$@"; | |
else | |
export QUERY="ERROR" | |
fi | |
} | |
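# Parses a command's "##?" argument signature and exports each parsed value | |
# as an arg_<name> variable. A minimal illustration (the function and its | |
# arguments below are hypothetical, not part of gxadmin): | |
#   example_cmd() { ##? <job_id> [limit|10] [--verbose] | |
# invoked as "example_cmd 123 --verbose", the parser exports | |
# arg_job_id=123, arg_limit=10 (the default) and arg_verbose=1. | |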
wonderful_argument_parser() { | |
# from the top, the function to look for | |
fn=$1; | |
# make it drop, so we can parse the rest | |
shift; | |
declare -a parsed_keys | |
declare -a parsed_vals | |
declare -a positional_args | |
declare -a optional_flag_args | |
declare -a optional_args | |
declare -a args | |
positional_index=0 | |
optional_index=0 | |
# shellcheck disable=SC2207 | |
read -r -a signature <<< $(grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | sed 's/.*##?//g;s/: .*//g;') | |
signature+=('[--help]') # This is always available | |
# shellcheck disable=SC2068 | |
for x in $@; do | |
args+=("$x"); | |
# If the help flag is in there, we can short-circuit | |
if [[ "$x" == "--help" ]] || [[ "$x" == "-h" ]]; then | |
return | |
fi | |
shift; | |
done | |
for arg in "${signature[@]}"; do | |
if [[ "$arg" == '<'* ]]; then | |
# This is a required argument | |
positional_args+=("$(echo "$arg" | sed 's/^<//;s/>$//')") | |
elif [[ "$arg" == '[--'* ]]; then | |
# This is an optional argument | |
optional_flag_args+=("$arg") | |
elif [[ "$arg" == '['* ]]; then | |
# This is an optional argument | |
optional_args+=("$(echo "$arg" | sed 's/^\[//;s/\]$//')") | |
else | |
# This is an error | |
echo "ERROR!!! Bad argument specification: $arg" | |
fi | |
done | |
# Size of the arrays | |
positional_count=${#positional_args[@]} | |
optional_count=${#optional_args[@]} | |
offset=0 | |
while true; do | |
# Get the first bit of content from the arguments | |
a_cur=${args[$offset]} | |
if [[ "$a_cur" == "" ]]; then | |
break | |
fi | |
if [[ "$a_cur" == "--"* ]]; then | |
# This is a flag. So find the matching flag definition. | |
for arg in "${optional_flag_args[@]}"; do | |
# Two types of args: with, without values | |
if [[ "$arg" == "[--"* ]]; then | |
if [[ "$arg" == *'|'* ]]; then | |
# This has another argument. | |
# So we need to pop something else. | |
# And increment offset so we don't re-process it | |
if [[ "$arg" == '['$a_cur'|'* ]]; then | |
val="${args[$offset+1]}" | |
offset=$(( offset + 1 )) | |
k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')" | |
parsed_keys+=("${k}") | |
parsed_vals+=("$val") | |
fi | |
else | |
# This is just a flag | |
if [[ "$arg" == '['$a_cur']' ]]; then | |
k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')" | |
parsed_keys+=("${k}") | |
parsed_vals+=(1) | |
fi | |
fi | |
fi | |
done | |
else | |
# This is a non-flag, so maybe a positional, maybe an optional argument. | |
# So we need to find the Nth positional (via positional_index) | |
if (( (positional_index + optional_index) >= (positional_count + optional_count) )); then | |
echo "Error: more positional arguments than should be possible" | |
exit 1; | |
fi | |
if (( positional_index < positional_count )); then | |
parsed_keys+=("${positional_args[$positional_index]}") | |
parsed_vals+=("${a_cur}") | |
positional_index=$((positional_index + 1)) | |
else | |
# We're past the positional and into the optional | |
k="${optional_args[$optional_index]}" | |
parsed_keys+=("$(echo "$k" | sed 's/|.*//g')") | |
parsed_vals+=("${a_cur}") | |
optional_index=$(( optional_index + 1 )) | |
fi | |
fi | |
offset=$(( offset + 1 )) | |
done | |
# Set all default optional args, if they aren't set | |
if (( optional_index < optional_count )); then | |
for i in $(seq $optional_index $((optional_count - 1)) ); do | |
if [[ "${optional_args[$i]}" == *'|'* ]]; then | |
k="${optional_args[$i]}" | |
parsed_keys+=("$(echo "$k" | sed 's/|.*//g')") | |
parsed_vals+=("$(echo "$k" | sed 's/.*|//g')") | |
fi | |
done | |
fi | |
if (( positional_index < positional_count )); then | |
for i in $(seq $positional_index $(( positional_count - 1 )) ); do | |
error "Required argument <${positional_args[$i]}> is missing" | |
done | |
exit 1; | |
fi | |
size=${#parsed_keys[@]} | |
for i in $(seq 0 $((size - 1))); do | |
#printf "\t%10s=%-10s\n" "${parsed_keys[$i]}" "${parsed_vals[$i]}" | |
export arg_${parsed_keys[$i]}=${parsed_vals[$i]} | |
done | |
} | |
look_for() { | |
query_type="$1"; shift | |
group_name="$1"; shift | |
query_name="$1"; shift | |
# query_type group_name query_name | |
# query tsvquery users-total | |
# galaxy none somethingelse | |
#echo "HIII $query_type $group_name $query_name" | |
if [[ $query_type == "local" ]]; then | |
if [[ ! -f ${GXADMIN_SITE_SPECIFIC} ]]; then | |
error "No local functions are defined in ${GXADMIN_SITE_SPECIFIC}" | |
exit 1 | |
else | |
# Load functions | |
# shellcheck disable=SC1090 | |
. "${GXADMIN_SITE_SPECIFIC}" | |
fi | |
fi | |
# Check that FN exists | |
fn="${query_type}_${query_name}" | |
LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function' | |
ec=$? | |
if (( ec != 0 )); then | |
didyoumean "${query_type}" "${query_name}" | |
fi | |
# Set the application name to something useful. (Makes it easier to figure | |
# out which gxadmin command is hogging resources, etc.) | |
export PGAPPNAME=gxadmin.${query_type}.${query_name} | |
if [[ $query_type == "query" ]]; then | |
obtain_func "$query_type" "$query_name" "$@" | |
# If query in error, exit. | |
if [[ "$QUERY" == "ERROR" ]]; then | |
error "Error" | |
usage query | |
fi | |
# Run the queries | |
case "$group_name" in | |
tsvquery ) query_tsv "$QUERY";; | |
csvquery ) query_csv "$QUERY";; | |
query ) query_tbl "$QUERY";; | |
jsonquery ) query_json "$QUERY";; | |
iquery ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";; | |
explainquery ) query_exp "$QUERY";; | |
explainjsonquery ) query_expj "$QUERY";; | |
echoquery ) query_echo "$QUERY";; | |
# default | |
* ) usage "Error";; | |
esac | |
elif [[ $query_type == "server" ]]; then | |
obtain_func "$query_type" "$query_name" "$@" | |
# If query in error, exit. | |
if [[ "$QUERY" == "ERROR" ]]; then | |
error "Error" | |
usage server | |
fi | |
# Run the queries | |
case "$group_name" in | |
tsvserver ) query_tsv "$QUERY";; | |
csvserver ) query_csv "$QUERY";; | |
server ) query_tbl "$QUERY";; | |
jsonserver ) query_json "$QUERY";; | |
iserver ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";; | |
explainserver ) query_exp "$QUERY";; | |
explainjsonserver ) query_expj "$QUERY";; | |
echoserver ) query_echo "$QUERY";; | |
# default | |
* ) usage "Error";; | |
esac | |
elif [[ $query_type == "mutate" ]]; then | |
obtain_func "$query_type" "$query_name" "$@" | |
# If query in error, exit. | |
if [[ "$QUERY" == "ERROR" ]]; then | |
error "Error" | |
usage mutate | |
fi | |
# Run the queries | |
case "$group_name" in | |
mutate ) query_tbl "$QUERY";; | |
explainmutate ) query_exp "$QUERY";; | |
explainjsonmutate ) query_expj "$QUERY";; | |
echomutate ) query_echo "$QUERY";; | |
# default | |
* ) usage "Error";; | |
esac | |
elif [[ $query_type == "local" ]]; then | |
if [[ -z "${GXADMIN_BUGGER_OFF}" ]] && (( (RANDOM % 25) == 0 )); then | |
warning "Hey! It's great that you're using gxadmin! You should contribute these functions back :) Other people might find these useful, or could learn something from the code you've written, even if you think it is too specific to your site." | |
fi | |
$fn "$@"; | |
if [[ "${query_name}" == "query"* ]]; then | |
query_tbl "$QUERY" | |
fi | |
else | |
$fn "$@"; | |
if [[ "${query_name}" == "query"* ]]; then | |
query_tbl "$QUERY" | |
elif [[ "${query_name}" == "iquery"* ]]; then | |
query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp" | |
fi | |
fi | |
} | |
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance | |
levenshtein=' | |
function levenshtein(str1, str2, cost_ins, cost_rep, cost_del, str1_len, str2_len, matrix, is_match, i, j, x, y, z) { | |
if(cost_ins == "") cost_ins = 1 | |
if(cost_rep == "") cost_rep = 1 | |
if(cost_del == "") cost_del = 1 | |
str1_len = length(str1) | |
str2_len = length(str2) | |
if(str1_len == 0) return str2_len * cost_ins | |
if(str2_len == 0) return str1_len * cost_del | |
matrix[0, 0] = 0 | |
for(i = 1; i <= str1_len; i++) { | |
matrix[i, 0] = i * cost_del | |
for(j = 1; j <= str2_len; j++) { | |
matrix[0, j] = j * cost_ins | |
x = matrix[i - 1, j] + cost_del | |
y = matrix[i, j - 1] + cost_ins | |
z = matrix[i - 1, j - 1] + (substr(str1, i, 1) == substr(str2, j, 1) ? 0 : cost_rep) | |
x = x < y ? x : y | |
matrix[i, j] = x < z ? x : z | |
} | |
} | |
return matrix[str1_len, str2_len] | |
} | |
{ | |
print levenshtein($1, $2)"\t"$1; | |
}' | |
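# Feeds whitespace-separated "candidate query" pairs (one per line) through the
# awk script above and prints "distance<TAB>candidate" for the ten closest
# matches, e.g. (hypothetical input):
#   printf '%s %s\n' users-total users-totle users-details users-totle | levenshtein_filter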
levenshtein_filter(){ | |
cat | awk -f <(echo "$levenshtein") | sort -nk1 | head -n 10 | |
} | |
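# Wraps a subcommand with bash's `time` keyword; TIMEFORMAT is scoped locally
# so only a single wall-clock-seconds line is printed. Reached by prefixing the
# mode with "time" (e.g. "gxadmin timequery ..."), see the time* handling below.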
wrap_time() { | |
local TIMEFORMAT="Time to execute $1: %R seconds" | |
time "$@" | |
} | |
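# Interactive command search: with fzf installed this opens a fuzzy finder over
# every registered command (pre-filtered by the optional <term>) and offers to
# run the selection; without fzf it falls back to a plain grep listing with the
# search term highlighted.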
search() { # <term>: Search for a specific command | |
if (( $# > 0 )); then | |
if command -v fzf &> /dev/null; then | |
res=$(locate_cmds | correct_cmd | fzf -q "$1" | sed 's/[:<\[].*//g') | |
fzf_autorun_cmd | |
else | |
locate_cmds | correct_cmd | grep "$1" | colour_word "$1" orange | |
fi | |
else | |
if command -v fzf &> /dev/null; then | |
res=$(locate_cmds | correct_cmd | fzf | sed 's/[:<\[].*//g') | |
fzf_autorun_cmd | |
else | |
locate_cmds | correct_cmd | |
fi | |
fi | |
} | |
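# Main entry point: the first argument picks the mode, the rest are passed
# through to the handler. A leading "time" on the mode wraps the call in
# wrap_time before dispatching.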
if (( $# == 0 )); then | |
usage | |
fi | |
mode="$1"; shift | |
wrapper= | |
if [[ "$mode" == time* ]]; then | |
wrapper=wrap_time | |
mode=${mode:4} | |
fi | |
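# Dispatch table: anything ending in query/server/mutate goes through look_for
# with its group name (so e.g. "gxadmin csvquery users-total" would render the
# users-total query as CSV), while the named modes pass "none" as the group.
# Unknown modes fall through to didyoumean for a suggestion.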
case "$mode" in | |
#*query ) $wrapper query "$mode" "$@" ;; | |
*query ) $wrapper look_for "query" "$mode" "$@" ;; | |
*server) $wrapper look_for "server" "$mode" "$@" ;; | |
*mutate) $wrapper look_for "mutate" "$mode" "$@" ;; | |
config ) $wrapper look_for "$mode" "none" "$@" ;; | |
cluster) $wrapper look_for "$mode" "none" "$@" ;; | |
filter ) $wrapper look_for "$mode" "none" "$@" ;; | |
galaxy ) $wrapper look_for "$mode" "none" "$@" ;; | |
meta ) $wrapper look_for "$mode" "none" "$@" ;; | |
report ) $wrapper look_for "$mode" "none" "$@" ;; | |
uwsgi ) $wrapper look_for "$mode" "none" "$@" ;; | |
local ) $wrapper look_for "$mode" "none" "$@" ;; | |
s ) search "$@" ;; | |
find ) search "$@" ;; | |
search ) search "$@" ;; | |
# version commands | |
version ) version ;; | |
-v ) version ;; | |
--version ) version ;; | |
# help | |
help ) usage;; | |
-h ) usage;; | |
--help ) usage;; | |
# anything else | |
* ) didyoumean "$mode" "$@";; | |
esac |