Skip to content

Instantly share code, notes, and snippets.

@td-shi
Last active February 24, 2020 13:11
Show Gist options
  • Select an option

  • Save td-shi/adde497e883d3cbf2c668433c3b99315 to your computer and use it in GitHub Desktop.

Select an option

Save td-shi/adde497e883d3cbf2c668433c3b99315 to your computer and use it in GitHub Desktop.
Converting XML to (like) XPATH. [List Repository](https://github.com/td-shi/ShellScriptsOnGist)
#!/bin/bash --posix
# -*- coding:utf-8 -*-
# === Coding shell scripting Memo ==============================================
# ${<name>#<pattern>} :: delete the shortest match of <pattern> from the front (forward).
# ${<name>##<pattern>} :: delete the longest match of <pattern> from the front (forward).
# ${<name>%<pattern>} :: delete the shortest match of <pattern> from the back (backward).
# ${<name>%%<pattern>} :: delete the longest match of <pattern> from the back (backward).
# ${<name>/<before>/<after>} :: replace only the first match.
# ${<name>//<before>/<after>} :: replace all matches.
# ${<name>:-<value>} :: if <name> is unset or empty, return <value>.
# ${<name>:=<value>} :: if <name> is unset or empty, return <value> and assign it.
# ". <shell script>" is to keep current shell and take over environment.
# === Initialize shell environment =============================================
#set -u # Just stop undefined values.
#set -e # Just stop error.
#set -x # Debug running command.
# Files created by this script are not group/world writable.
umask 0022
# Byte-wise, locale-independent behaviour for sed/awk/sort and friends.
export LC_ALL=C
export LANG=C
# Put the conventional system directories in front of the inherited PATH.
# The inherited value is appended only when it is non-empty, so the result
# never contains an empty field ("::" or a trailing ":"); an empty PATH field
# means "current directory" and would let a file in $PWD shadow sed/awk/cat.
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/bin${PATH:+:${PATH}}"
# When available, also prepend the system default PATH reported by getconf.
if type command >/dev/null 2>&1 && type getconf >/dev/null 2>&1; then
  std_path=$(command -p getconf PATH 2>/dev/null)
  if [ -n "${std_path}" ]; then
    # Skip an empty answer: it would add a leading ":" (current directory).
    PATH="${std_path}:${PATH}"
  fi
  unset std_path
fi
export PATH
export UNIX_STD=2003 # to make HP-UX conform to POSIX
# === Define the functions for printing usage and error message ================
# usage_and_exit: print the help text to stderr and terminate with status 1.
# Arguments: none.
# The here-document uses a quoted, unindented delimiter so that the text is
# emitted literally (no expansion) and the terminator line matches exactly.
usage_and_exit(){
  cat <<'USAGE' 1>&2
# About
toXPATH.sh convert to (like) XPATH from XML.
# Usage
toXPATH.sh [options] [<XML file>]
+ [sample.xml](https://www.w3schools.com/xml/schema_example.asp)
```
$> toXPATH.sh sample.xml
/shiporder[1]@orderid 889923
/shiporder[1]@xmlns:xsi http://www.w3.org/2001/XMLSchema-instance
/shiporder[1]@xsi:noNamespaceSchemaLocation shiporder.xsd
/shiporder[1]/orderperson[1] John Smith
/shiporder[1]/shipto[1]/name[1] Ola Nordmann
/shiporder[1]/shipto[1]/address[1] Langgt 23
/shiporder[1]/shipto[1]/city[1] 4000 Stavanger
/shiporder[1]/shipto[1]/country[1] Norway
/shiporder[1]/item[1]/title[1] Empire Burlesque
/shiporder[1]/item[1]/note[1] Special Edition
/shiporder[1]/item[1]/price[1] 10.90
/shiporder[1]/item[2]/title[1] Hide your heart
/shiporder[1]/item[2]/price[1] 9.90
```
# Options
+ -h |--help |--version
- This help.
# Version
2020-02-24T20:10:10 0.02
# LICENSE
[CC0(Public domain)](https://creativecommons.org/publicdomain/zero/1.0/legalcode)
# Author
2020 TD
USAGE
  exit 1
}
# error_exit: terminate the script with a given status.
# Arguments: $1 - exit status
#            $2 - optional message; when supplied (even if empty) it is
#                 written to stderr prefixed with the script's base name.
error_exit() {
  if [ "${2+set}" = 'set' ]; then
    echo "${0##*/}: $2" 1>&2
  fi
  exit "$1"
}
# === Initialize parameters ====================================================
# Detect home directory of this app. and define more
#Homedir="$(d=${0%/*}/; [ "_$d" = "_$0/" ] && d='./'; cd "$d.."; pwd)"
#PATH="$Homedir/<Add Dir>:$PATH" # for additional command
# LF holds a backslash followed by a newline; spliced into a sed replacement
# it produces a line break. The trailing "_" protects the newline from being
# stripped by the command substitution and is removed right afterwards.
LF=$(printf '\\\n_')
LF=${LF%_}
# Default name of the synthetic root entry used by the main routine.
NROOT='NROOT'
#. "$Homedir/<shell script config-file.>" # configuration value.
# Buffer for the XML text to convert.
input=
# === Confirm that the required commands exist =================================
# --- cURL or Wget (example)
#if type curl >/dev/null 2>&1; then
# CMD_CURL='curl'
#elif type wget >/dev/null 2>&1; then
# CMD_WGET='wget'
#else
# error_exit 1 'No HTTP-GET/POST command found.'
#fi
# === Print usage and exit if one of the help options is set ===================
# Show the usage text when the sole argument is one of the help options.
if [ "$#" -eq 1 ]; then
  case "$1" in
    -h|--help|--version) usage_and_exit ;;
  esac
fi
# === Read options =============================================================
# Only the first argument is inspected: "--" and "-" are accepted as they are,
# any other word starting with "-" is rejected, everything else is treated as
# an operand and left for the next section.
case "${1:-}" in
  --|-)
    ;;
  -*)
    error_exit 1 'Invalid option'
    ;;
esac
# === Require parameters check =================================================
#printf '%s\n' "${zzz}" | grep -Eq '^$|^-?[0-9.]+,-?[0-9.]+$' || {
# error_exit 1 'Invalid -l,--location option'
#}
# === Last parameter ===========================================================
# Load the XML text into ${input}:
#   no operand      -> standard input
#   one operand     -> "--" shows the usage, "-" is standard input,
#                      anything else is a file name
#   several operands-> all files concatenated (a leading "--" is dropped)
# NROOT is deliberately left at its default: it is spliced into awk program
# text and a sed regular expression by the main routine, so assigning the
# file name to it would break on names containing spaces, quotes, ":" or
# regex metacharacters, while the resulting prefix is stripped again anyway.
# A failing cat now aborts instead of silently converting an empty document.
case $# in
  0)
    input=$(cat -) || error_exit 1 'Cannot read standard input'
    ;;
  1)
    case "${1:-}" in
      '--') usage_and_exit ;;
      '-') input=$(cat -) || error_exit 1 'Cannot read standard input' ;;
      *) input=$(cat -- "$1") || error_exit 1 "Cannot read: $1" ;;
    esac
    ;;
  *)
    case "$1" in '--') shift ;; esac
    # "--" keeps file names that start with "-" from being taken as options.
    input=$(cat -- "$@") || error_exit 1 'Cannot read input files'
    ;;
esac
# === Define functions =========================================================
# xml_to_xpath: convert XML read from stdin into XPath-like lines on stdout.
#   "<path> <text>"            for every text node
#   "<path>@<attr> <value>"    for every name="value" attribute
# where <path> is /tag[i]/tag[j]/... and the index counts same-named siblings.
# Runs in a subshell (parenthesised body) so its helper variable does not leak.
#
# Stages of the pipeline:
#   1. drop lines holding an XML declaration / processing instruction
#   2. collapse leading blanks of every line to one space
#   3. join all lines into one (the data is passed as an argument to "%s",
#      never as the printf format, so "%" in the document is harmless)
#   4. expand self-closing tags <x a="1"/> into <x a="1"></x>; the tag name
#      and attribute parts cannot cross "<" or ">", so only that tag is touched
#   5. put every tag on a line of its own
#   6. remove a blank before ">"
#   7-9. isolate and delete comments
#   10-12. trim trailing/leading spaces and drop empty lines
#   13. walk the tag/text lines with awk and print the paths
xml_to_xpath() (
  # Backslash + newline: inside a sed replacement this emits a line break.
  nl=$(printf '\\\012_')
  nl=${nl%_}

  sed '/<?[^?]*?>/d' |
    sed 's/^[[:blank:]]*/ /' |
    awk '{ printf "%s", $0 } END { if (NR) print "" }' |
    sed 's:<\([^ <>/]*\)\([^<>]*\)/>:<\1\2></\1>:g' |
    sed 's/\(<[^>]*>\)/'"${nl}"'\1'"${nl}"'/g' |
    sed 's/ >/>/g' |
    sed 's/<!--/'"${nl}"'<!--'"${nl}"'/g' |
    sed 's/-->/-->'"${nl}"'/g' |
    sed '/^<!--/,/-->$/d' |
    sed 's/ *$/'"${nl}"'/g' |
    sed 's/^ *//g' |
    sed '/^$/d' |
    awk '
      # Print the path of every element that is currently open.
      function print_path(    i) {
        for (i = 1; i <= depth; i++) {
          printf("/%s[%d]", name[i], pos[i]);
        }
      }
      # Print one "<path>@<name> <value>" line per name="value" pair in attrs.
      function print_attrs(attrs,    term) {
        while (1) {
          sub(/^[ >]+/, "", attrs);
          if (!match(attrs, /^[^ =]+=\"[^\"]*\"/)) {
            return;
          }
          term = substr(attrs, RSTART, RLENGTH);
          attrs = substr(attrs, RSTART + RLENGTH);
          sub(/=/, " ", term);
          gsub(/\"/, "", term);
          print_path();
          printf("@%s\n", term);
        }
      }
      # depth: number of open elements; uid[d]: serial number of the element
      # open at depth d (0 = document level); seen[uid, tag]: how many
      # children called tag that element has had so far (exact-name lookup).
      BEGIN { depth = 0; serial = 0; uid[0] = 0; }
      # Text node of any length, including a single character.
      /^[^< ]/ {
        print_path();
        printf(" %s\n", $0);
        next;
      }
      # Opening tag: push it, numbered among its same-named siblings.
      /^<[^\/].+$/ {
        sub(/^</, "");
        sub(/>$/, "");
        tag = $1;
        sub(/^[^ ]+/, "");
        parent = uid[depth];
        depth++;
        name[depth] = tag;
        pos[depth] = ++seen[parent, tag];
        uid[depth] = ++serial;
        print_attrs($0);
        next;
      }
      # Closing tag: pop (ignored when nothing is open).
      /^<\/.+$/ {
        if (depth > 0) {
          depth--;
        }
      }
    '
)
# === Main routine =============================================================
printf '%s' "${input}" | xml_to_xpath
# === End shell script =========================================================
# Unconditional success: the exit status of the preceding pipeline is not
# propagated to the caller.
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment