Last active
September 23, 2016 09:50
-
-
Save jindrichmynarz/97e528372b618161f5be3b4aaaeb7225 to your computer and use it in GitHub Desktop.
Diff versions of a code list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Compute the diff between SKOS code lists.
# Usage: ./code_list_diff.sh [-h|--help] old_version new_version

# Abort on the first unhandled command failure.
set -e
shopt -s extglob

# Old and new versions of the code list, as given on the command line.
# They are passed to arq as named graphs and interpolated into the
# GRAPH <...> clauses of the SPARQL queries below.
V1=${1-}
V2=${2-}

# ASK query used to verify that an input file contains a skos:ConceptScheme.
TEST_QUERY="ASK { [] a <http://www.w3.org/2004/02/skos/core#ConceptScheme> . }"
# Print an error message to stderr and terminate the script.
# Arguments: the message words (joined with single spaces).
# Outputs:   the message on stderr; nothing on stdout.
# Returns:   never returns; exits with status 1.
die() {
  # printf instead of echo so that a message such as "-n" or "-e" is printed
  # literally rather than being interpreted as an echo option.
  printf '%s\n' "$*" >&2
  exit 1
}
# Run a SPARQL query with arq over both code list versions and print the
# CSV result as an aligned table.
# Globals:   V1, V2 (read) - files loaded as named graphs.
# Arguments: $1 - SPARQL query text.
# Outputs:   the query result on stdout, columns aligned by column(1).
select_query() {
  # V1/V2 are quoted so that paths containing spaces or glob characters
  # reach arq as single arguments.
  arq --results CSV --namedGraph "$V1" --namedGraph "$V2" "$1" |
    column -s, -t
}
# Run a SPARQL query via select_query and print only the last line of its
# output (for single-value results this drops the CSV header row).
# Arguments: $1 - SPARQL query text.
select_one() {
  # "tail -n 1" is the POSIX spelling; the bare "-1" form is obsolete.
  select_query "$1" | tail -n 1
}
# Run a SPARQL ASK query and report the answer in words.
# Arguments: $1 - SPARQL ASK query text.
# Outputs:   "Yes" if the last result line contains "true", otherwise "No".
ask_query() {
  # Explicit if/else instead of "a && b || c", so the "No" branch can never
  # run as a side effect of the "Yes" branch failing.
  if select_one "$1" | grep -q true; then
    echo "Yes"
  else
    echo "No"
  fi
}
# Print the help text to stdout and terminate the script successfully.
# Returns: never returns; exits with status 0.
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: ./code_list_diff.sh [-h|--help] old_version new_version
Computes differences between old_version and new_version of a SKOS code list.
Versions of code lists must be provided in an RDF serialization.
EOF
  exit 0
}
# Print a section heading, preceded by a blank line.
# Arguments: $1 - heading text (printed verbatim via %s).
heading() {
  printf '\n### %s ###\n' "$1"
}
# Show the help text only when the first argument is exactly -h or --help.
# A case pattern is used instead of a regex match against "-h --help": with
# the regex form the argument itself was the pattern, so file names such as
# "h", "-" or "." were mistaken for a help request.
case "${1-}" in
  -h | --help) usage ;;
esac

# Test if arq is installed.
command -v arq >/dev/null 2>&1 || die "Missing Jena ARQ!"

# Test correct number of arguments.
if [[ "$#" -ne 2 ]]; then
  die "You must provide two code lists!"
fi

# Quoted so that paths with spaces or glob characters compare literally.
[[ "$V1" != "$V2" ]] || die "Cannot diff the same file."

for i in "$@"; do
  # Test if the diffed files exist.
  if [[ ! -f "$i" ]]; then
    die "File $i not found!"
  fi
  # Test if the diffed files contain skos:ConceptSchemes.
  arq --results CSV --data "$i" "$TEST_QUERY" | tail -n 1 | grep -q true ||
    die "File $i is missing a skos:ConceptScheme!"
done
heading "Number of concepts in $(basename "$V1")"
# Count the distinct skos:Concept resources in the named graph of the old
# version. The here-document is unquoted, so $V1 is expanded into the query.
# The END terminator must stand alone on its line; the closing parenthesis
# of the command substitution goes on the next line.
COUNT_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
  GRAPH <$V1> {
    ?concept a skos:Concept .
  }
}
END
)
select_one "$COUNT_Q1"
heading "Number of concepts in $(basename "$V2")"
# Count the distinct skos:Concept resources in the named graph of the new
# version. The here-document is unquoted, so $V2 is expanded into the query.
# The END terminator must stand alone on its line; the closing parenthesis
# of the command substitution goes on the next line.
COUNT_Q2=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
  GRAPH <$V2> {
    ?concept a skos:Concept .
  }
}
END
)
select_one "$COUNT_Q2"
heading "Are there concepts sharing labels in $(basename "$V1")?"
# ASK whether the old version contains two different concepts that carry the
# same skos:prefLabel. sameTerm() is negated so a concept is not paired with
# itself. The END terminator must stand alone on its line.
DUPLICATE_LABELS_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

ASK
WHERE {
  GRAPH <$V1> {
    ?concept1 a skos:Concept ;
      skos:prefLabel ?prefLabel .
    ?concept2 a skos:Concept ;
      skos:prefLabel ?prefLabel .
    FILTER (!sameTerm(?concept1, ?concept2))
  }
}
END
)
ask_query "$DUPLICATE_LABELS_Q1"
heading "Are there concepts sharing notations in $(basename "$V1")?"
# ASK whether the old version contains two different concepts that carry the
# same skos:notation. sameTerm() is negated so a concept is not paired with
# itself. The END terminator must stand alone on its line.
DUPLICATE_NOTATIONS_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

ASK
WHERE {
  GRAPH <$V1> {
    ?concept1 a skos:Concept ;
      skos:notation ?notation .
    ?concept2 a skos:Concept ;
      skos:notation ?notation .
    FILTER (!sameTerm(?concept1, ?concept2))
  }
}
END
)
ask_query "$DUPLICATE_NOTATIONS_Q1"
heading "Count the same concepts"
# Count the distinct resources that are typed as skos:Concept in both the old
# and the new version (the same ?concept must match in both named graphs).
# The END terminator must stand alone on its line.
Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
  GRAPH <$V1> {
    ?concept a skos:Concept .
  }
  GRAPH <$V2> {
    ?concept a skos:Concept .
  }
}
END
)
select_one "$Q1"
heading "Count concepts sharing the same label and notation"
# Count the distinct (prefLabel, notation) pairs that occur on some concept
# in the old version and on some concept in the new version. The inner
# SELECT DISTINCT removes duplicate pairs before the outer COUNT(*).
# The END terminator must stand alone on its line.
Q2=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(*) AS ?count)
WHERE {
  {
    SELECT DISTINCT ?prefLabel ?notation
    WHERE {
      GRAPH <$V1> {
        [] a skos:Concept ;
          skos:prefLabel ?prefLabel ;
          skos:notation ?notation .
      }
      GRAPH <$V2> {
        [] a skos:Concept ;
          skos:prefLabel ?prefLabel ;
          skos:notation ?notation .
      }
    }
  }
}
END
)
select_one "$Q2"
heading "Concepts sharing the same notation with labels differing in character case"
# List notations whose label in the new version differs from the label in the
# old version only by character case (the lower-cased labels are equal but the
# terms are not identical). The FILTER NOT EXISTS drops rows where the old
# version already had the new label for that notation.
# The END terminator must stand alone on its line.
Q3=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
  GRAPH <$V1> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel1 ;
      skos:notation ?notation .
  }
  GRAPH <$V2> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel2 ;
      skos:notation ?notation .
  }
  FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (LCASE(?prefLabel1) = LCASE(?prefLabel2)))
  FILTER NOT EXISTS {
    GRAPH <$V1> {
      [] a skos:Concept ;
        skos:prefLabel ?prefLabel2 ;
        skos:notation ?notation .
    }
  }
}
ORDER BY ?notation
END
)
select_query "$Q3"
heading "Concepts sharing the same label that changed notations"
# List labels that are attached to different notations in the old and the new
# version. The FILTER NOT EXISTS drops rows where the old version already had
# the new notation for that label.
# The END terminator must stand alone on its line.
Q4=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?prefLabel ?notation1 ?notation2
WHERE {
  GRAPH <$V1> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel ;
      skos:notation ?notation1 .
  }
  GRAPH <$V2> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel ;
      skos:notation ?notation2 .
  }
  FILTER (!sameTerm(?notation1, ?notation2))
  FILTER NOT EXISTS {
    GRAPH <$V1> {
      [] a skos:Concept ;
        skos:prefLabel ?prefLabel ;
        skos:notation ?notation2 .
    }
  }
}
ORDER BY ?prefLabel
END
)
select_query "$Q4"
heading "Concepts with prolonged labels"
# List notations whose label in the new version starts with, but is not
# identical to, the label in the old version. The FILTER NOT EXISTS drops rows
# where the old version already had the new label for that notation.
# ORDER BY ?notation makes the listing deterministic, matching the ordering
# used by the character-case query on the same columns.
# The END terminator must stand alone on its line.
Q5=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
  GRAPH <$V1> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel1 ;
      skos:notation ?notation .
  }
  GRAPH <$V2> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel2 ;
      skos:notation ?notation .
  }
  FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (STRSTARTS(?prefLabel2, ?prefLabel1)))
  FILTER NOT EXISTS {
    GRAPH <$V1> {
      [] a skos:Concept ;
        skos:notation ?notation ;
        skos:prefLabel ?prefLabel2 .
    }
  }
}
ORDER BY ?notation
END
)
select_query "$Q5"
heading "Concepts with shortened labels"
# List notations whose label in the old version starts with, but is not
# identical to, the label in the new version. The FILTER NOT EXISTS drops rows
# where the old version already had the new label for that notation.
# ORDER BY ?notation makes the listing deterministic, matching the ordering
# used by the character-case query on the same columns.
# The END terminator must stand alone on its line.
Q6=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
  GRAPH <$V1> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel1 ;
      skos:notation ?notation .
  }
  GRAPH <$V2> {
    [] a skos:Concept ;
      skos:prefLabel ?prefLabel2 ;
      skos:notation ?notation .
  }
  FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (STRSTARTS(?prefLabel1, ?prefLabel2)))
  FILTER NOT EXISTS {
    GRAPH <$V1> {
      [] a skos:Concept ;
        skos:notation ?notation ;
        skos:prefLabel ?prefLabel2 .
    }
  }
}
ORDER BY ?notation
END
)
select_query "$Q6"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment