Skip to content

Instantly share code, notes, and snippets.

@jindrichmynarz
Last active September 23, 2016 09:50
Show Gist options
  • Save jindrichmynarz/97e528372b618161f5be3b4aaaeb7225 to your computer and use it in GitHub Desktop.
Save jindrichmynarz/97e528372b618161f5be3b4aaaeb7225 to your computer and use it in GitHub Desktop.
Diff versions of a code list
#!/bin/bash
#
# Compute the diff between two versions of a SKOS code list.
# Usage: ./code_list_diff.sh [-h|--help] old_version new_version

# Stop at the first command that fails outside of a condition.
set -e
shopt -s extglob

# The two versions to compare: first argument is the old one, second the new.
# Both values are later passed to arq as named graphs and interpolated into
# the GRAPH <...> clauses of the SPARQL queries.
V1="$1"
V2="$2"

# ASK query that succeeds when the data contains at least one
# skos:ConceptScheme.
TEST_QUERY='ASK { [] a <http://www.w3.org/2004/02/skos/core#ConceptScheme> . }'
# Print an error message to stderr and terminate the script.
# Arguments: $@ - message words, joined with single spaces
# Outputs:   the message on stderr
# Returns:   never returns; exits with status 1
die () {
  # printf instead of echo: echo would treat a message that starts with
  # -n or -e as an option and silently swallow it.
  printf '%s\n' "$*" >&2
  exit 1
}
# Run a SPARQL query over both code list versions and print the result table.
# Globals:   V1, V2 (read) - files loaded by arq as named graphs
# Arguments: $1 - SPARQL query text
# Outputs:   arq's CSV results, re-aligned into columns, on stdout
select_query () {
  # V1 and V2 are quoted so that a path containing spaces or glob characters
  # reaches arq as a single argument.
  arq --results CSV --namedGraph "$V1" --namedGraph "$V2" "${1}" | column -s, -t
}
# Run a SPARQL query and keep only the final line of its formatted output,
# i.e. the value row that follows the header for single-row results.
# Arguments: $1 - SPARQL query text
# Outputs:   last line of select_query's output on stdout
select_one () {
  local sparql=$1
  select_query "$sparql" | tail -n 1
}
# Run a SPARQL ASK query and report the outcome in plain words.
# Arguments: $1 - SPARQL ASK query text
# Outputs:   "Yes" when the last result line contains "true", otherwise "No"
ask_query () {
  if select_one "$1" | grep -q true; then
    echo "Yes"
  else
    echo "No"
  fi
}
# Print the help text and leave the script successfully.
# Outputs: three lines of usage information on stdout
# Returns: never returns; exits with status 0
usage () {
  cat <<'EOF'
Usage: ./code_list_diff.sh [-h|--help] old_version new_version
Computes differences between old_version and new_version of a SKOS code list.
Versions of code lists must be provided in an RDF serialization.
EOF
  exit 0
}
# Print a section title, preceded by a blank line and wrapped in "###".
# Arguments: $1 - title text
# Outputs:   the decorated title on stdout
heading () {
  local title=$1
  printf '\n### %s ###\n' "$title"
}
# Show the help text only for an exact help flag. The flag is compared
# literally: a regex match of "-h --help" against $1 would also fire for an
# empty argument or for any fragment such as "h", "-" or ".", e.g. a file
# that happens to be named "help".
case "${1-}" in
  -h|--help) usage ;;
esac
# Test if arq is installed.
command -v arq >/dev/null 2>&1 || die "Missing Jena ARQ!"
# Test correct number of arguments.
if [[ "$#" -ne 2 ]]; then
  die "You must provide two code lists!"
fi
# Comparing a file with itself is pointless; operands are quoted so that
# paths with spaces or glob characters are compared literally.
[[ "$V1" != "$V2" ]] || die "Cannot diff the same file."
for i in "$@"; do
  # Test if the diffed files exist.
  if [[ ! -f "$i" ]]; then
    die "File $i not found!"
  fi
  # Test if the diffed files contain skos:ConceptSchemes.
  arq --results CSV --data "$i" "$TEST_QUERY" | tail -n 1 | grep -q true ||
    die "File $i is missing a skos:ConceptScheme!"
done
heading "Number of concepts in $(basename "$V1")"
# Count the distinct skos:Concept resources in the old version's graph.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
COUNT_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
GRAPH <$V1> {
?concept a skos:Concept .
}
}
END
)
select_one "$COUNT_Q1"
heading "Number of concepts in $(basename "$V2")"
# Count the distinct skos:Concept resources in the new version's graph.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
COUNT_Q2=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
GRAPH <$V2> {
?concept a skos:Concept .
}
}
END
)
select_one "$COUNT_Q2"
heading "Are there concepts sharing labels in $(basename "$V1")?"
# Ask whether two different concepts in the old version carry the same
# skos:prefLabel.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
DUPLICATE_LABELS_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
ASK
WHERE {
GRAPH <$V1> {
?concept1 a skos:Concept ;
skos:prefLabel ?prefLabel .
?concept2 a skos:Concept ;
skos:prefLabel ?prefLabel .
FILTER (!sameTerm(?concept1, ?concept2))
}
}
END
)
ask_query "$DUPLICATE_LABELS_Q1"
heading "Are there concepts sharing notations in $(basename "$V1")?"
# Ask whether two different concepts in the old version carry the same
# skos:notation.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
DUPLICATE_NOTATIONS_Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
ASK
WHERE {
GRAPH <$V1> {
?concept1 a skos:Concept ;
skos:notation ?notation .
?concept2 a skos:Concept ;
skos:notation ?notation .
FILTER (!sameTerm(?concept1, ?concept2))
}
}
END
)
ask_query "$DUPLICATE_NOTATIONS_Q1"
heading "Count the same concepts"
# Count the concepts (matched by identical term) that are typed as
# skos:Concept in both the old and the new graph.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q1=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT (COUNT(DISTINCT ?concept) AS ?count)
WHERE {
GRAPH <$V1> {
?concept a skos:Concept .
}
GRAPH <$V2> {
?concept a skos:Concept .
}
}
END
)
select_one "$Q1"
heading "Count concepts sharing the same label and notation"
# Count the distinct (prefLabel, notation) pairs that occur on a concept in
# both graphs; the concepts themselves are blank-node patterns, so their
# identifiers do not have to match.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q2=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT (COUNT(*) AS ?count)
WHERE {
{
SELECT DISTINCT ?prefLabel ?notation
WHERE {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel ;
skos:notation ?notation .
}
GRAPH <$V2> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel ;
skos:notation ?notation .
}
}
}
}
END
)
select_one "$Q2"
heading "Concepts sharing the same notation with labels differing in character case"
# List notations whose label in the new graph differs from the old label only
# in letter case. The FILTER NOT EXISTS drops rows where the old graph already
# has the new label for that notation.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q3=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel1 ;
skos:notation ?notation .
}
GRAPH <$V2> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel2 ;
skos:notation ?notation .
}
FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (LCASE(?prefLabel1) = LCASE(?prefLabel2)))
FILTER NOT EXISTS {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel2 ;
skos:notation ?notation .
}
}
}
ORDER BY ?notation
END
)
select_query "$Q3"
heading "Concepts sharing the same label that changed notations"
# List labels whose notation in the new graph differs from the one in the old
# graph. The FILTER NOT EXISTS drops rows where the old graph already pairs
# the label with the new notation.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q4=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?prefLabel ?notation1 ?notation2
WHERE {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel ;
skos:notation ?notation1 .
}
GRAPH <$V2> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel ;
skos:notation ?notation2 .
}
FILTER (!sameTerm(?notation1, ?notation2))
FILTER NOT EXISTS {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel ;
skos:notation ?notation2 .
}
}
}
ORDER BY ?prefLabel
END
)
select_query "$Q4"
heading "Concepts with prolonged labels"
# List notations whose new label starts with the old label but is not the
# same term, i.e. text was appended. The FILTER NOT EXISTS drops rows where
# the old graph already has the new label for that notation.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q5=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel1 ;
skos:notation ?notation .
}
GRAPH <$V2> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel2 ;
skos:notation ?notation .
}
FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (STRSTARTS(?prefLabel2, ?prefLabel1)))
FILTER NOT EXISTS {
GRAPH <$V1> {
[] a skos:Concept ;
skos:notation ?notation ;
skos:prefLabel ?prefLabel2 .
}
}
}
END
)
select_query "$Q5"
heading "Concepts with shortened labels"
# List notations whose old label starts with the new label but is not the
# same term, i.e. text was cut from the end. The FILTER NOT EXISTS drops rows
# where the old graph already has the new label for that notation.
# The heredoc terminator is on its own line: closing it as "END)" only works
# through an undocumented bash quirk and fails in other shells.
Q6=$(cat <<-END
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?notation ?prefLabel1 ?prefLabel2
WHERE {
GRAPH <$V1> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel1 ;
skos:notation ?notation .
}
GRAPH <$V2> {
[] a skos:Concept ;
skos:prefLabel ?prefLabel2 ;
skos:notation ?notation .
}
FILTER (!sameTerm(?prefLabel1, ?prefLabel2) && (STRSTARTS(?prefLabel1, ?prefLabel2)))
FILTER NOT EXISTS {
GRAPH <$V1> {
[] a skos:Concept ;
skos:notation ?notation ;
skos:prefLabel ?prefLabel2 .
}
}
}
END
)
select_query "$Q6"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment