Last active
August 29, 2015 14:01
-
-
Save emiliom/9843a515c7a524931467 to your computer and use it in GitHub Desktop.
#IOOS #MMI #python Sample SPARQL queries against MMI IOOS Vocabularies. 8/9/2014
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:a8f2a677f7c82cd3bf009dd811b895fd87969a0b976f4a7023d1b8ef3aa9fc81" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Sample SPARQL queries against MMI IOOS Vocabularies" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Taken from Sara Haines examples at http://code.google.com/p/ioostech/wiki/VocabularySearchTesting and http://nbviewer.ipython.org/urls/raw.github.com/nccoos/ioos-vocabulary-search/master/examples/example_sparql_wrapper.ipynb\n", | |
"\n", | |
"*example_sparql_wrapper*. Send SPARQL query to SPARQL endpoint, store and output result using SPARQLWrapper. \n", | |
"Modified from ORA \"Learning SPARQL\" by Bob DuCharme -- ex361.py\n", | |
"\n", | |
"See also Sara Haines' SPARQL examples page, with links and dynamic SPARQL query forms: http://www.unc.edu/~haines/orrioos.html. See also http://mmisw.org/ont/sparql.html\n", | |
"\n", | |
"8/9/2014. Emilio Mayorga. Added the last section, selecting CF standard names from a subset of IOOS Parameter terms.\n", | |
"5/8/2014. Emilio Mayorga. Extended the examples, including the use of narrowMatch." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"from SPARQLWrapper import SPARQLWrapper, JSON\n", | |
"\n", | |
"sparql = SPARQLWrapper(\"http://mmisw.org/sparql\")\n", | |
"\n", | |
"def sparqlqry_jsonresponse(sparql, queryString):\n", | |
" \"\"\" Convenience function\n", | |
" \"\"\"\n", | |
" sparql.setQuery(queryString)\n", | |
" sparql.setReturnFormat(JSON)\n", | |
" j = sparql.query().convert()\n", | |
" return j['results']['bindings']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"/usr/anaconda/anaconda/envs/uwapl_em_1/lib/python2.7/site-packages/SPARQLWrapper/Wrapper.py:88: RuntimeWarning: JSON-LD disabled because no suitable support has been found\n", | |
" warnings.warn(\"JSON-LD disabled because no suitable support has been found\", RuntimeWarning)\n" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"--------------------------------------------\n", | |
"### 1. Narrow Match of terms to total_coliform, *within* IOOS Parameter Vocabulary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"queryString = \"\"\"\n", | |
"PREFIX ioos: <http://mmisw.org/ont/ioos/parameter/>\n", | |
"SELECT DISTINCT ?p ?definition ?unit ?property ?value \n", | |
"WHERE {?p a ioos:Parameter .\n", | |
" ?p ?property ?value .\n", | |
" ?p ioos:Term ?term . \n", | |
" ?p ioos:Definition ?definition . \n", | |
" ?p ioos:Units ?unit .\n", | |
" FILTER (regex(str(?property), \"narrowMatch\", \"i\") \n", | |
" && regex(str(?value), \"ioos\", \"i\") && regex(str(?p), \"total_coliform\", \"i\") )\n", | |
" } \n", | |
"ORDER BY ?p\n", | |
"\"\"\"\n", | |
"# FILTER (regex(str(?property), \"narrowMatch\", \"i\") && regex(str(?value), \"ioos\", \"i\") )\n", | |
"# FILTER (regex(str(?property), \"(exactMatch|closeMatch)\", \"i\") && regex(str(?value), \"cf\", \"i\") )" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sparql.setQuery(queryString)\n", | |
"sparql.setReturnFormat(JSON)\n", | |
"j = sparql.query().convert()\n", | |
"j.keys()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"[u'head', u'results']" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"j['head'], j['results'].keys()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"({u'vars': [u'p', u'definition', u'unit', u'property', u'value']},\n", | |
" [u'bindings'])" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"jrb = j['results']['bindings']\n", | |
"type(jrb), len(jrb)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"(list, 4)" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"jrb[-1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"{u'definition': {u'type': u'literal',\n", | |
" u'value': u'Coliforms are bacteria that are naturally present in the environment and used as an indicator that other, potentially harmful, bacteria may be present. Most Probable Number (MPN) per unit volume.'},\n", | |
" u'p': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/total_coliform'},\n", | |
" u'property': {u'type': u'uri',\n", | |
" u'value': u'http://www.w3.org/2004/02/skos/core#narrowMatch'},\n", | |
" u'unit': {u'type': u'literal', u'value': u'MPN ml-1'},\n", | |
" u'value': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/fecal_coliform'}}" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"jrb[-1]['value']['value']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"u'http://mmisw.org/ont/ioos/parameter/fecal_coliform'" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"parameter (broad) term: %s\" % jrb[-1]['p']['value']\n", | |
"print \"narrower ('children') terms:\"\n", | |
"for jrbv in jrb:\n", | |
" print \" %s\" % jrbv['value']['value']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"parameter (broad) term: http://mmisw.org/ont/ioos/parameter/total_coliform\n", | |
"narrower ('children') terms:\n", | |
" http://mmisw.org/ont/ioos/parameter/e_coli\n", | |
" http://mmisw.org/ont/ioos/parameter/enterococcus\n", | |
" http://mmisw.org/ont/ioos/parameter/clostridium_perfringens\n", | |
" http://mmisw.org/ont/ioos/parameter/fecal_coliform\n" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"jrb[:2]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"[{u'definition': {u'type': u'literal',\n", | |
" u'value': u'Coliforms are bacteria that are naturally present in the environment and used as an indicator that other, potentially harmful, bacteria may be present. Most Probable Number (MPN) per unit volume.'},\n", | |
" u'p': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/total_coliform'},\n", | |
" u'property': {u'type': u'uri',\n", | |
" u'value': u'http://www.w3.org/2004/02/skos/core#narrowMatch'},\n", | |
" u'unit': {u'type': u'literal', u'value': u'MPN ml-1'},\n", | |
" u'value': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/e_coli'}},\n", | |
" {u'definition': {u'type': u'literal',\n", | |
" u'value': u'Coliforms are bacteria that are naturally present in the environment and used as an indicator that other, potentially harmful, bacteria may be present. Most Probable Number (MPN) per unit volume.'},\n", | |
" u'p': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/total_coliform'},\n", | |
" u'property': {u'type': u'uri',\n", | |
" u'value': u'http://www.w3.org/2004/02/skos/core#narrowMatch'},\n", | |
" u'unit': {u'type': u'literal', u'value': u'MPN ml-1'},\n", | |
" u'value': {u'type': u'uri',\n", | |
" u'value': u'http://mmisw.org/ont/ioos/parameter/enterococcus'}}]" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"--------------------------------------------\n", | |
"### 2. Narrow Match of IOOS Parameter Vocabulary terms from IOOS Core Variable dissolved_nutrients term" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# core_variable attributes: name & definition\n", | |
"queryString = \"\"\"\n", | |
"PREFIX iooscore: <http://mmisw.org/ont/ioos/core_variable/>\n", | |
"SELECT DISTINCT ?p ?name ?definition ?property ?value \n", | |
"WHERE {?p a iooscore:Core_Variable .\n", | |
" ?p iooscore:name ?name . \n", | |
" ?p iooscore:definition ?definition . \n", | |
" ?p ?property ?value .\n", | |
" FILTER (regex(str(?property), \"narrowMatch\", \"i\") \n", | |
" && regex(str(?value), \"ioos/parameter\", \"i\") \n", | |
" && regex(str(?name), \"dissolved_nutrients\", \"i\") )\n", | |
" } \n", | |
"ORDER BY ?p\n", | |
"\"\"\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"jrb2 = sparqlqry_jsonresponse(sparql, queryString)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"parameter (broad) term: %s\" % jrb2[-1]['name']['value']\n", | |
"print \"%d narrower ('children') terms:\" % len(jrb2)\n", | |
"for jrbv in jrb2:\n", | |
" print \" %s\" % jrbv['value']['value']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"parameter (broad) term: dissolved_nutrients\n", | |
"9 narrower ('children') terms:\n", | |
" http://mmisw.org/ont/ioos/parameter/ammonium\n", | |
" http://mmisw.org/ont/ioos/parameter/dissolved_organic_carbon\n", | |
" http://mmisw.org/ont/ioos/parameter/total_phosphorus\n", | |
" http://mmisw.org/ont/ioos/parameter/dissolved_oxygen_saturation\n", | |
" http://mmisw.org/ont/ioos/parameter/ammonia\n", | |
" http://mmisw.org/ont/ioos/parameter/total_nitrogen\n", | |
" http://mmisw.org/ont/ioos/parameter/phosphate\n", | |
" http://mmisw.org/ont/ioos/parameter/dissolved_inorganic_carbon\n", | |
" http://mmisw.org/ont/ioos/parameter/dissolved_oxygen\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"--------------------------------------------\n", | |
"### 3. Find CF standard name matches for the IOOS Parameter terms found above as matching the IOOS Core Variable \"dissolved_nutrients\"\n", | |
"This match will be fairly generous, as it includes SKOS narrowMatch, closeMatch and exactMatch relationships." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# List of IOOS Parameter terms selected in the previous query\n", | |
"ioosparam_sel_lst = [jrbv['value']['value'].split(\"/\")[-1] for jrbv in jrb2]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"queryString = \"\"\"\n", | |
"PREFIX ioos: <http://mmisw.org/ont/ioos/parameter/>\n", | |
"SELECT DISTINCT ?term ?parameter ?definition ?unit ?property ?value \n", | |
"WHERE {?parameter a ioos:Parameter .\n", | |
" ?parameter ?property ?value .\n", | |
" ?parameter ioos:Term ?term . \n", | |
" ?parameter ioos:Definition ?definition . \n", | |
" ?parameter ioos:Units ?unit .\n", | |
" FILTER (regex(str(?property), \"(narrowMatch|closeMatch|exactMatch)\", \"i\") \n", | |
" && regex(str(?value), \"cf\", \"i\") \n", | |
" && regex(str(?term), \"(%s)\", \"i\") )\n", | |
" } \n", | |
"ORDER BY ?term ?parameter\n", | |
"\"\"\" % '|'.join(ioosparam_sel_lst)\n", | |
"\n", | |
"jrb3 = sparqlqry_jsonresponse(sparql, queryString)\n", | |
"\n", | |
"ioosparam_terms = list([ x[\"term\"][\"value\"] for x in jrb3 ])\n", | |
"skos_match = list([ x[\"property\"][\"value\"].split('#')[-1] for x in jrb3 ])\n", | |
"cf_standard_uris = list([ x[\"value\"][\"value\"] for x in jrb3 ])\n", | |
"cf_standard_names = map(lambda x: x.split(\"/\")[-1], cf_standard_uris)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Note: cf_standard_uris can easily be added to this DataFrame, but I've skipped them for clarity\n", | |
"pd.DataFrame.from_records(zip(ioosparam_terms, skos_match, cf_standard_names), \n", | |
" columns=(\"IOOS Parameter Term\", \"SKOS Match\", \"CF Standard Name\"))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IOOS Parameter Term</th>\n", | |
" <th>SKOS Match</th>\n", | |
" <th>CF Standard Name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0 </th>\n", | |
" <td> ammonium</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mole_concentration_of_ammonium_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1 </th>\n", | |
" <td> dissolved_inorganic_carbon</td>\n", | |
" <td> closeMatch</td>\n", | |
" <td> mole_concentration_of_dissolved_inorganic_carb...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2 </th>\n", | |
" <td> dissolved_organic_carbon</td>\n", | |
" <td> closeMatch</td>\n", | |
" <td> mole_concentration_of_dissolved_organic_carbon...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3 </th>\n", | |
" <td> dissolved_oxygen</td>\n", | |
" <td> exactMatch</td>\n", | |
" <td> mass_concentration_of_oxygen_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4 </th>\n", | |
" <td> dissolved_oxygen_saturation</td>\n", | |
" <td> exactMatch</td>\n", | |
" <td> fractional_saturation_of_oxygen_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5 </th>\n", | |
" <td> phosphate</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mole_concentration_of_phosphate_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6 </th>\n", | |
" <td> phosphate</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mole_ratio_of_nitrate_to_phosphate_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7 </th>\n", | |
" <td> phosphate</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> moles_of_phosphate_per_unit_mass_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8 </th>\n", | |
" <td> phosphate</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mass_concentration_of_phosphate_in_sea_water</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9 </th>\n", | |
" <td> total_nitrogen</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mass_concentration_of_inorganic_nitrogen_in_se...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td> total_phosphorus</td>\n", | |
" <td> narrowMatch</td>\n", | |
" <td> mole_concentration_of_particulate_organic_matt...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>11 rows \u00d7 3 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 15, | |
"text": [ | |
" IOOS Parameter Term SKOS Match \\\n", | |
"0 ammonium narrowMatch \n", | |
"1 dissolved_inorganic_carbon closeMatch \n", | |
"2 dissolved_organic_carbon closeMatch \n", | |
"3 dissolved_oxygen exactMatch \n", | |
"4 dissolved_oxygen_saturation exactMatch \n", | |
"5 phosphate narrowMatch \n", | |
"6 phosphate narrowMatch \n", | |
"7 phosphate narrowMatch \n", | |
"8 phosphate narrowMatch \n", | |
"9 total_nitrogen narrowMatch \n", | |
"10 total_phosphorus narrowMatch \n", | |
"\n", | |
" CF Standard Name \n", | |
"0 mole_concentration_of_ammonium_in_sea_water \n", | |
"1 mole_concentration_of_dissolved_inorganic_carb... \n", | |
"2 mole_concentration_of_dissolved_organic_carbon... \n", | |
"3 mass_concentration_of_oxygen_in_sea_water \n", | |
"4 fractional_saturation_of_oxygen_in_sea_water \n", | |
"5 mole_concentration_of_phosphate_in_sea_water \n", | |
"6 mole_ratio_of_nitrate_to_phosphate_in_sea_water \n", | |
"7 moles_of_phosphate_per_unit_mass_in_sea_water \n", | |
"8 mass_concentration_of_phosphate_in_sea_water \n", | |
"9 mass_concentration_of_inorganic_nitrogen_in_se... \n", | |
"10 mole_concentration_of_particulate_organic_matt... \n", | |
"\n", | |
"[11 rows x 3 columns]" | |
] | |
} | |
], | |
"prompt_number": 15 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment