Skip to content

Instantly share code, notes, and snippets.

@datadavev
Created October 21, 2024 19:06
Show Gist options
  • Save datadavev/5d8142b93e7c131623cc437a7f2900a3 to your computer and use it in GitHub Desktop.
Save datadavev/5d8142b93e7c131623cc437a7f2900a3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": [
"Use Jena from python to run sparql queries.\n",
"\n",
"Setup:\n",
"\n",
"1. Download jena binary, e.g. https://dlcdn.apache.org/jena/binaries/apache-jena-fuseki-5.2.0.tar.gz and unzip into the same folder as this notebook\n",
"2. Create a python virtual environment and `python -m pip install pyjnius jupyterlab`\n",
"3. Run `jupyter lab` and open this notebook "
],
"id": "a5127ebcb084839d"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-21T18:49:36.850403Z",
"start_time": "2024-10-21T18:49:36.058755Z"
}
},
"cell_type": "code",
"source": [
"import glob\n",
"\n",
"import jnius_config\n",
"\n",
"# Add the classpath to the jena jars for jnius to find\n",
"_classpath_base = \"apache-jena-5.2.0/lib/\"\n",
"_jars = [\".\"]\n",
"for jar in glob.glob(f\"{_classpath_base}*.jar\"):\n",
" _jars.append(jar)\n",
"try:\n",
" jnius_config.add_options(\"-Djava.awt.headless=true\")\n",
" jnius_config.set_classpath(*_jars)\n",
"except:\n",
" pass\n",
"from jnius import autoclass\n",
"\n",
"# initialize some instances\n",
"dataManager = autoclass(\"org.apache.jena.riot.RDFDataMgr\")\n",
"queryFactory = autoclass(\"org.apache.jena.query.QueryFactory\")\n",
"queryExecutionFactory = autoclass(\"org.apache.jena.query.QueryExecutionFactory\")\n"
],
"id": "2af6c48b-44e3-4a75-a36a-eae571687f0f",
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "7c76081b-7c84-424f-91a3-0173f6a8f2f7",
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-21T18:50:37.746463Z",
"start_time": "2024-10-21T18:50:37.512729Z"
}
},
"source": [
"# Load a jsonld document and show some triples\n",
"fname = \"data/test01.jsonld\"\n",
"dataset = dataManager.loadDataset(fname)\n",
"\n",
"q0 = \"\"\"SELECT *\n",
"{\n",
" ?s ?p ?o\n",
"}\n",
"LIMIT 100\n",
"\"\"\"\n",
"\n",
"q = queryFactory.create(q0)\n",
"qexec = queryExecutionFactory.create(q, dataset)\n",
"results = qexec.execSelect()\n",
"print(\"Result vars:\")\n",
"for v in results.resultVars:\n",
" print(v)\n",
"print(\"Results:\")\n",
"for res in results:\n",
" for varname in res.varNames():\n",
" print(f\"{varname} : {res.get(varname).toString()}\")\n",
" print()\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result vars:\n",
"s\n",
"p\n",
"o\n",
"Results:\n",
"s : _:0f70e5393a494c05de31b8d1a42e0c02\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Organization\n",
"\n",
"s : _:0f70e5393a494c05de31b8d1a42e0c02\n",
"p : http://schema.org/name\n",
"o : National Center for Ecological Analaysis and Synthesis\n",
"\n",
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Person\n",
"\n",
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"p : http://schema.org/Name\n",
"o : Nesbitt, Ian\t\n",
"\n",
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"p : http://schema.org/Email\n",
"o : [email protected]\n",
"\n",
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"p : http://schema.org/Affiliation\n",
"o : _:0f70e5393a494c05de31b8d1a42e0c02\n",
"\n",
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"p : http://schema.org/Identifier\n",
"o : _:04230e722f3db4463e2ea4557b65031b\n",
"\n",
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Person\n",
"\n",
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"p : http://schema.org/Name\n",
"o : Tao, Jing\t\n",
"\n",
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"p : http://schema.org/Email\n",
"o : [email protected]\n",
"\n",
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"p : http://schema.org/Affiliation\n",
"o : _:76015027aeda0978e93a52088af496c8\n",
"\n",
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"p : http://schema.org/Identifier\n",
"o : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n",
"\n",
"s : _:76015027aeda0978e93a52088af496c8\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Organization\n",
"\n",
"s : _:76015027aeda0978e93a52088af496c8\n",
"p : http://schema.org/name\n",
"o : National Center for Ecological Analaysis and Synthesis\n",
"\n",
"s : _:04230e722f3db4463e2ea4557b65031b\n",
"p : http://schema.org/propertyID\n",
"o : https://registry.identifiers.org/registry/orcid\n",
"\n",
"s : _:04230e722f3db4463e2ea4557b65031b\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/PropertyValue\n",
"\n",
"s : _:04230e722f3db4463e2ea4557b65031b\n",
"p : http://schema.org/value\n",
"o : 0000-0001-5828-6070\n",
"\n",
"s : _:04230e722f3db4463e2ea4557b65031b\n",
"p : http://schema.org/url\n",
"o : http://orcid.org/0000-0001-5828-6070\n",
"\n",
"s : _:d5ff749c01ab5b676bd3e7fec8cae986\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Role\n",
"\n",
"s : _:d5ff749c01ab5b676bd3e7fec8cae986\n",
"p : http://schema.org/creator\n",
"o : _:a9d890f6c2181366e1a6ab322ed91fc1\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/alternateName\n",
"o : test01.jsonld\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Dataset\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/description\n",
"o : Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/description\n",
"o : test\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/creator\n",
"o : _:d5ff749c01ab5b676bd3e7fec8cae986\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/creator\n",
"o : _:c030d1038cf2c83cbb78aea19b840749\n",
"\n",
"s : _:883d667f3116b88592a982cb6698f67e\n",
"p : http://schema.org/name\n",
"o : test of alternative creator field configuration\n",
"\n",
"s : _:c030d1038cf2c83cbb78aea19b840749\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/Role\n",
"\n",
"s : _:c030d1038cf2c83cbb78aea19b840749\n",
"p : http://schema.org/creator\n",
"o : _:2a2d14602ff9f0edd4fc5085a7398a23\n",
"\n",
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n",
"p : http://schema.org/propertyID\n",
"o : https://registry.identifiers.org/registry/orcid\n",
"\n",
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n",
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n",
"o : http://schema.org/PropertyValue\n",
"\n",
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n",
"p : http://schema.org/value\n",
"o : 0000-0002-1209-5268\n",
"\n",
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n",
"p : http://schema.org/url\n",
"o : http://orcid.org/0000-0002-1209-5268\n",
"\n"
]
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-21T19:04:09.120314Z",
"start_time": "2024-10-21T19:04:09.111093Z"
}
},
"cell_type": "code",
"source": [
"fname = \"data/test01.jsonld\"\n",
"dataset = dataManager.loadDataset(fname)\n",
"\n",
"q1 = \"\"\"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
"PREFIX list: <http://jena.apache.org/ARQ/list#>\n",
"PREFIX SO: <http://schema.org/>\n",
"\n",
"SELECT ?member ?pos\n",
"WHERE {\n",
" ?datasetId rdf:type SO:Dataset .\n",
" ?datasetId SO:description ?descr .\n",
" ?descr list:member ?member .\n",
" ?descr list:index ( ?pos ?member ).\n",
"}\n",
"\"\"\"\n",
"q = queryFactory.create(q1)\n",
"qexec = queryExecutionFactory.create(q, dataset)\n",
"results = qexec.execSelect()\n",
"for v in results.resultVars:\n",
" print(v)\n",
"for res in results:\n",
" for varname in res.varNames():\n",
" print(f\"{varname} : {res.get(varname).toString()}\")\n",
" print()\n"
],
"id": "e4aea2d89d805d9e",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"member\n",
"pos\n",
"member : Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.\n",
"pos : \"0\"^^xsd:integer\n",
"\n",
"member : test\n",
"pos : \"1\"^^xsd:integer\n",
"\n"
]
}
],
"execution_count": 25
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c29c1c9-2f94-444d-924e-871b3d9a85ed",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"@context": {
"@vocab": "http://schema.org/",
"description": {
"@container": "@list"
},
"creator": {
"@container": "@list"
}
},
"@type":"Dataset",
"description":[
"Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.",
"test"
],
"name":"test of alternative creator field configuration",
"alternateName":"test01.jsonld",
"creator": [
{
"@type": "Role",
"creator": {
"@type": "Person",
"Affiliation": {
"@type": "Organization",
"name": "National Center for Ecological Analaysis and Synthesis"
},
"Email": "[email protected]",
"Identifier": {
"@type": "PropertyValue",
"propertyID": "https://registry.identifiers.org/registry/orcid",
"url": "http://orcid.org/0000-0001-5828-6070",
"value": "0000-0001-5828-6070"
},
"Name": "Nesbitt, Ian\t"
}
},
{
"@type": "Role",
"creator": {
"@type": "Person",
"Affiliation": {
"@type": "Organization",
"name": "National Center for Ecological Analaysis and Synthesis"
},
"Email": "[email protected]",
"Identifier": {
"@type": "PropertyValue",
"propertyID": "https://registry.identifiers.org/registry/orcid",
"url": "http://orcid.org/0000-0002-1209-5268",
"value": "0000-0002-1209-5268"
},
"Name": "Tao, Jing\t"
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment