Created
October 21, 2024 19:06
-
-
Save datadavev/5d8142b93e7c131623cc437a7f2900a3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": [ | |
"Use Apache Jena from Python to run SPARQL queries.\n", | |
"\n", | |
"Setup:\n", | |
"\n", | |
"1. Download the Apache Jena binary distribution (not Fuseki), e.g. https://dlcdn.apache.org/jena/binaries/apache-jena-5.2.0.tar.gz and extract it into the same folder as this notebook, so that `apache-jena-5.2.0/lib/` exists\n", | |
"2. Create a python virtual environment and `python -m pip install pyjnius jupyterlab`\n", | |
"3. Run `jupyter lab` and open this notebook " | |
], | |
"id": "a5127ebcb084839d" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-21T18:49:36.850403Z", | |
"start_time": "2024-10-21T18:49:36.058755Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import glob\n", | |
"\n", | |
"import jnius_config\n", | |
"\n", | |
"# Collect the Jena jars so that jnius can place them on the JVM classpath.\n", | |
"_classpath_base = \"apache-jena-5.2.0/lib/\"\n", | |
"# Sorted so the classpath order is the same on every run.\n", | |
"_jars = [\".\", *sorted(glob.glob(f\"{_classpath_base}*.jar\"))]\n", | |
"if len(_jars) == 1:\n", | |
"    # Fail early with a clear message instead of a class-not-found error later.\n", | |
"    raise FileNotFoundError(\n", | |
"        f\"No jar files found under {_classpath_base!r}; \"\n", | |
"        \"extract the Apache Jena binary distribution next to this notebook.\"\n", | |
"    )\n", | |
"try:\n", | |
"    jnius_config.add_options(\"-Djava.awt.headless=true\")\n", | |
"    jnius_config.set_classpath(*_jars)\n", | |
"except ValueError:\n", | |
"    # jnius_config raises ValueError once the JVM is running, which happens\n", | |
"    # when this cell is re-run in a live kernel. The configuration applied\n", | |
"    # on the first run is still in effect, so it is safe to continue.\n", | |
"    pass\n", | |
"# Must be imported after jnius_config is set up: importing jnius starts the JVM.\n", | |
"from jnius import autoclass\n", | |
"\n", | |
"# Jena classes whose static methods are used by the cells below\n", | |
"dataManager = autoclass(\"org.apache.jena.riot.RDFDataMgr\")\n", | |
"queryFactory = autoclass(\"org.apache.jena.query.QueryFactory\")\n", | |
"queryExecutionFactory = autoclass(\"org.apache.jena.query.QueryExecutionFactory\")\n" | |
], | |
"id": "2af6c48b-44e3-4a75-a36a-eae571687f0f", | |
"outputs": [], | |
"execution_count": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"id": "7c76081b-7c84-424f-91a3-0173f6a8f2f7", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-21T18:50:37.746463Z", | |
"start_time": "2024-10-21T18:50:37.512729Z" | |
} | |
}, | |
"source": [ | |
"# Load a jsonld document and show some triples\n", | |
"fname = \"data/test01.jsonld\"\n", | |
"dataset = dataManager.loadDataset(fname)\n", | |
"\n", | |
"q0 = \"\"\"SELECT *\n", | |
"{\n", | |
" ?s ?p ?o\n", | |
"}\n", | |
"LIMIT 100\n", | |
"\"\"\"\n", | |
"\n", | |
"q = queryFactory.create(q0)\n", | |
"qexec = queryExecutionFactory.create(q, dataset)\n", | |
"try:\n", | |
"    results = qexec.execSelect()\n", | |
"    print(\"Result vars:\")\n", | |
"    for v in results.resultVars:\n", | |
"        print(v)\n", | |
"    print(\"Results:\")\n", | |
"    # Consume the result set before closing the execution that backs it.\n", | |
"    for res in results:\n", | |
"        for varname in res.varNames():\n", | |
"            print(f\"{varname} : {res.get(varname).toString()}\")\n", | |
"        print()\n", | |
"finally:\n", | |
"    # Release the resources held by the query execution, even on error.\n", | |
"    qexec.close()\n" | |
], | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Result vars:\n", | |
"s\n", | |
"p\n", | |
"o\n", | |
"Results:\n", | |
"s : _:0f70e5393a494c05de31b8d1a42e0c02\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Organization\n", | |
"\n", | |
"s : _:0f70e5393a494c05de31b8d1a42e0c02\n", | |
"p : http://schema.org/name\n", | |
"o : National Center for Ecological Analaysis and Synthesis\n", | |
"\n", | |
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Person\n", | |
"\n", | |
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"p : http://schema.org/Name\n", | |
"o : Nesbitt, Ian\t\n", | |
"\n", | |
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"p : http://schema.org/Email\n", | |
"o : [email protected]\n", | |
"\n", | |
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"p : http://schema.org/Affiliation\n", | |
"o : _:0f70e5393a494c05de31b8d1a42e0c02\n", | |
"\n", | |
"s : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"p : http://schema.org/Identifier\n", | |
"o : _:04230e722f3db4463e2ea4557b65031b\n", | |
"\n", | |
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Person\n", | |
"\n", | |
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"p : http://schema.org/Name\n", | |
"o : Tao, Jing\t\n", | |
"\n", | |
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"p : http://schema.org/Email\n", | |
"o : [email protected]\n", | |
"\n", | |
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"p : http://schema.org/Affiliation\n", | |
"o : _:76015027aeda0978e93a52088af496c8\n", | |
"\n", | |
"s : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"p : http://schema.org/Identifier\n", | |
"o : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n", | |
"\n", | |
"s : _:76015027aeda0978e93a52088af496c8\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Organization\n", | |
"\n", | |
"s : _:76015027aeda0978e93a52088af496c8\n", | |
"p : http://schema.org/name\n", | |
"o : National Center for Ecological Analaysis and Synthesis\n", | |
"\n", | |
"s : _:04230e722f3db4463e2ea4557b65031b\n", | |
"p : http://schema.org/propertyID\n", | |
"o : https://registry.identifiers.org/registry/orcid\n", | |
"\n", | |
"s : _:04230e722f3db4463e2ea4557b65031b\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/PropertyValue\n", | |
"\n", | |
"s : _:04230e722f3db4463e2ea4557b65031b\n", | |
"p : http://schema.org/value\n", | |
"o : 0000-0001-5828-6070\n", | |
"\n", | |
"s : _:04230e722f3db4463e2ea4557b65031b\n", | |
"p : http://schema.org/url\n", | |
"o : http://orcid.org/0000-0001-5828-6070\n", | |
"\n", | |
"s : _:d5ff749c01ab5b676bd3e7fec8cae986\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Role\n", | |
"\n", | |
"s : _:d5ff749c01ab5b676bd3e7fec8cae986\n", | |
"p : http://schema.org/creator\n", | |
"o : _:a9d890f6c2181366e1a6ab322ed91fc1\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/alternateName\n", | |
"o : test01.jsonld\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Dataset\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/description\n", | |
"o : Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/description\n", | |
"o : test\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/creator\n", | |
"o : _:d5ff749c01ab5b676bd3e7fec8cae986\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/creator\n", | |
"o : _:c030d1038cf2c83cbb78aea19b840749\n", | |
"\n", | |
"s : _:883d667f3116b88592a982cb6698f67e\n", | |
"p : http://schema.org/name\n", | |
"o : test of alternative creator field configuration\n", | |
"\n", | |
"s : _:c030d1038cf2c83cbb78aea19b840749\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/Role\n", | |
"\n", | |
"s : _:c030d1038cf2c83cbb78aea19b840749\n", | |
"p : http://schema.org/creator\n", | |
"o : _:2a2d14602ff9f0edd4fc5085a7398a23\n", | |
"\n", | |
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n", | |
"p : http://schema.org/propertyID\n", | |
"o : https://registry.identifiers.org/registry/orcid\n", | |
"\n", | |
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n", | |
"p : http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", | |
"o : http://schema.org/PropertyValue\n", | |
"\n", | |
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n", | |
"p : http://schema.org/value\n", | |
"o : 0000-0002-1209-5268\n", | |
"\n", | |
"s : _:95bf3e11e0d5da02ff90a7c84cbadfd5\n", | |
"p : http://schema.org/url\n", | |
"o : http://orcid.org/0000-0002-1209-5268\n", | |
"\n" | |
] | |
} | |
], | |
"execution_count": 4 | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2024-10-21T19:04:09.120314Z", | |
"start_time": "2024-10-21T19:04:09.111093Z" | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# List each member of the dataset's description list with its position,\n", | |
"# using the ARQ list property functions list:member and list:index.\n", | |
"fname = \"data/test01.jsonld\"\n", | |
"dataset = dataManager.loadDataset(fname)\n", | |
"\n", | |
"q1 = \"\"\"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n", | |
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n", | |
"PREFIX list: <http://jena.apache.org/ARQ/list#>\n", | |
"PREFIX SO: <http://schema.org/>\n", | |
"\n", | |
"SELECT ?member ?pos\n", | |
"WHERE {\n", | |
" ?datasetId rdf:type SO:Dataset .\n", | |
" ?datasetId SO:description ?descr .\n", | |
" ?descr list:member ?member .\n", | |
" ?descr list:index ( ?pos ?member ).\n", | |
"}\n", | |
"\"\"\"\n", | |
"q = queryFactory.create(q1)\n", | |
"qexec = queryExecutionFactory.create(q, dataset)\n", | |
"try:\n", | |
"    results = qexec.execSelect()\n", | |
"    for v in results.resultVars:\n", | |
"        print(v)\n", | |
"    # Consume the result set before closing the execution that backs it.\n", | |
"    for res in results:\n", | |
"        for varname in res.varNames():\n", | |
"            print(f\"{varname} : {res.get(varname).toString()}\")\n", | |
"        print()\n", | |
"finally:\n", | |
"    # Release the resources held by the query execution, even on error.\n", | |
"    qexec.close()\n" | |
], | |
"id": "e4aea2d89d805d9e", | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"member\n", | |
"pos\n", | |
"member : Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.\n", | |
"pos : \"0\"^^xsd:integer\n", | |
"\n", | |
"member : test\n", | |
"pos : \"1\"^^xsd:integer\n", | |
"\n" | |
] | |
} | |
], | |
"execution_count": 25 | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8c29c1c9-2f94-444d-924e-871b3d9a85ed", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"@context": { | |
"@vocab": "http://schema.org/", | |
"description": { | |
"@container": "@list" | |
}, | |
"creator": { | |
"@container": "@list" | |
} | |
}, | |
"@type":"Dataset", | |
"description":[ | |
"Vocab https://schema.org/, creator without @list and with @type: 'Role'. Modeled after CanWIN SO format.", | |
"test" | |
], | |
"name":"test of alternative creator field configuration", | |
"alternateName":"test01.jsonld", | |
"creator": [ | |
{ | |
"@type": "Role", | |
"creator": { | |
"@type": "Person", | |
"Affiliation": { | |
"@type": "Organization", | |
"name": "National Center for Ecological Analaysis and Synthesis" | |
}, | |
"Email": "[email protected]", | |
"Identifier": { | |
"@type": "PropertyValue", | |
"propertyID": "https://registry.identifiers.org/registry/orcid", | |
"url": "http://orcid.org/0000-0001-5828-6070", | |
"value": "0000-0001-5828-6070" | |
}, | |
"Name": "Nesbitt, Ian\t" | |
} | |
}, | |
{ | |
"@type": "Role", | |
"creator": { | |
"@type": "Person", | |
"Affiliation": { | |
"@type": "Organization", | |
"name": "National Center for Ecological Analaysis and Synthesis" | |
}, | |
"Email": "[email protected]", | |
"Identifier": { | |
"@type": "PropertyValue", | |
"propertyID": "https://registry.identifiers.org/registry/orcid", | |
"url": "http://orcid.org/0000-0002-1209-5268", | |
"value": "0000-0002-1209-5268" | |
}, | |
"Name": "Tao, Jing\t" | |
} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment