Created
July 2, 2021 18:10
-
-
Save datadavev/3c8a9d09f3dbff78ee427c6ea162fa91 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Simple demo of JSON-LD context load override using jsonld-java.\n", | |
"\n", | |
"Remote context loading is disabled; a local copy of the context is registered with the document loader and used instead.\n", | |
"\n", | |
"* https://github.com/jsonld-java/jsonld-java\n", | |
"* https://github.com/jsonld-java/jsonld-java/blob/master/core/src/main/java/com/github/jsonldjava/core/JsonLdProcessor.java\n", | |
"* https://github.com/jsonld-java/jsonld-java-tools\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Java Version = 1.8.0_292\n" | |
] | |
} | |
], | |
"source": [ | |
"import logging\n", | |
"import os\n", | |
"\n", | |
"logging.basicConfig(level=logging.INFO)\n", | |
"\n", | |
"# Java 8 install to run against; the path is the output of\n", | |
"#   /usr/libexec/java_home -v1.8\n", | |
"os.environ[\"JAVA_HOME\"] = \"/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home\"\n", | |
"import jnius_config\n", | |
"\n", | |
"# Directories that hold more than one of the jars listed below\n", | |
"jackson_repo = \"~/.m2/repository/com/fasterxml/jackson/core\"\n", | |
"httpclient_lib = \"classes/httpcomponents-client-4.5.13/lib\"\n", | |
"\n", | |
"# shaded jar should contain all that's needed\n", | |
"classpath = [\n", | |
"    \"../eval/jsonld-java/core/target/jsonld-java-0.13.4-SNAPSHOT.jar\",\n", | |
"    \"classes/commons-io-2.10.0/commons-io-2.10.0.jar\",\n", | |
"]\n", | |
"# jackson jars come from the local maven repository under the home directory\n", | |
"classpath.extend(\n", | |
"    os.path.expanduser(f\"{jackson_repo}/{artifact}/2.11.4/{artifact}-2.11.4.jar\")\n", | |
"    for artifact in (\"jackson-core\", \"jackson-databind\", \"jackson-annotations\")\n", | |
")\n", | |
"classpath.extend(\n", | |
"    f\"{httpclient_lib}/{jar_name}\"\n", | |
"    for jar_name in (\n", | |
"        \"httpcore-4.4.13.jar\",\n", | |
"        \"httpclient-4.5.13.jar\",\n", | |
"        \"httpclient-cache-4.5.13.jar\",\n", | |
"        \"httpclient-osgi-4.5.13.jar\",\n", | |
"    )\n", | |
")\n", | |
"classpath.extend(\n", | |
"    [\n", | |
"        \"classes/slf4j-api-1.7.25.jar\",\n", | |
"        \"classes/slf4j-jdk14-1.7.25.jar\",\n", | |
"        \"classes/jcl-over-slf4j-1.7.31.jar\",\n", | |
"    ]\n", | |
")\n", | |
"\n", | |
"# JVM options and classpath are configured before jnius itself is imported\n", | |
"jnius_config.add_options(\"-Djava.awt.headless=true\")\n", | |
"jnius_config.set_classpath(*classpath)\n", | |
"from jnius import autoclass\n", | |
"\n", | |
"# Report which Java runtime was actually picked up\n", | |
"java_system = autoclass(\"java.lang.System\")\n", | |
"version = java_system.getProperty(\"java.version\")\n", | |
"print(f\"Java Version = {version}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def isHttpsSchemaOrg(exp_doc) -> bool:\n", | |
"    \"\"\"True if exp_doc is using the https://schema.org/ namespace.\n", | |
"\n", | |
"    Walks the expanded document depth-first, in document order, and\n", | |
"    decides on the first schema.org IRI it finds. Dictionary keys\n", | |
"    (property IRIs) and strings held directly in a list (for example\n", | |
"    `@type` values) are examined. Plain string values of a dictionary,\n", | |
"    such as `@value` literals, are not examined, so literal text that\n", | |
"    merely looks like a URL cannot decide the result.\n", | |
"\n", | |
"    Args:\n", | |
"        exp_doc: expanded JSON-LD document (nested lists and dicts)\n", | |
"\n", | |
"    Returns:\n", | |
"        bool: True if the first schema.org IRI found starts with\n", | |
"        `https://schema.org`; False if it starts with\n", | |
"        `http://schema.org` or if no schema.org IRI is found at all.\n", | |
"    \"\"\"\n", | |
"\n", | |
"    def _scheme(text):\n", | |
"        # True for https, False for http, None when not a schema.org IRI\n", | |
"        if text.startswith(\"https://schema.org\"):\n", | |
"            return True\n", | |
"        if text.startswith(\"http://schema.org\"):\n", | |
"            return False\n", | |
"        return None\n", | |
"\n", | |
"    def _first_match(node):\n", | |
"        # Tri-state result: None means \"nothing found in this branch\", which\n", | |
"        # keeps the search going instead of ending it like a plain False would.\n", | |
"        if isinstance(node, dict):\n", | |
"            for key, value in node.items():\n", | |
"                found = _scheme(key) if isinstance(key, str) else None\n", | |
"                if found is None and isinstance(value, (dict, list)):\n", | |
"                    found = _first_match(value)\n", | |
"                if found is not None:\n", | |
"                    return found\n", | |
"        elif isinstance(node, list):\n", | |
"            for item in node:\n", | |
"                if isinstance(item, str):\n", | |
"                    found = _scheme(item)\n", | |
"                else:\n", | |
"                    found = _first_match(item)\n", | |
"                if found is not None:\n", | |
"                    return found\n", | |
"        return None\n", | |
"\n", | |
"    return _first_match(exp_doc) is True\n", | |
"\n", | |
"#Load the local contexts into strings\n", | |
"import json\n", | |
"\n", | |
"def _load_context_str(context_fname: str) -> str:\n", | |
"    \"\"\"Read a JSON-LD context file and return it re-serialized as a JSON string.\n", | |
"\n", | |
"    The file is parsed with json.load before being dumped again, so a\n", | |
"    malformed context file raises here.\n", | |
"\n", | |
"    Args:\n", | |
"        context_fname: path to the context file\n", | |
"\n", | |
"    Returns:\n", | |
"        str: the context document serialized with json.dumps\n", | |
"    \"\"\"\n", | |
"    # JSON text is UTF-8; do not depend on the platform default encoding\n", | |
"    with open(context_fname, \"r\", encoding=\"utf-8\") as inf:\n", | |
"        return json.dumps(json.load(inf))\n", | |
"\n", | |
"context_str_https = _load_context_str(\"data/jsonldcontext_https.jsonld\")\n", | |
"context_str_http = _load_context_str(\"data/jsonldcontext.jsonld\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Disable remote downloads\n", | |
"system = autoclass(\"java.lang.System\")\n", | |
"system.setProperty(\"com.github.jsonldjava.disallowRemoteContextLoading\", \"true\")\n", | |
"\n", | |
"# Every spelling of the schema.org context URI that is served from a local copy\n", | |
"SCHEMA_ORG_CONTEXT_URIS = (\n", | |
"    \"https://schema.org/\",\n", | |
"    \"http://schema.org/\",\n", | |
"    \"http://schema.org\",\n", | |
"    \"https://schema.org\",\n", | |
")\n", | |
"\n", | |
"def _local_context_options(context_str):\n", | |
"    \"\"\"Build JsonLdOptions whose document loader has context_str injected.\n", | |
"\n", | |
"    Args:\n", | |
"        context_str: JSON text of the context document to register for\n", | |
"            each URI in SCHEMA_ORG_CONTEXT_URIS\n", | |
"\n", | |
"    Returns:\n", | |
"        A new JsonLdOptions instance with its own DocumentLoader set.\n", | |
"    \"\"\"\n", | |
"    options = autoclass(\"com.github.jsonldjava.core.JsonLdOptions\")()\n", | |
"    loader = autoclass(\"com.github.jsonldjava.core.DocumentLoader\")()\n", | |
"    for uri in SCHEMA_ORG_CONTEXT_URIS:\n", | |
"        loader.addInjectedDoc(uri, context_str)\n", | |
"    options.setDocumentLoader(loader)\n", | |
"    return options\n", | |
"\n", | |
"# One set of options per namespace flavour; both answer for all four URIs\n", | |
"options_https = _local_context_options(context_str_https)\n", | |
"options_http = _local_context_options(context_str_http)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ {\n", | |
" \"@type\" : [ \"https://schema.org/Dataset\" ],\n", | |
" \"https://schema.org/creator\" : [ {\n", | |
" \"@type\" : [ \"https://schema.org/Person\" ],\n", | |
" \"https://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_01\"\n", | |
" } ]\n", | |
" }, {\n", | |
" \"@type\" : [ \"https://schema.org/Person\" ],\n", | |
" \"https://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_02\"\n", | |
" } ]\n", | |
" }, {\n", | |
" \"@type\" : [ \"https://schema.org/Person\" ],\n", | |
" \"https://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_03\"\n", | |
" } ]\n", | |
" } ],\n", | |
" \"https://schema.org/name\" : [ {\n", | |
" \"@value\" : \"Test with vocab only\"\n", | |
" } ]\n", | |
"} ]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Java classes used by this and the following cells\n", | |
"JsonUtils = autoclass(\"com.github.jsonldjava.utils.JsonUtils\")\n", | |
"processor = autoclass(\"com.github.jsonldjava.core.JsonLdProcessor\")\n", | |
"\n", | |
"# Read the JSON-LD document through a Java input stream\n", | |
"fname = \"data/test_00.jsonld\"\n", | |
"FileInputStream = autoclass(\"java.io.FileInputStream\")\n", | |
"fis = FileInputStream(fname)\n", | |
"jsonObject = JsonUtils.fromInputStream(fis)\n", | |
"\n", | |
"# Expand it; only the document is passed, so no JsonLdOptions are supplied here\n", | |
"expanded = processor.expand(jsonObject)\n", | |
"\n", | |
"# Show the expanded JSON-LD, pretty-printed\n", | |
"expanded_text = JsonUtils.toPrettyString(expanded)\n", | |
"print(expanded_text)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\n", | |
" \"type\" : \"Dataset\",\n", | |
" \"schema:creator\" : [ {\n", | |
" \"type\" : \"Person\",\n", | |
" \"name\" : \"creator_01\"\n", | |
" }, {\n", | |
" \"type\" : \"Person\",\n", | |
" \"name\" : \"creator_02\"\n", | |
" }, {\n", | |
" \"type\" : \"Person\",\n", | |
" \"name\" : \"creator_03\"\n", | |
" } ],\n", | |
" \"name\" : \"Test with vocab only\",\n", | |
" \"@context\" : \"https://schema.org/\"\n", | |
"}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Compact the expanded document against a schema.org context.\n", | |
"# This part is a bit funky since we need to get the object into python:\n", | |
"# it is serialized to a string, and that string is loaded as json in python.\n", | |
"# Then we test whether it uses https://schema.org/ or http://schema.org/\n", | |
"# and pick the matching JsonLdOptions for the compaction.\n", | |
"\n", | |
"compact_context_doc = {\"@context\":\"https://schema.org/\"}\n", | |
"compact_context = JsonUtils.fromString( json.dumps(compact_context_doc) )\n", | |
"_doc = json.loads( JsonUtils.toPrettyString(expanded) )\n", | |
"# The options are the third positional argument of compact() in both cases\n", | |
"compact_options = options_https if isHttpsSchemaOrg(_doc) else options_http\n", | |
"compacted = processor.compact(expanded, compact_context, compact_options)\n", | |
"print(JsonUtils.toPrettyString(compacted))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ {\n", | |
" \"http://schema.org/name\" : [ {\n", | |
" \"@value\" : \"Test with vocab only\"\n", | |
" } ],\n", | |
" \"http://schema.org/creator\" : [ {\n", | |
" \"http://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_01\"\n", | |
" } ],\n", | |
" \"@type\" : [ \"http://schema.org/Person\" ]\n", | |
" }, {\n", | |
" \"http://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_02\"\n", | |
" } ],\n", | |
" \"@type\" : [ \"http://schema.org/Person\" ]\n", | |
" }, {\n", | |
" \"http://schema.org/name\" : [ {\n", | |
" \"@value\" : \"creator_03\"\n", | |
" } ],\n", | |
" \"@type\" : [ \"http://schema.org/Person\" ]\n", | |
" } ],\n", | |
" \"@type\" : [ \"http://schema.org/Dataset\" ]\n", | |
"} ]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Round trip: expand the compacted document again, this time with the\n", | |
"# http options, and pretty-print the result\n", | |
"expanded_2 = processor.expand(compacted, options_http)\n", | |
"expanded_2_text = JsonUtils.toPrettyString(expanded_2)\n", | |
"print(expanded_2_text)\n" | |
] | |
} | |
], | |
"metadata": { | |
"interpreter": { | |
"hash": "a928d80adeaeb657c9c705ae1520c8b72562bc1c00e283a14a0b0cba88bad73b" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3.9.5 64-bit ('d1_index_processor': virtualenvwrapper)", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.5" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment