Skip to content

Instantly share code, notes, and snippets.

@datadavev
Created July 2, 2021 18:10
Show Gist options
  • Save datadavev/3c8a9d09f3dbff78ee427c6ea162fa91 to your computer and use it in GitHub Desktop.
Save datadavev/3c8a9d09f3dbff78ee427c6ea162fa91 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Simple demo of JSON-LD context load override using jsonld-java.\n",
"\n",
"Remote context load is disabled and instead a local context is registered for use.\n",
"\n",
"* https://github.com/jsonld-java/jsonld-java\n",
"* https://github.com/jsonld-java/jsonld-java/blob/master/core/src/main/java/com/github/jsonldjava/core/JsonLdProcessor.java\n",
"* https://github.com/jsonld-java/jsonld-java-tools\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Java Version = 1.8.0_292\n"
]
}
],
"source": [
"import os\n",
"import logging\n",
"logging.basicConfig(level=logging.INFO)\n",
"#set from /usr/libexec/java_home -v1.8\n",
"os.environ[\"JAVA_HOME\"] = \"/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home\"\n",
"import jnius_config\n",
"\n",
"#shaded jar should contain all that's needed\n",
"classpath = [\n",
" \"../eval/jsonld-java/core/target/jsonld-java-0.13.4-SNAPSHOT.jar\",\n",
" \"classes/commons-io-2.10.0/commons-io-2.10.0.jar\",\n",
" os.path.expanduser(\"~/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.11.4/jackson-core-2.11.4.jar\"),\n",
" os.path.expanduser(\"~/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.11.4/jackson-databind-2.11.4.jar\"),\n",
" os.path.expanduser(\"~/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.11.4/jackson-annotations-2.11.4.jar\"),\n",
" \"classes/httpcomponents-client-4.5.13/lib/httpcore-4.4.13.jar\", \n",
" \"classes/httpcomponents-client-4.5.13/lib/httpclient-4.5.13.jar\", \n",
" \"classes/httpcomponents-client-4.5.13/lib/httpclient-cache-4.5.13.jar\", \n",
" \"classes/httpcomponents-client-4.5.13/lib/httpclient-osgi-4.5.13.jar\",\n",
" \"classes/slf4j-api-1.7.25.jar\",\n",
" \"classes/slf4j-jdk14-1.7.25.jar\",\n",
" \"classes/jcl-over-slf4j-1.7.31.jar\",\n",
"]\n",
"jnius_config.add_options(\"-Djava.awt.headless=true\")\n",
"jnius_config.set_classpath(*classpath)\n",
"from jnius import autoclass\n",
"\n",
"version = autoclass('java.lang.System').getProperty(\"java.version\")\n",
"print(f\"Java Version = {version}\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def isHttpsSchemaOrg(exp_doc) -> bool:\n",
" \"\"\"True if exp_doc is using https://schema.org/ namespace\n",
" Returns the first match of the use of https://schema.org or\n",
" http://schema.org on a key found by recursing through the\n",
" object.\n",
" Args:\n",
" exp_doc: expanded JSON-LD document\n",
" Returns:\n",
" bool: True is document is using `https://schema.org` namespace\n",
" \"\"\"\n",
" for i, v in enumerate(exp_doc):\n",
" if isinstance(v, dict):\n",
" return isHttpsSchemaOrg(exp_doc[i])\n",
" if isinstance(v, str):\n",
" if v.startswith(\"https://schema.org\"):\n",
" return True\n",
" elif v.startswith(\"http://schema.org\"):\n",
" return False\n",
" return False\n",
"\n",
"#Load the local context into a string\n",
"import json\n",
"context_fname = \"data/jsonldcontext_https.jsonld\"\n",
"with open(context_fname, \"r\") as inf:\n",
" _context = json.load(inf)\n",
"context_str_https = json.dumps(_context)\n",
"\n",
"context_fname = \"data/jsonldcontext.jsonld\"\n",
"with open(context_fname, \"r\") as inf:\n",
" _context = json.load(inf)\n",
"context_str_http = json.dumps(_context)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Disable remote downloads\n",
"system = autoclass(\"java.lang.System\")\n",
"system.setProperty(\"com.github.jsonldjava.disallowRemoteContextLoading\", \"true\");\n",
"\n",
"# Customize the document loader to use local context for these URIs\n",
"options_https = autoclass(\"com.github.jsonldjava.core.JsonLdOptions\")()\n",
"dl = autoclass(\"com.github.jsonldjava.core.DocumentLoader\")()\n",
"dl.addInjectedDoc(\"https://schema.org/\", context_str_https)\n",
"dl.addInjectedDoc(\"http://schema.org/\", context_str_https)\n",
"dl.addInjectedDoc(\"http://schema.org\", context_str_https)\n",
"dl.addInjectedDoc(\"https://schema.org\", context_str_https)\n",
"options_https.setDocumentLoader(dl)\n",
"\n",
"# Customize the document loader to use local context for these URIs\n",
"options_http = autoclass(\"com.github.jsonldjava.core.JsonLdOptions\")()\n",
"dl = autoclass(\"com.github.jsonldjava.core.DocumentLoader\")()\n",
"dl.addInjectedDoc(\"https://schema.org/\", context_str_http)\n",
"dl.addInjectedDoc(\"http://schema.org/\", context_str_http)\n",
"dl.addInjectedDoc(\"http://schema.org\", context_str_http)\n",
"dl.addInjectedDoc(\"https://schema.org\", context_str_http)\n",
"options_http.setDocumentLoader(dl)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ {\n",
" \"@type\" : [ \"https://schema.org/Dataset\" ],\n",
" \"https://schema.org/creator\" : [ {\n",
" \"@type\" : [ \"https://schema.org/Person\" ],\n",
" \"https://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_01\"\n",
" } ]\n",
" }, {\n",
" \"@type\" : [ \"https://schema.org/Person\" ],\n",
" \"https://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_02\"\n",
" } ]\n",
" }, {\n",
" \"@type\" : [ \"https://schema.org/Person\" ],\n",
" \"https://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_03\"\n",
" } ]\n",
" } ],\n",
" \"https://schema.org/name\" : [ {\n",
" \"@value\" : \"Test with vocab only\"\n",
" } ]\n",
"} ]\n"
]
}
],
"source": [
"#Load a JSONLD document\n",
"fname = \"data/test_00.jsonld\"\n",
"fis = autoclass('java.io.FileInputStream')(fname)\n",
"JsonUtils = autoclass('com.github.jsonldjava.utils.JsonUtils')\n",
"jsonObject = JsonUtils.fromInputStream(fis);\n",
"\n",
"# Expand using the custom document loader\n",
"processor = autoclass('com.github.jsonldjava.core.JsonLdProcessor')\n",
"expanded = processor.expand(jsonObject)\n",
"\n",
"#Pretty print the expanded JSON-LD\n",
"print(JsonUtils.toPrettyString(expanded))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"type\" : \"Dataset\",\n",
" \"schema:creator\" : [ {\n",
" \"type\" : \"Person\",\n",
" \"name\" : \"creator_01\"\n",
" }, {\n",
" \"type\" : \"Person\",\n",
" \"name\" : \"creator_02\"\n",
" }, {\n",
" \"type\" : \"Person\",\n",
" \"name\" : \"creator_03\"\n",
" } ],\n",
" \"name\" : \"Test with vocab only\",\n",
" \"@context\" : \"https://schema.org/\"\n",
"}\n"
]
}
],
"source": [
"# This part is a bit funky since we need to get the object into python\n",
"# So we serialize to a string, and load that string as json in python\n",
"# then we test whether it is https://schema.org/ or http://schema.org/\n",
"\n",
"compact_context_doc = {\"@context\":\"https://schema.org/\"}\n",
"compact_context = JsonUtils.fromString( json.dumps(compact_context_doc) )\n",
"_doc = json.loads( JsonUtils.toPrettyString(expanded) )\n",
"if isHttpsSchemaOrg(_doc):\n",
" compacted = processor.compact(expanded, compact_context, options_https)\n",
"else:\n",
" compacted = processor.compact(expanded, compact_context. options_http)\n",
"print(JsonUtils.toPrettyString(compacted))\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ {\n",
" \"http://schema.org/name\" : [ {\n",
" \"@value\" : \"Test with vocab only\"\n",
" } ],\n",
" \"http://schema.org/creator\" : [ {\n",
" \"http://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_01\"\n",
" } ],\n",
" \"@type\" : [ \"http://schema.org/Person\" ]\n",
" }, {\n",
" \"http://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_02\"\n",
" } ],\n",
" \"@type\" : [ \"http://schema.org/Person\" ]\n",
" }, {\n",
" \"http://schema.org/name\" : [ {\n",
" \"@value\" : \"creator_03\"\n",
" } ],\n",
" \"@type\" : [ \"http://schema.org/Person\" ]\n",
" } ],\n",
" \"@type\" : [ \"http://schema.org/Dataset\" ]\n",
"} ]\n"
]
}
],
"source": [
"# Expand the compacted document\n",
"expanded_2 = processor.expand(compacted, options_http)\n",
"print(JsonUtils.toPrettyString(expanded_2))\n"
]
}
],
"metadata": {
"interpreter": {
"hash": "a928d80adeaeb657c9c705ae1520c8b72562bc1c00e283a14a0b0cba88bad73b"
},
"kernelspec": {
"display_name": "Python 3.9.5 64-bit ('d1_index_processor': virtualenvwrapper)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment