Skip to content

Instantly share code, notes, and snippets.

@datadavev
Created March 1, 2021 03:00
Show Gist options
  • Save datadavev/b1affdfd36ed19c59f742f8dde0ca1f3 to your computer and use it in GitHub Desktop.
Save datadavev/b1affdfd36ed19c59f742f8dde0ca1f3 to your computer and use it in GitHub Desktop.
server caching and ld
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "least-paper",
"metadata": {},
"source": [
"Two requests are sent to the same URL using technically the same Accept header, though it differs literally.\n",
"\n",
"In the first instance, a response of `application/ld+json` is received. In the second a response of `text/html` is received."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "noted-chest",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"def loghistory(resp):\n",
" print(f\"Req headers: {resp.request.headers}\")\n",
" def ph(_id, _h):\n",
" print(f\"{i}: URL: {_h.url}\")\n",
" print(f\" Status: {_h.status_code}\")\n",
" print(\" headers:\")\n",
" for hk in sorted(_h.headers):\n",
" print(f\" {hk} : {_h.headers[hk]}\")\n",
"\n",
" i = 0\n",
" for h in resp.history:\n",
" ph(i, h)\n",
" i = i+1\n",
" ph(i, resp)\n",
" "
]
},
{
"cell_type": "markdown",
"id": "dramatic-visit",
"metadata": {},
"source": [
"Send a request asking specifically for a JSON-LD response:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ancient-repair",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Req headers: {'User-Agent': 'python-requests/2.25.1', 'Accept-Encoding': 'gzip, deflate', 'Accept': 'application/ld+json', 'Connection': 'keep-alive'}\n",
"0: URL: https://www.bco-dmo.org/dataset/3160\n",
" Status: 301\n",
" headers:\n",
" Access-Control-Allow-Headers : origin, x-requested-with, content-type\n",
" Access-Control-Allow-Methods : GET\n",
" Access-Control-Allow-Origin : *\n",
" Cache-Control : no-cache, must-revalidate\n",
" Connection : Keep-Alive\n",
" Content-Encoding : gzip\n",
" Content-Length : 20\n",
" Content-Location : https://www.bco-dmo.org/dataset/3160\n",
" Content-Type : text/html; charset=UTF-8\n",
" Date : Mon, 01 Mar 2021 01:04:30 GMT\n",
" Expires : Sun, 19 Nov 1978 05:00:00 GMT\n",
" Keep-Alive : timeout=5, max=100\n",
" Location : https://www.bco-dmo.org/node/3160.jsonld\n",
" Server : Apache/2.2.3 (Red Hat)\n",
" Strict-Transport-Security : max-age=63072000; includeSubdomains; preload\n",
" Vary : Accept-Encoding,User-Agent\n",
" X-Content-Type-Options : nosniff, nosniff\n",
" X-Drupal-Cache : MISS\n",
" X-Powered-By : PHP/5.3.3\n",
"1: URL: https://www.bco-dmo.org/node/3160.jsonld\n",
" Status: 301\n",
" headers:\n",
" Access-Control-Allow-Headers : origin, x-requested-with, content-type\n",
" Access-Control-Allow-Methods : GET\n",
" Access-Control-Allow-Origin : *\n",
" Cache-Control : no-cache, must-revalidate\n",
" Connection : Keep-Alive\n",
" Content-Encoding : gzip\n",
" Content-Length : 20\n",
" Content-Type : text/html; charset=UTF-8\n",
" Date : Mon, 01 Mar 2021 01:04:33 GMT\n",
" Expires : Sun, 19 Nov 1978 05:00:00 GMT\n",
" Keep-Alive : timeout=5, max=99\n",
" Location : https://www.bco-dmo.org/node/3160/rdf/jsonld\n",
" Server : Apache/2.2.3 (Red Hat)\n",
" Strict-Transport-Security : max-age=63072000; includeSubdomains; preload\n",
" Vary : Accept-Encoding,User-Agent\n",
" X-Content-Type-Options : nosniff, nosniff\n",
" X-Drupal-Cache : MISS\n",
" X-Powered-By : PHP/5.3.3\n",
"2: URL: https://www.bco-dmo.org/node/3160/rdf/jsonld\n",
" Status: 200\n",
" headers:\n",
" Access-Control-Allow-Headers : origin, x-requested-with, content-type\n",
" Access-Control-Allow-Methods : GET\n",
" Access-Control-Allow-Origin : *\n",
" Cache-Control : public, max-age=900\n",
" Connection : Keep-Alive\n",
" Content-Encoding : gzip\n",
" Content-Length : 2635\n",
" Content-Location : https://www.bco-dmo.org/node/3160/rdf/jsonld\n",
" Content-Type : application/ld+json\n",
" Date : Mon, 01 Mar 2021 01:04:35 GMT\n",
" Etag : \"1614560675-1\"\n",
" Expires : Sun, 19 Nov 1978 05:00:00 GMT\n",
" Keep-Alive : timeout=5, max=98\n",
" Last-Modified : Mon, 01 Mar 2021 01:04:35 GMT\n",
" Server : Apache/2.2.3 (Red Hat)\n",
" Strict-Transport-Security : max-age=63072000; includeSubdomains; preload\n",
" Vary : Cookie,User-Agent,Accept-Encoding\n",
" X-Content-Type-Options : nosniff, nosniff\n",
" X-Drupal-Cache : MISS\n",
" X-Powered-By : PHP/5.3.3\n",
"{\"@context\":{\"content\":\"http:\\/\\/purl.org\\/rss\\/1.0\\/modules\\/content\\/\",\"dc\":\"http:\\/\\/purl.org\\/dc\\/terms\\/\",\"foaf\":\"http:\\/\\/xmlns.com\\/foaf\\/0.1\\/\",\"og\":\"http:\\/\\/ogp.me\\/ns#\",\"rdfs\":\"http:\\/\\/www.w3.org\\/2000\\/01\\/rdf-schema#\",\"sioc\":\"http:\\/\\/rdfs.org\\/sioc\\/ns#\",\"sioct\":\"http:\\/\\/rdfs.org\\/sioc\\/types#\",\"skos\":\"http:\\/\\/www.w3.org\\/2004\\/02\\/skos\\/core#\",\"xsd\":\"http:\\/\\/www.w3.org\\/2001\\/XMLSchema#\",\"owl\":\"http:\\/\\/www.w3.org\\/2002\\/07\\/owl#\",\"rdf\":\"http:\\/\\/www.w3.org\\/1999\\/02\\/22-rdf-syntax-ns#\",\"rss\":\"http:\\/\\/purl.org\\/rss\\/1.0\\/\",\"site\":\"https:\\/\\/www.bco-dmo.org\\/ns#\",\"odo\":\"http:\\/\\/ocean-data.org\\/schema\\/\",\"emo\":\"http:\\/\\/ocean-data.org\\/schema\\/entity-matching#\",\"bibo\":\"http:\\/\\/purl.org\\/ontology\\/bibo\\/\",\"crypto\":\"http:\\/\\/id.loc.gov\\/vocabulary\\/preservation\\/cryptographicHashFunctions\\/\",\"bcodmo\":\"http:\\/\\/lod.bco-dmo.org\\/id\\/\",\"arpfo\":\"http:\\/\\/vocab.ox.ac.uk\\/projectfunding#\",\"tw\":\"http:\\/\\/tw.rpi.edu\\/schema\\/\",\"dcat\":\"http:\\/\\/www.w3.org\\/ns\\/dcat#\",\"time\":\"http:\\/\\/www.w3.org\\/2006\\/time#\",\"geo\":\"http:\\/\\/www.w3.org\\/2003\\/01\\/geo\\/wgs84_pos#\",\"geosparql\":\"http:\\/\\/www.opengis.net\\/ont\\/geosparql#\",\"sf\":\"http:\\/\\/www.opengis.net\\/ont\\/sf#\",\"void\":\"http:\\/\\/rdfs.org\\/ns\\/void#\",\"sd\":\"http:\\/\\/www.w3.org\\/ns\\/sparql-service-description#\",\"dctype\":\"http:\\/\\/purl.org\\/dc\\/dcmitype\\/\",\"prov\":\"http:\\/\\/www.w3.org\\/ns\\/prov#\",\"schema\":\"http:\\/\\/schema.org\\/\",\"geolink\":\"http:\\/\\/schema.geolink.org\\/1.0\\/base\\/main#\"},\"@id\":\"http:\\/\\/lod.bco-dmo.org\\/id\\/dataset\\/3160#graph\",\"@graph\":[{\"http:\\/\\/lod.bco-dmo.org\\/id\\/dataset\\/3160\":{\"@id\":\"http:\\/\\/lod.bco-dmo.org\\/id\\/dataset\\/3160\",\"@type\":[\"http:\\/\\/ocean-data.org\\/schema\\/DeploymentDatasetCollection\",\"http:\\/\\/www.w3.org\\/ns\\/dcat#Dataset\",\"http:\\/\\/www.w3.org\\/ns\\/prov#Entity\",\"http:\\/\\/ocean-data.org\\/schema\\/Dataset\"],\"http:\\/\\/ocean-data.org\\/schema\\/hasAcquisitionDescription\":[{\"@value\":\"<div xmlns=\\\"http:\\/\\/www.w3.org\\/1999\\/xhtml\\\" lang=\\\"en\\\"><p>Generated by BCO-DMO staff from project \n"
]
}
],
"source": [
"url = \"https://www.bco-dmo.org/dataset/3160\"\n",
"headers = {\"Accept\":\"application/ld+json\"}\n",
"response1 = requests.get(url, headers=headers, timeout=15)\n",
"loghistory(response1)\n",
"#print the first 2k chars\n",
"print(response1.text[:2000])"
]
},
{
"cell_type": "markdown",
"id": "applicable-castle",
"metadata": {},
"source": [
"Send another request to the same URL requesting the same, except using prioritization of formats in the Accept header:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "christian-freight",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Req headers: {'User-Agent': 'python-requests/2.25.1', 'Accept-Encoding': 'gzip, deflate', 'Accept': 'application/ld+json;q=1.0,text/html;q=0.8', 'Connection': 'keep-alive'}\n",
"0: URL: https://www.bco-dmo.org/dataset/3160\n",
" Status: 200\n",
" headers:\n",
" Access-Control-Allow-Headers : origin, x-requested-with, content-type\n",
" Access-Control-Allow-Methods : GET\n",
" Access-Control-Allow-Origin : *\n",
" Cache-Control : public, max-age=900\n",
" Connection : Keep-Alive\n",
" Content-Encoding : gzip\n",
" Content-Language : en\n",
" Content-Location : https://www.bco-dmo.org/dataset/3160\n",
" Content-Type : text/html; charset=utf-8\n",
" Date : Mon, 01 Mar 2021 01:05:06 GMT\n",
" Etag : \"1614560704-1\"\n",
" Expires : Sun, 19 Nov 1978 05:00:00 GMT\n",
" Keep-Alive : timeout=5, max=100\n",
" Last-Modified : Mon, 01 Mar 2021 01:05:04 GMT\n",
" Link : </node/3160>; rel=\"shortlink\"\n",
" Server : Apache/2.2.3 (Red Hat)\n",
" Strict-Transport-Security : max-age=63072000; includeSubdomains; preload\n",
" Transfer-Encoding : chunked\n",
" Vary : Cookie,User-Agent,Accept-Encoding\n",
" X-Content-Type-Options : nosniff\n",
" X-Drupal-Cache : HIT\n",
" X-Frame-Options : SAMEORIGIN\n",
" X-Generator : Drupal 7 (http://drupal.org)\n",
" X-Powered-By : PHP/5.3.3\n",
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML+RDFa 1.0//EN\"\n",
" \"http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd\">\n",
"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" version=\"XHTML+RDFa 1.0\" dir=\"ltr\"\n",
" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n",
" xmlns:dc=\"http://purl.org/dc/terms/\"\n",
" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n",
" xmlns:og=\"http://ogp.me/ns#\"\n",
" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n",
" xmlns:sioc=\"http://rdfs.org/sioc/ns#\"\n",
" xmlns:sioct=\"http://rdfs.org/sioc/types#\"\n",
" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"\n",
" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n",
" xmlns:owl=\"http://www.w3.org/2002/07/owl#\"\n",
" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n",
" xmlns:rss=\"http://purl.org/rss/1.0/\"\n",
" xmlns:site=\"https://www.bco-dmo.org/ns#\"\n",
" xmlns:odo=\"http://ocean-data.org/schema/\"\n",
" xmlns:emo=\"http://ocean-data.org/schema/entity-matching#\"\n",
" xmlns:bibo=\"http://purl.org/ontology/bibo/\"\n",
" xmlns:crypto=\"http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions/\"\n",
" xmlns:bcodmo=\"http://lod.bco-dmo.org/id/\"\n",
" xmlns:arpfo=\"http://vocab.ox.ac.uk/projectfunding#\"\n",
" xmlns:tw=\"http://tw.rpi.edu/schema/\"\n",
" xmlns:dcat=\"http://www.w3.org/ns/dcat#\"\n",
" xmlns:time=\"http://www.w3.org/2006/time#\"\n",
" xmlns:geo=\"http://www.w3.org/2003/01/geo/wgs84_pos#\"\n",
" xmlns:geosparql=\"http://www.opengis.net/ont/geosparql#\"\n",
" xmlns:sf=\"http://www.opengis.net/ont/sf#\"\n",
" xmlns:void=\"http://rdfs.org/ns/void#\"\n",
" xmlns:sd=\"http://www.w3.org/ns/sparql-service-description#\"\n",
" xmlns:dctype=\"http://purl.org/dc/dcmitype/\"\n",
" xmlns:prov=\"http://www.w3.org/ns/prov#\"\n",
" xmlns:schema=\"http://schema.org/\"\n",
" xmlns:geolink=\"http://schema.geolink.org/1.0/base/main#\">\n",
"\n",
"<head profile=\"http://www.w3.org/1999/xhtml/vocab\">\n",
" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n",
"<meta name=\"Generator\" content=\"Drupal 7 (http://drupal.org)\" />\n",
"<link rel=\"canonical\" href=\"https://www.bco-dmo.org/dataset/3160\" />\n",
"<link rel=\"shortlink\" href=\"/node/3160\" />\n",
"<scr\n"
]
}
],
"source": [
"url = \"https://www.bco-dmo.org/dataset/3160\"\n",
"headers = {\"Accept\":\"application/ld+json;q=1.0,text/html;q=0.8\"}\n",
"response2 = requests.get(url, headers=headers, timeout=15)\n",
"loghistory(response2)\n",
"#print the first 2k chars\n",
"print(response2.text[:2000])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "integrated-router",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MD5 = f46a8a944cfd9e1bebcafc88b65c0cb3\n",
"[\n",
" {\n",
" \"@graph\": [\n",
" {\n",
" \"@id\": \"_:c14n0\",\n",
" \"@type\": [\n",
" \"http://ocean-data.org/schema/DublinCoreMetadata\",\n",
" \"http://schema.org/EntryPoint\"\n",
" ],\n",
" \"http://purl.org/dc/terms/conformsTo\": [\n",
" {\n",
" \"@type\": \"http://www.w3.org/2001/XMLSchema#anyURI\",\n",
" \"@value\": \"http://purl.org/dc/elements/1.1/\"\n",
" }\n",
" ],\n",
" \"http://schema.org/contentType\": [\n",
" {\n",
" \"@type\": \"http://www.w3.org/2001/XMLSchema#token\",\n",
" \"@value\": \"application/xml\"\n",
" }\n",
" ],\n",
" \"http://schema.org/url\": [\n",
" {\n",
" \"@type\": \"http://www.w3.org/2001/XMLSchema#anyURI\",\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160/dublin-core\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"@id\": \"_:c14n1\",\n",
" \"@type\": [\n",
" \"http://schema.org/EntryPoint\"\n",
" ],\n",
" \"http://schema.org/contentType\": [\n",
" {\n",
" \"@type\": \"http://www.w3.org/2001/XMLSchema#token\",\n",
" \"@value\": \"application/ld+json\"\n",
" }\n",
" ],\n",
" \"http://schema.org/url\": [\n",
" {\n",
" \"@type\": \"http://www.w3.org/2001/XMLSchema#anyURI\",\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160.json\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"@id\": \"_:c14n10\",\n",
" \"@type\": [\n",
" \"http://ocean-data.org/schema/DataViewAffordance\"\n",
" ],\n",
" \"http://ocean-data.org/schema/affordedBy\": [\n",
" {\n",
" \"@id\": \"http://lod.bco-dmo.org/id/affiliation/191\"\n",
" }\n",
" ],\n",
" \"http://schema.org/name\": [\n",
" {\n",
" \"@value\": \"JGOFS Data Viewer\"\n",
" }\n",
" ],\n",
" \"http://schema.org/subjectOf\": [\n",
" {\n",
" \"@id\": \"http://lod.bco-dmo.org/id/dataset/3160\"\n",
" }\n",
" ],\n",
" \"http://schema.org/target\": [\n",
" {\n",
" \"@value\": \"_:jgofsHTMLentryPoint3160\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"@id\": \"_:c14n11\",\n",
" \"@type\": [\n",
" \"http://schema.org/EntryPoint\"\n",
" ],\n"
]
}
],
"source": [
"import json\n",
"import pyld.jsonld\n",
"import c14n\n",
"import hashlib\n",
"\n",
"def makeCanonical(doc, base):\n",
" opts = {\n",
" \"algorithm\": \"URDNA2015\",\n",
" \"base\": base,\n",
" \"format\": \"application/n-quads\"\n",
" }\n",
" _a = pyld.jsonld.normalize(doc, options=opts)\n",
" return pyld.jsonld.from_rdf(_a, options=opts)\n",
" \n",
"doc1 = makeCanonical(response1.json(), response1.url)\n",
"#Print the MD5 hash of the canonical form:\n",
"print(f\"MD5 = {hashlib.md5(c14n.canonicalize(doc1)).hexdigest()}\")\n",
"\n",
"#print an excerpt...\n",
"print(json.dumps(doc1, indent=2, sort_keys=True)[:2048])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "sporting-trash",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MD5 = 452223ed9afd17bee273f4dfc39653a8\n",
"[\n",
" {\n",
" \"@id\": \"_:c14n0\",\n",
" \"@type\": [\n",
" \"https://schema.org/DataDownload\"\n",
" ],\n",
" \"https://schema.org/about\": [\n",
" {\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160\"\n",
" }\n",
" ],\n",
" \"https://schema.org/contentUrl\": [\n",
" {\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160/iso\"\n",
" }\n",
" ],\n",
" \"https://schema.org/creativeWorkStatus\": [\n",
" {\n",
" \"@value\": \"In Review\"\n",
" }\n",
" ],\n",
" \"https://schema.org/encodesCreativeWork\": [\n",
" {\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160\"\n",
" }\n",
" ],\n",
" \"https://schema.org/encodingFormat\": [\n",
" {\n",
" \"@value\": \"application/xml\"\n",
" },\n",
" {\n",
" \"@value\": \"http://www.isotc211.org/2005/gmd-noaa\"\n",
" }\n",
" ],\n",
" \"https://schema.org/name\": [\n",
" {\n",
" \"@value\": \"ISO 19115-2 (NOAA Profile)\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"@id\": \"_:c14n1\",\n",
" \"@type\": [\n",
" \"https://schema.org/DataDownload\"\n",
" ],\n",
" \"https://schema.org/contentUrl\": [\n",
" {\n",
" \"@value\": \"https://www.bco-dmo.org/dataset/3160/data/download\"\n",
" }\n",
" ],\n",
" \"https://schema.org/creativeWorkStatus\": [\n",
" {\n",
" \"@value\": \"In Review\"\n",
" }\n",
" ],\n",
" \"https://schema.org/datePublished\": [\n",
" {\n",
" \"@value\": \"2009-08-03\"\n",
" }\n",
" ],\n",
" \"https://schema.org/encodingFormat\": [\n",
" {\n",
" \"@value\": \"text/tab-separated-values\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"@id\": \"https://www.bco-dmo.org\",\n",
" \"@type\": [\n",
" \"https://schema.org/Organization\"\n",
" ],\n",
" \"https://schema.org/alternateName\": [\n",
" {\n",
" \"@value\": \"BCO-DMO\"\n",
" }\n",
" ],\n",
" \"https://schema.org/identifier\": [\n",
" {\n",
" \"@value\": \"http://lod.bco-dmo.org/id/affiliation/191\"\n",
" }\n",
" ],\n",
" \"https://schema.org/name\": [\n",
" {\n",
" \"@value\": \"Biological and Chemical Data Management Office\"\n",
" }\n",
" ],\n",
" \"https://schema.org/sameAs\": [\n",
" {\n",
" \"@value\": \"http://www.re3data.org/repository/r3d100000012\"\n",
" }\n",
" ],\n",
" \"https://schema.org/url\": [\n",
" {\n",
" \"@value\": \"https://www.bco-dmo.org\"\n",
" }\n",
" ]\n",
" },\n",
" {\n",
"\n"
]
}
],
"source": [
"src2 = pyld.jsonld.load_html(response2.text, response2.url, None, {})\n",
"doc2 = makeCanonical(src2, base=response2.url)\n",
"\n",
"#Print the MD5 hash of the canonical form:\n",
"print(f\"MD5 = {hashlib.md5(c14n.canonicalize(doc2)).hexdigest()}\")\n",
"\n",
"#print an excerpt...\n",
"print(json.dumps(doc2, indent=2, sort_keys=True)[:2048])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "collectible-diana",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment