Created
July 13, 2022 17:29
-
-
Save jermnelson/bf8e78fdb9675cd877686cd26d253c5a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "4bdc247c-0d14-4606-81f8-4823313b8f66", | |
"metadata": {}, | |
"source": [ | |
"# SRS Record Streaming\n", | |
"\n", | |
"[API Documentation](https://s3.amazonaws.com/foliodocs/api/mod-source-record-storage/p/source-record-storage-stream.html)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"id": "d424ab3c-3ccb-49e8-8841-50a4dad4e6d8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import datetime\n", | |
"import getpass\n", | |
"import json\n", | |
"import os\n", | |
"\n", | |
"import requests\n", | |
"from folioclient import FolioClient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"id": "5905d9e1-6c17-4919-b7e4-39066e682720", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dev_client = FolioClient(\n", | |
" \"https://okapi-dev.stanford.edu\", \n", | |
" \"sul\", \n", | |
" \"sul_admin\", \n", | |
" \"{ask-us-for-pwd}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"id": "742b0895-f93a-49c2-8972-08dd8748736b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def record_stream(url, client):\n", | |
" session = requests.Session()\n", | |
" records = []\n", | |
" start = datetime.datetime.utcnow()\n", | |
" print(f\"Started stream of MARC JSON {start}\")\n", | |
" with session.get(url, headers=client.okapi_headers, stream=True) as response:\n", | |
" for i,line in enumerate(response.iter_lines()):\n", | |
" records.append(json.loads(line))\n", | |
" end = datetime.datetime.utcnow()\n", | |
" print(f\"Finished stream of MARC JSON {end}, total time {(end-start).seconds / 60.}\")\n", | |
" return records\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"id": "846abcb3-b37b-4147-bdad-c3930299dc3e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Started stream of MARC JSON 2022-07-12 20:54:57.507175\n", | |
"Finished stream of MARC JSON 2022-07-12 20:56:56.378246, total time 1.9666666666666666\n" | |
] | |
} | |
], | |
"source": [ | |
"first_recs = record_stream(f\"{dev_client.okapi_url}/source-storage/stream/source-records?updatedAfter=2022-07-01&limit=100000&suppressFromDiscovery=False\", dev_client)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 80, | |
"id": "8d608278-b793-4d55-8c62-204bf5d13695", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"100000" | |
] | |
}, | |
"execution_count": 80, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(first_recs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"id": "19dd6267-3434-42d4-a255-035537de3618", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'recordId': '0000da5b-370d-5f2d-aa78-f6fb544038b6',\n", | |
" 'snapshotId': 'a044ccbc-76dd-42fb-9ad8-9d9404cb88be',\n", | |
" 'recordType': 'MARC_BIB',\n", | |
" 'parsedRecord': {'id': '0000da5b-370d-5f2d-aa78-f6fb544038b6',\n", | |
" 'content': {'fields': [{'001': 'a10451893'},\n", | |
" {'003': 'SIRSI'},\n", | |
" {'005': '20160716013742.0'},\n", | |
" {'007': 'a| |||||'},\n", | |
" {'007': 'aj|canzn'},\n", | |
" {'008': '140415s1910 cau|||||| a || | ||eng|d'},\n", | |
" {'040': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'CSt'},\n", | |
" {'b': 'eng'},\n", | |
" {'c': 'CSt'},\n", | |
" {'d': 'UtOrBLW'}]}},\n", | |
" {'050': {'ind1': ' ',\n", | |
" 'ind2': '4',\n", | |
" 'subfields': [{'a': 'G4364.R6 G46 [1910] .N4'}]}},\n", | |
" {'100': {'ind1': '1',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'New Richmond Land Company.'}]}},\n", | |
" {'245': {'ind1': '1',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'a': \"Map of Richmond, California. New Richmond Land Company, 801-803 Monadnock Building, San Francisco. Wall's Harbor Center Tract, The Value Center. Bolte & Braden Company, San Francisco\"},\n", | |
" {'h': '[cartographic material].'}]}},\n", | |
" {'255': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'Scale 1:19,200.'}]}},\n", | |
" {'264': {'ind1': ' ',\n", | |
" 'ind2': '1',\n", | |
" 'subfields': [{'a': 'San Francisco :'},\n", | |
" {'b': 'Bolte & Braden Company,'},\n", | |
" {'c': '[1910]'}]}},\n", | |
" {'300': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': '1 map ;'}, {'c': '43.5 x 62 cm'}]}},\n", | |
" {'336': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'cartographic image'}, {'2': 'rdacontent'}]}},\n", | |
" {'337': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'unmediated'}, {'2': 'rdamedia'}]}},\n", | |
" {'338': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'sheet'}, {'2': 'rdacarrier'}]}},\n", | |
" {'500': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': \"A real estate development map. Date is estimated. Map shows the Wall's Harbor Center Tract in red, and the City Hall Site and Civic Center in green.\"}]}},\n", | |
" {'500': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': 'Separate Map.'}]}},\n", | |
" {'650': {'ind1': ' ',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'a': 'Real estate development'},\n", | |
" {'z': 'California'},\n", | |
" {'z': 'Richmond'},\n", | |
" {'x': 'Maps'},\n", | |
" {'y': '20th century.'}]}},\n", | |
" {'650': {'ind1': ' ',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'a': 'City blocks'},\n", | |
" {'z': 'California'},\n", | |
" {'z': 'Richmond'},\n", | |
" {'x': 'Maps'},\n", | |
" {'y': '20th century.'}]}},\n", | |
" {'651': {'ind1': ' ',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'a': 'California'},\n", | |
" {'v': 'Maps'},\n", | |
" {'y': '20th century.'}]}},\n", | |
" {'651': {'ind1': ' ',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'a': 'Richmond (Calif.)'},\n", | |
" {'x': 'Maps'},\n", | |
" {'y': '20th century.'}]}},\n", | |
" {'655': {'ind1': ' ',\n", | |
" 'ind2': '7',\n", | |
" 'subfields': [{'a': 'Maps'}, {'2': 'lcgft'}]}},\n", | |
" {'655': {'ind1': ' ',\n", | |
" 'ind2': '7',\n", | |
" 'subfields': [{'a': 'Cadastral maps.'}, {'2': 'lcgft'}]}},\n", | |
" {'590': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': 'Pub list no.: 5312.000.'}]}},\n", | |
" {'690': {'ind1': ' ',\n", | |
" 'ind2': '4',\n", | |
" 'subfields': [{'a': 'The David Rumsey Map Collection.'}]}},\n", | |
" {'856': {'ind1': '4',\n", | |
" 'ind2': '0',\n", | |
" 'subfields': [{'u': 'http://www.davidrumsey.com/luna/servlet/view/search?q=pub_list_no=5312.000%20LIMIT:RUMSEY~8~1&sort=Pub_List_No_InitialSort,Pub_Date,Pub_List_No,Series_No'}]}},\n", | |
" {'856': {'ind1': '4',\n", | |
" 'ind2': '1',\n", | |
" 'subfields': [{'u': 'http://purl.stanford.edu/yv535wk5017'},\n", | |
" {'x': 'SDR-PURL'},\n", | |
" {'x': 'item'},\n", | |
" {'x': 'file:yv535wk5017%2F5312000.jp2'},\n", | |
" {'x': 'collection:xh235dd9059::David Rumsey Map Collection at Stanford University Libraries'}]}},\n", | |
" {'035': {'ind1': ' ',\n", | |
" 'ind2': ' ',\n", | |
" 'subfields': [{'a': '(OCoLC-M)953571215'}]}},\n", | |
" {'596': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': '13'}]}},\n", | |
" {'918': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': '10451893'}]}},\n", | |
" {'999': {'ind1': 'f',\n", | |
" 'ind2': 'f',\n", | |
" 'subfields': [{'i': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb'},\n", | |
" {'s': '0000da5b-370d-5f2d-aa78-f6fb544038b6'}]}}],\n", | |
" 'leader': '01834nem a2200397uu 4500'}},\n", | |
" 'deleted': False,\n", | |
" 'externalIdsHolder': {'instanceId': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb',\n", | |
" 'instanceHrid': 'a10451893'},\n", | |
" 'additionalInfo': {'suppressDiscovery': False},\n", | |
" 'metadata': {'createdDate': 1657579301983,\n", | |
" 'createdByUserId': 'd6507bbe-e985-4baa-beb4-d7ca4d6c279f',\n", | |
" 'updatedDate': 1657579303250,\n", | |
" 'updatedByUserId': 'd6507bbe-e985-4baa-beb4-d7ca4d6c279f'}}" | |
] | |
}, | |
"execution_count": 83, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"first_recs[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"id": "8a992fad-e7b1-488b-a636-b15528d1a838", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open(\"folio-srs-10k.json\", \"w+\") as fo:\n", | |
" json.dump(first_recs, fo)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"id": "4ed2d027-b6f7-4474-aa9b-f41180753804", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pymarc import parse_json_to_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"id": "681fd0ce-29c3-4605-8477-57737c2cf7dc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rtac_url = f\"{dev_client.okapi_url}/rtac-batch\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"id": "e4c01205-fef1-488a-bd61-6fcf1f020866", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"result =requests.post(rtac_url,\n", | |
" headers=dev_client.okapi_headers,\n", | |
" json={ \"instanceIds\": ['b6919f84-f7f3-5957-84d6-fcf603aaacbb'],\n", | |
" \"fullPeriodicals\" : 'false'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"id": "fe6ff772-e6f5-4713-9159-416af159e2bc", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"200" | |
] | |
}, | |
"execution_count": 86, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.status_code" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"id": "4eb5d26f-8010-4478-80a5-47d8deb9d99b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'holdings': [{'instanceId': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb',\n", | |
" 'holdings': [{'id': 'e90af420-7ed2-54ba-b297-0c871d676b7f',\n", | |
" 'location': 'Map Center (W7 large box)',\n", | |
" 'callNumber': 'G4364.R6 G46 [1910] .N4',\n", | |
" 'status': 'Available',\n", | |
" 'permanentLoanType': 'Can circulate'}]}]}" | |
] | |
}, | |
"execution_count": 87, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c8296473-6937-4102-96b5-41cf1e05a1b3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment