Created
March 8, 2024 16:16
-
-
Save bltravis/5c908139f86703c274383716129a596a to your computer and use it in GitHub Desktop.
change_manager_import.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/bltravis/5c908139f86703c274383716129a596a/change_manager_import.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "SKj_VjJ1vyUG" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Use the %pip magic (not !pip) so the packages are installed into the\n", | |
"# environment of the kernel that is running this notebook.\n", | |
"%pip install folioclient\n", | |
"%pip install httpx\n", | |
"%pip install pymarc" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ZsNdPQsyvyUI" | |
}, | |
"source": [ | |
"# Data Import without \"data-import\"\n", | |
"It's possible to bypass the file-based data import workflow and \"stream\" records into FOLIO directly through the Source Record Manager module's `change-manager` APIs.\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"id": "Keie1BcMvyUJ" | |
}, | |
"outputs": [], | |
"source": [ | |
"import httpx\n", | |
"import folioclient\n", | |
"import pymarc\n", | |
"import uuid\n", | |
"from getpass import getpass\n", | |
"\n", | |
"# If using Google Colab\n", | |
"try:\n", | |
" from google.colab import files\n", | |
"except ImportError:\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Ua2JjUUtvyUJ" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the FolioClient used by every later cell. The username and password\n", | |
"# are prompted for interactively rather than being stored in the notebook.\n", | |
"fc = folioclient.FolioClient(\n", | |
"    \"https://folio-snapshot-okapi.dev.folio.org\",\n", | |
"    \"diku\",\n", | |
"    input(\"Okapi username: \"),\n", | |
"    getpass(\"Okapi password: \"),\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Kv32rpCovyUJ" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Open a new job execution through the change-manager API. Later cells read\n", | |
"# 'parentJobExecutionId' from this response to address the job.\n", | |
"create_job = httpx.post(\n", | |
"    fc.okapi_url + \"/change-manager/jobExecutions\",\n", | |
"    headers=fc.okapi_headers,\n", | |
"    json={\"sourceType\": \"ONLINE\", \"userId\": fc.current_user},\n", | |
")\n", | |
"create_job.raise_for_status()\n", | |
"create_job.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "-znc6YQKvyUJ" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Fetch the job profiles (limit 1000) and show them so one can be chosen.\n", | |
"import_profiles = fc.folio_get(\n", | |
"    \"/data-import-profiles/jobProfiles\",\n", | |
"    query_params={\"limit\": \"1000\"},\n", | |
")\n", | |
"import_profiles[\"jobProfiles\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"id": "hb2_o9uhvyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Job profile that is attached to the job execution in the next cell.\n", | |
"default_bib_profile = dict(\n", | |
"    id=\"e34d7b92-9b83-11eb-a8b3-0242ac130003\",\n", | |
"    name=\"Default - Create instance and SRS MARC Bib\",\n", | |
"    dataType=\"MARC\",\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "HqcpN2s1vyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Attach the selected job profile to the job execution created earlier.\n", | |
"set_job_profile = httpx.put(\n", | |
"    fc.okapi_url + f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/jobProfile\",\n", | |
"    headers=fc.okapi_headers,\n", | |
"    json=default_bib_profile,\n", | |
")\n", | |
"set_job_profile.raise_for_status()\n", | |
"set_job_profile.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Pick the MARC file: upload one when running in Colab, otherwise fall back\n", | |
"# to a local sample path.\n", | |
"try:\n", | |
"    # `files` is only defined when the google.colab import succeeded.\n", | |
"    marc_file = files.upload()\n", | |
"    marc_file_name = next(iter(marc_file))\n", | |
"except (NameError, StopIteration):\n", | |
"    # NameError: not running in Colab. StopIteration: nothing was uploaded.\n", | |
"    # Any other upload error is left to propagate instead of being hidden.\n", | |
"    marc_file_name = \"../witcher_sample.mrc\"  # Change this to a local sample file path if you're running this notebook locally" | |
], | |
"metadata": { | |
"id": "aa_coTm8wPdM" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"id": "BsbzH7q3vyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Collect every record the reader yields, skipping falsy entries.\n", | |
"marc_records = []\n", | |
"with open(marc_file_name, \"rb\") as marc_handle:\n", | |
"    marc_reader = pymarc.MARCReader(marc_handle, hide_utf8_warnings=True)\n", | |
"    marc_records.extend(parsed for parsed in marc_reader if parsed)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "LNI_vBl9vyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"len(marc_records)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"id": "E98bxYw1vyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Payload for the record chunk: metadata plus an initially empty record list.\n", | |
"# \"last\" is False here; the notebook posts a separate closing payload later.\n", | |
"record_count = len(marc_records)\n", | |
"base_payload = dict(\n", | |
"    id=str(uuid.uuid4()),\n", | |
"    recordsMetadata=dict(\n", | |
"        last=False,\n", | |
"        counter=record_count,\n", | |
"        contentType=\"MARC_RAW\",\n", | |
"        total=record_count,\n", | |
"    ),\n", | |
"    initialRecords=[],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"id": "JjBVuu0ovyUK" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Rebuild the record list on every run (instead of appending to it) so that\n", | |
"# re-executing this cell cannot duplicate records in the payload.\n", | |
"base_payload['initialRecords'] = [\n", | |
"    {\"record\": record.as_marc().decode()}\n", | |
"    for record in marc_records\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"id": "SNP9ZG7gvyUL" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Display the complete payload (records included) before it is posted.\n", | |
"# Only a cell's final expression is rendered, so a single expression suffices.\n", | |
"base_payload" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Post the record chunk to the job execution's /records endpoint.\n", | |
"post_records = httpx.post(\n", | |
"    fc.okapi_url + f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/records\",\n", | |
"    headers=fc.okapi_headers,\n", | |
"    json=base_payload,\n", | |
")\n", | |
"post_records.raise_for_status()" | |
], | |
"metadata": { | |
"id": "FdPXTSqbzlEg" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Closing chunk: carries no records and sets \"last\" to True.\n", | |
"last_records = {\n", | |
"    \"id\": str(uuid.uuid4()),\n", | |
"    \"recordsMetadata\": {\n", | |
"        \"last\": True,\n", | |
"        \"counter\": len(marc_records),\n", | |
"        \"total\": len(marc_records),\n", | |
"        \"contentType\": \"MARC_RAW\"\n", | |
"    },\n", | |
"    \"initialRecords\": []\n", | |
"}\n", | |
"\n", | |
"# Fetch the job status last so that it is the cell's displayed output; a bare\n", | |
"# expression in the middle of a cell is evaluated and then discarded.\n", | |
"job_status = fc.folio_get(f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}\")\n", | |
"job_status" | |
], | |
"metadata": { | |
"id": "iyuygdCtzkqi" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "sfO5SSQxvyUL" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Post the closing (empty, last=True) chunk to the same /records endpoint.\n", | |
"post_records_final = httpx.post(\n", | |
"    fc.okapi_url + f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/records\",\n", | |
"    headers=fc.okapi_headers,\n", | |
"    json=last_records,\n", | |
")\n", | |
"post_records_final.raise_for_status()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Look up the job execution again and display the response.\n", | |
"job_status = fc.folio_get(\n", | |
"    f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}\"\n", | |
")\n", | |
"job_status" | |
], | |
"metadata": { | |
"id": "ZGknc1_8zvYd" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "mac9nZYbzDr6" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"language_info": { | |
"name": "python" | |
}, | |
"colab": { | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment