Skip to content

Instantly share code, notes, and snippets.

@bltravis
Created March 8, 2024 16:16
Show Gist options
  • Save bltravis/5c908139f86703c274383716129a596a to your computer and use it in GitHub Desktop.
Save bltravis/5c908139f86703c274383716129a596a to your computer and use it in GitHub Desktop.
change_manager_import.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/bltravis/5c908139f86703c274383716129a596a/change_manager_import.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SKj_VjJ1vyUG"
},
"outputs": [],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel,\n",
"# so the packages are importable from the cells below.\n",
"%pip install folioclient httpx pymarc"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZsNdPQsyvyUI"
},
"source": [
"# Data Import without \"data-import\"\n",
"It's possible to bypass the file-based data import workflow and \"stream\" records directly into FOLIO by calling the `change-manager` APIs of the Source Record Manager module.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "Keie1BcMvyUJ"
},
"outputs": [],
"source": [
"import httpx\n",
"import folioclient\n",
"import pymarc\n",
"import uuid\n",
"from getpass import getpass\n",
"\n",
"# If using Google Colab\n",
"try:\n",
" from google.colab import files\n",
"except ImportError:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ua2JjUUtvyUJ"
},
"outputs": [],
"source": [
"# Build the FOLIO client; the username and password are prompted for\n",
"# interactively rather than being written into the notebook.\n",
"fc = folioclient.FolioClient(\n",
"    \"https://folio-snapshot-okapi.dev.folio.org\",\n",
"    \"diku\",\n",
"    input(\"Okapi username: \"),\n",
"    getpass(\"Okapi password: \"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Kv32rpCovyUJ"
},
"outputs": [],
"source": [
"# Create a job execution through the change-manager API.\n",
"create_job = httpx.post(\n",
"    f\"{fc.okapi_url}/change-manager/jobExecutions\",\n",
"    headers=fc.okapi_headers,\n",
"    json={\"sourceType\": \"ONLINE\", \"userId\": fc.current_user},\n",
")\n",
"create_job.raise_for_status()  # stop here on any 4xx/5xx response\n",
"create_job.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-znc6YQKvyUJ"
},
"outputs": [],
"source": [
"# Fetch the job profiles (limit 1000) and show the returned list.\n",
"import_profiles = fc.folio_get(\n",
"    \"/data-import-profiles/jobProfiles\",\n",
"    query_params={\"limit\": \"1000\"},\n",
")\n",
"import_profiles[\"jobProfiles\"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "hb2_o9uhvyUK"
},
"outputs": [],
"source": [
"# Job profile reference that is sent as the body of the jobProfile request.\n",
"# NOTE(review): the id is presumably one of the profiles listed above - confirm.\n",
"default_bib_profile = dict(\n",
"    id=\"e34d7b92-9b83-11eb-a8b3-0242ac130003\",\n",
"    name=\"Default - Create instance and SRS MARC Bib\",\n",
"    dataType=\"MARC\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HqcpN2s1vyUK"
},
"outputs": [],
"source": [
"# Attach the chosen job profile to the job execution created above.\n",
"set_job_profile = httpx.put(\n",
"    f\"{fc.okapi_url}/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/jobProfile\",\n",
"    headers=fc.okapi_headers,\n",
"    json=default_bib_profile,\n",
")\n",
"set_job_profile.raise_for_status()  # stop here on any 4xx/5xx response\n",
"set_job_profile.json()"
]
},
{
"cell_type": "code",
"source": [
"# `files` is only defined when the google.colab import succeeded. Outside of\n",
"# Colab the resulting NameError is the signal to use a local file; any other\n",
"# upload failure is allowed to surface instead of being silently swallowed.\n",
"try:\n",
"    marc_file = files.upload()\n",
"except NameError:\n",
"    marc_file = {}\n",
"\n",
"# Use the first uploaded file name, or the local sample path when nothing was uploaded.\n",
"marc_file_name = next(\n",
"    iter(marc_file),\n",
"    \"../witcher_sample.mrc\",  # Change this to a local sample file path if you're running this notebook locally\n",
")"
],
"metadata": {
"id": "aa_coTm8wPdM"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "BsbzH7q3vyUK"
},
"outputs": [],
"source": [
"# Read every record of the MARC file into memory.\n",
"marc_records = []\n",
"with open(marc_file_name, \"rb\") as openfile:\n",
"    reader = pymarc.MARCReader(openfile, hide_utf8_warnings=True)\n",
"    for record in reader:\n",
"        # Keep only truthy entries; presumably the reader yields None for a\n",
"        # record it could not parse - confirm against the pymarc docs.\n",
"        if record:\n",
"            marc_records.append(record)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LNI_vBl9vyUK"
},
"outputs": [],
"source": [
"len(marc_records)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "E98bxYw1vyUK"
},
"outputs": [],
"source": [
"# Request body that will carry the records. `last` is False here; a separate\n",
"# closing payload with `last` set to True is built further down.\n",
"base_payload = dict(\n",
"    id=str(uuid.uuid4()),\n",
"    recordsMetadata=dict(\n",
"        last=False,\n",
"        counter=len(marc_records),\n",
"        contentType=\"MARC_RAW\",\n",
"        total=len(marc_records),\n",
"    ),\n",
"    initialRecords=[],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"id": "JjBVuu0ovyUK"
},
"outputs": [],
"source": [
"# Assign the whole list in one step instead of appending to the existing one,\n",
"# so re-running this cell does not duplicate records in the payload.\n",
"base_payload[\"initialRecords\"] = [\n",
"    {\"record\": record.as_marc().decode()} for record in marc_records\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "SNP9ZG7gvyUL"
},
"outputs": [],
"source": [
"# Only the final expression of a cell is rendered, so a single expression is\n",
"# enough; the payload shown here already includes its initialRecords.\n",
"base_payload"
]
},
{
"cell_type": "code",
"source": [
"# Send the payload holding the MARC records to the job execution.\n",
"post_records = httpx.post(\n",
"    f\"{fc.okapi_url}/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/records\",\n",
"    headers=fc.okapi_headers,\n",
"    json=base_payload,\n",
")\n",
"post_records.raise_for_status()  # stop here on any 4xx/5xx response"
],
"metadata": {
"id": "FdPXTSqbzlEg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"job_status = fc.folio_get(f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}\")\n",
"# A bare expression is only rendered when it is the last line of a cell,\n",
"# so the status is displayed explicitly here.\n",
"display(job_status)\n",
"\n",
"# Closing request body: no records, with `last` set to True.\n",
"last_records = {\n",
"    \"id\": str(uuid.uuid4()),\n",
"    \"recordsMetadata\": {\n",
"        \"last\": True,\n",
"        \"counter\": len(marc_records),\n",
"        \"total\": len(marc_records),\n",
"        \"contentType\": \"MARC_RAW\"\n",
"    },\n",
"    \"initialRecords\": []\n",
"}"
],
"metadata": {
"id": "iyuygdCtzkqi"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sfO5SSQxvyUL"
},
"outputs": [],
"source": [
"# Send the closing payload (the one flagged `last`) to the same job execution.\n",
"post_records_final = httpx.post(\n",
"    f\"{fc.okapi_url}/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}/records\",\n",
"    headers=fc.okapi_headers,\n",
"    json=last_records,\n",
")\n",
"post_records_final.raise_for_status()  # stop here on any 4xx/5xx response\n"
]
},
{
"cell_type": "code",
"source": [
"# Re-read the job execution and show its current state.\n",
"job_status = fc.folio_get(\n",
"    f\"/change-manager/jobExecutions/{create_job.json()['parentJobExecutionId']}\"\n",
")\n",
"job_status"
],
"metadata": {
"id": "ZGknc1_8zvYd"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "mac9nZYbzDr6"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"language_info": {
"name": "python"
},
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment