Created
December 9, 2020 16:25
-
-
Save benwbrum/377604bc69b422834f11804ba604a07a to your computer and use it in GitHub Desktop.
Internet Archive collection to IIIF collection manifest script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Builds a IIIF Presentation API 2 collection manifest for an Internet Archive
# collection and writes it to stdout as pretty-printed JSON.
#
# Usage: ruby SCRIPT INTERNET_ARCHIVE_COLLECTION_ID > collection.json
#
# The collection's records are harvested over OAI-PMH from archive.org. Every
# record that carries a dc:identifier becomes one sc:Manifest entry whose @id
# points at the matching iiif.archivelab.org manifest.
require 'oai'
require 'pry' # not referenced below; kept for interactive debugging (binding.pry)
require 'json'

collection = ARGV.first
if collection.nil? || collection.empty?
  # Without an argument the OAI set would be the meaningless "collection:".
  abort "Usage: #{File.basename($PROGRAM_NAME)} INTERNET_ARCHIVE_COLLECTION_ID"
end

manifest = {
  "@context" => "http://iiif.io/api/presentation/2/context.json",
  # NOTE(review): this @id is hard-coded and does not vary with the collection
  # argument -- confirm it is the intended identifier for the generated document.
  "@id" => "https://fromthepage.com/iiif/collection/5",
  "@type" => "sc:Collection",
  # String key (=>) like every other entry; the former `"label":` form made a Symbol key.
  "label" => "IIIF Collection for #{collection}",
  "manifests" => []
}
items = []

# NOTE(review): the "From" value looks like a redacted/obfuscated address;
# replace it with a real contact email before running against archive.org.
client = OAI::Client.new 'http://www.archive.org/services/oai.php', :headers => { "From" => "[email protected]" }
response = client.list_records(:set => "collection:#{collection}")

loop do
  response.each do |record|
    # Per OAI-PMH, deleted records have a header but no metadata element.
    metadata = record.metadata
    next if metadata.nil?

    identifier = metadata.elements['.//dc:identifier']
    next if identifier.nil?

    # The identifier is expected to be a URL; keep only what follows the last "/".
    details_id = identifier.text.to_s
    book_id = details_id.sub(%r{^.*/}, '')
    next if book_id.empty?

    # A record without a usable dc:title is labelled with its identifier
    # instead of crashing the whole harvest.
    title_element = metadata.elements['.//dc:title']
    title = title_element ? title_element.text.to_s : ''
    title = book_id if title.empty?

    items << {
      "@id" => "https://iiif.archivelab.org/iiif/#{book_id}/manifest.json",
      "@type" => "sc:Manifest",
      "label" => title
    }
  end

  # OAI-PMH ends a list with an empty resumptionToken element, which can
  # surface as nil or as an empty string; either one means "last page".
  token = response.resumption_token
  break if token.nil? || token.to_s.empty?

  response = client.list_records(:resumption_token => token)
end

manifest["manifests"] = items

# `to_json(pretty: true)` is not a recognized generator option and emitted a
# single line; JSON.pretty_generate produces the indented output intended.
puts JSON.pretty_generate(manifest)
# https://archive.org/services/collection-rss.php?collection=middleburycollege
# https://archive.org/details/middleburycollege?and[]=mediatype%3A%22texts%22
# turns to
#
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment