-
-
Save AdamSteffanick/9158c5c70c4de80833f021b2f48ac4b0 to your computer and use it in GitHub Desktop.
Sample XQuery code for extracting, transforming, and loading Internet Archive (Open Library) metadata into BaseX
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| xquery version "3.1"; | |
| (: Copies the children of $doc into a new <book> element and stores that element in database $db. :) | |
| (: The resource path is the text of the <key> element(s) found anywhere below the new <book>. :) | |
| declare %updating function local:persist($db as xs:string, $doc as element()) as empty-sequence() | |
| { | |
| let $record := element book { $doc/* } | |
| return db:replace($db, $record//key/text(), $record) | |
| }; | |
| (: Main query: search Open Library by title and persist every returned record into $db. :) | |
| let $db := "books" | |
| let $search-term := "tree" | |
| (: Percent-encode the term so that spaces or reserved characters cannot break the query string. :) | |
| let $uri := "http://openlibrary.org/search.json?title=" || encode-for-uri($search-term) | |
| let $docs := fetch:text($uri) => json:parse() | |
| (: Each "_" child of docs is handed to local:persist as one record. :) | |
| for $doc in $docs/json/docs/_ | |
| return local:persist($db, $doc) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| xquery version "3.1"; | |
| (: Builds a single <book> element from one search-result item produced by json:parse. :) | |
| (: $doc: the result item; its children are read by name (edition__key, title, author__name, ...). :) | |
| (: Returns: the new <book> element with key/full-text attributes and one child per mapped field. :) | |
| declare function local:convert($doc as element()*) as element()* | |
| { | |
| element book { | |
| attribute key {$doc/edition__key/_/text()}, | |
| attribute full-text {$doc/has__fulltext/text()}, | |
| (: Fixed: the path ended in "text" (a child element name test) instead of text(), so @title never received the title string. :) | |
| for $title in $doc/title__suggest/text() return element title { attribute title {$doc/title/text()}, $title }, | |
| (: Authors are paired with their keys by position: the n-th name gets the n-th author__key entry. :) | |
| for $author at $x in $doc/author__name/_/text() return element author {attribute key {$doc/author__key/_[$x]/text()}, $author }, | |
| (: NOTE(review): @first is filled from publish__year and @year from first__publish__year/_ - the names look swapped; confirm the intended mapping. :) | |
| element date { | |
| attribute first {$doc/publish__year/_/text()}, | |
| attribute year {$doc/first__publish__year/_/text()}, | |
| $doc/publish__date/text() }, | |
| element type { | |
| $doc/type/text() | |
| }, | |
| (: One <location> per place and one <house> per publisher keeps multiple values separate. :) | |
| element place { | |
| for $place in $doc/publish__place/_/text() | |
| return element location {$place} | |
| }, | |
| element publisher { | |
| for $publisher in $doc/publisher/_/text() | |
| return element house {$publisher} | |
| }, | |
| (: NOTE(review): several text nodes placed directly in element content are merged without a separator; affects language and lccn when they hold more than one value. :) | |
| element language { | |
| $doc/language/_/text() | |
| }, | |
| element lccn { | |
| $doc/lccn/_/text() | |
| }, | |
| element key { | |
| $doc/key/text() | |
| }, | |
| (: NOTE(review): unlike the other list fields this reads isbn/text() without the "_" step - confirm the shape of isbn in the parsed JSON. :) | |
| for $isbn in $doc/isbn/text() return element isbn { $isbn }, | |
| element texts { for $text in $doc/text/_/text() return element text { $text } }, | |
| element seeds { for $seed in $doc/seed/_/text() return element seed { $seed } }, | |
| element subjects {for $subject in $doc/subject/_/text() return element subject { $subject } } | |
| } | |
| }; | |
| (: Stores $book in database $db, using the book's <key> child as the resource path. :) | |
| declare %updating function local:add( $book as element(book), $db as xs:string ) as empty-sequence() | |
| { | |
| db:replace($db, $book/key, $book) | |
| }; | |
| (: Main query: search Open Library by title, convert each result to a <book> and store it in $db. :) | |
| let $search-term := "literature" | |
| let $db := "books" | |
| (: Percent-encode the term so that spaces or reserved characters cannot break the query string. :) | |
| let $uri := "http://openlibrary.org/search.json?title=" || encode-for-uri($search-term) | |
| let $json := fetch:text($uri) | |
| let $xml := json:parse($json) | |
| (: Each "_" child of docs is converted and stored as one record. :) | |
| for $doc in $xml/json/docs/_ | |
| return local:convert($doc) => local:add($db) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| xquery version "3.1"; | |
| (: Builds a single <book> element from one search-result item produced by json:parse. :) | |
| (: $doc: the result item; its children are read by name (edition__key, title, author__name, ...). :) | |
| (: Returns: the new <book> element with key/full-text attributes and one child per mapped field. :) | |
| declare function local:convert($doc as element()*) as element()* | |
| { | |
| element book { | |
| attribute key {$doc/edition__key/_/text()}, | |
| attribute full-text {$doc/has__fulltext/text()}, | |
| (: Fixed: the path ended in "text" (a child element name test) instead of text(), so @title never received the title string. :) | |
| for $title in $doc/title__suggest/text() return element title { attribute title {$doc/title/text()}, $title }, | |
| (: Authors are paired with their keys by position: the n-th name gets the n-th author__key entry. :) | |
| for $author at $x in $doc/author__name/_/text() return element author {attribute key {$doc/author__key/_[$x]/text()}, $author }, | |
| (: NOTE(review): @first is filled from publish__year and @year from first__publish__year/_ - the names look swapped; confirm the intended mapping. :) | |
| element date { | |
| attribute first {$doc/publish__year/_/text()}, | |
| attribute year {$doc/first__publish__year/_/text()}, | |
| $doc/publish__date/text() }, | |
| element type { | |
| $doc/type/text() | |
| }, | |
| (: NOTE(review): several text nodes placed directly in element content are merged without a separator; affects place, publisher, language and lccn when they hold more than one value. :) | |
| element place { | |
| $doc/publish__place/_/text() | |
| }, | |
| element publisher { | |
| $doc/publisher/_/text() | |
| }, | |
| element language { | |
| $doc/language/_/text() | |
| }, | |
| element lccn { | |
| $doc/lccn/_/text() | |
| }, | |
| element key { | |
| $doc/key/text() | |
| }, | |
| (: NOTE(review): unlike the other list fields this reads isbn/text() without the "_" step - confirm the shape of isbn in the parsed JSON. :) | |
| for $isbn in $doc/isbn/text() return element isbn { $isbn }, | |
| element texts { for $text in $doc/text/_/text() return element text { $text } }, | |
| element seeds { for $seed in $doc/seed/_/text() return element seed { $seed } }, | |
| element subjects {for $subject in $doc/subject/_/text() return element subject { $subject } } | |
| } | |
| }; | |
| (: Stores $book in database $db, using the book's <key> child as the resource path. :) | |
| declare %updating function local:add( $book as element(book), $db as xs:string ) as empty-sequence() | |
| { | |
| db:replace($db, $book/key, $book) | |
| }; | |
| (: Main query: page through the Open Library title search, converting and storing every result in $db. :) | |
| let $search-term := "barth" | |
| let $db := "books" | |
| let $uri := "http://openlibrary.org/search.json?title=" | |
| (: Build the request URL once; the term is percent-encoded so reserved characters cannot break the query string. :) | |
| let $query := $uri || encode-for-uri($search-term) | |
| (: This first request is only used to read numFound; its docs are not processed here. :) | |
| let $json := fetch:text($query) | |
| let $xml := json:parse($json) | |
| (: Page count assumes 100 results per page - TODO confirm against the API's page size. :) | |
| let $pages := fn:ceiling($xml/json/numFound/text() div 100) | |
| for $page in (1 to xs:int($pages)) | |
| return | |
| (: Fixed: a bare ampersand is not allowed inside an XQuery string literal; it must be written as an entity reference. :) | |
| let $json := fetch:text($query || "&amp;page=" || $page ) | |
| let $xml := json:parse($json) | |
| for $doc in $xml/json/docs/_ | |
| return local:convert($doc) => local:add($db) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment