Created
October 30, 2013 21:39
-
-
Save CliffordAnderson/7240815 to your computer and use it in GitHub Desktop.
Reads a page from a given book at the Internet Archive and extracts disambiguated geographic entities as GeoJSON using the Alchemy API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(: Extracts entities from Internet Archive texts using the Alchemy API :)
(: Serializes disambiguated entities to GeoJSON :)
xquery version "3.0"; | |
declare namespace csv = "http://basex.org/modules/json"; | |
(:~
 : Reads one page of a stored document and returns its words as a single
 : URI-encoded string.
 :
 : The document is opened with fn:doc from the path "geojson/" + $title.
 : A "page" is the $page-th OBJECT element of that document in document
 : order; its text is the content of all WORD descendants joined by a
 : single space.
 :
 : @param $title name of the document below "geojson/"
 : @param $page  1-based position of the OBJECT element to read
 : @return the page text escaped with fn:encode-for-uri; the empty string
 :         when the document has no OBJECT at that position
 :)
declare function local:read-page($title as xs:string, $page as xs:integer) as item()?
{
  (: The parentheses matter: //OBJECT[$page] would pick the $page-th OBJECT
     child of *each* parent, whereas (//OBJECT)[$page] picks the $page-th
     OBJECT of the whole document. :)
  let $object := (fn:doc(fn:concat("geojson/", $title))//OBJECT)[$page]
  (: Join with a separator instead of appending " " after every word, so the
     result carries no trailing space (and no trailing %20 once encoded). :)
  let $text := fn:string-join($object//WORD/text(), " ")
  return fn:encode-for-uri($text)
};
(: Main query: posts the text of one page to the Alchemy API named-entity
   endpoint and serialises every entity that has a disambiguated geo
   position as a GeoJSON Point feature inside a FeatureCollection. :)
let $api-key := "[YOUR KEY HERE]"
let $doc := "voyageofthetwosi00smit_djvu"
(: local:read-page already returns URI-encoded text, ready for a form body. :)
let $page := local:read-page($doc, 15)
let $json :=
  <json type="object">
    <type>FeatureCollection</type>
    <features type="array">
    {
      (: The form body is written in direct element content, so the field
         separator has to be the entity reference &amp; rather than a bare
         ampersand, which is a syntax error there. :)
      let $entities :=
        http:send-request(
          <http:request method='post' href='http://access.alchemyapi.com/calls/text/TextGetRankedNamedEntities'>
            <http:body media-type='application/x-www-form-urlencoded' method='text'>apikey={$api-key}&amp;text={$page}</http:body>
          </http:request>
        )/results/entities
      for $entity in $entities/entity
      (: Filter on the same node the coordinates are read from, so no feature
         is emitted with empty coordinates. :)
      let $geo := $entity/disambiguated/geo
      where $geo
      (: The geo value is split at its first space; the part after the space
         is emitted first and the part before it second. This assumes the API
         returns "latitude longitude", giving GeoJSON's [longitude, latitude]
         order - confirm against the API response. Both items are typed as
         numbers because GeoJSON positions must be numeric, not strings. :)
      return
        <feature type="object">
          <type>Feature</type>
          <properties type="object">
            {($entity/text)}
          </properties>
          <geometry type="object">
            <type>Point</type>
            <coordinates type="array">
              <coordinates type="number">{fn:substring-after($geo, " ")}</coordinates>
              <coordinates type="number">{fn:substring-before($geo, " ")}</coordinates>
            </coordinates>
          </geometry>
        </feature>
    }</features></json>
return json:serialize($json)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample output of Page 15 of voyageofthetwosi00smit_djvu in GeoJSON