Skip to content

Instantly share code, notes, and snippets.

@CliffordAnderson
Created October 30, 2013 21:39
Show Gist options
  • Save CliffordAnderson/7240815 to your computer and use it in GitHub Desktop.
Save CliffordAnderson/7240815 to your computer and use it in GitHub Desktop.
Reads a page from a given book at the Internet Archive and extracts disambiguated geographic entities as GeoJSON using the Alchemy API
(: Extracts entities from Internet Archive texts using the Alchemy API :)
(: Serializes disambiguated Entities to GeoJSON :)
xquery version "3.0";
declare namespace csv = "http://basex.org/modules/json";
(:~
 : Reads one page of a stored document and returns its words as a single
 : URI-encoded string.
 :
 : The document is loaded with fn:doc from the path "geojson/" followed by
 : $title. A page is taken to be one OBJECT element, and its text is the
 : sequence of text nodes found under its WORD descendants.
 :
 : @param $title  document name, appended to "geojson/" to form the URI
 : @param $page   1-based position of the OBJECT element, counted in
 :                document order across the whole document
 : @return the words of that page joined by single spaces and passed
 :         through fn:encode-for-uri; an empty string if the page does
 :         not exist or contains no words
 :)
declare function local:read-page($title as xs:string, $page as xs:integer) as item()?
{
  (: The parentheses matter: //OBJECT[$page] would select the $page-th OBJECT
     child of every parent element, whereas the parenthesised form selects
     the $page-th OBJECT of the whole document. :)
  let $object := (fn:doc(fn:concat("geojson/", $title))//OBJECT)[$page]
  (: Join with an explicit separator so no trailing space gets encoded. :)
  let $text := fn:string-join($object//WORD/text(), " ")
  return fn:encode-for-uri($text)
};
(: Main query: sends the text of one page to the Alchemy named-entity
   endpoint and serializes every entity that carries a disambiguated
   geographic position as a GeoJSON FeatureCollection. :)
(: Alchemy API key; replace the placeholder before running. :)
let $api-key := "[YOUR KEY HERE]"
(: Document name and the URI-encoded text of its page 15. :)
let $doc := "voyageofthetwosi00smit_djvu"
let $page := local:read-page($doc, 15)
let $json :=
<json type="object">
<type>FeatureCollection</type>
<features type="array">
{
(: http:send-request returns the response element followed by the body;
   the path step below only matches inside the parsed body. :)
let $entities :=
http:send-request(
<http:request method='post' href='http://access.alchemyapi.com/calls/text/TextGetRankedNamedEntities'>
<http:body media-type='application/x-www-form-urlencoded' method='text'>apikey={$api-key}&amp;text={$page}</http:body>
</http:request>
)/results/entities
for $entity in $entities/entity
(: Filter on the same value that is read below, and require the space that
   separates the two numbers, so no feature is emitted with empty
   coordinates. The value appears to be "latitude longitude". :)
let $geo := fn:normalize-space($entity/disambiguated/geo)
where fn:contains($geo, " ")
return
<feature type="object">
<type>Feature</type>
<properties type="object">
{($entity/text)}
</properties>
<geometry type="object">
<type>Point</type>
<coordinates type="array">
<coordinates type="number">{fn:substring-after($geo, " ")}</coordinates>
<coordinates type="number">{fn:substring-before($geo, " ")}</coordinates>
</coordinates>
</geometry>
</feature>
}</features></json>
(: The coordinates are typed as numbers because GeoJSON positions must be
   JSON numbers, not strings; the second part of the value is written first
   to follow the GeoJSON longitude, latitude order. :)
return json:serialize($json)
@CliffordAnderson
Copy link
Author

Sample output of Page 15 of voyageofthetwosi00smit_djvu in GeoJSON

{
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {
        "text": "Long Island Sound"
      },
      "geometry": {
        "type": "Point",
        "coordinates": [
          "-72.90138888888889",
          "41.105"
        ]
      }
    }
  ]
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment