Skip to content

Instantly share code, notes, and snippets.

@joewiz
Last active September 30, 2017 21:32
Show Gist options
  • Save joewiz/5938909 to your computer and use it in GitHub Desktop.
Save joewiz/5938909 to your computer and use it in GitHub Desktop.
Download a file via HTTP and save to an eXist-db collection; uses EXPath modules where possible
xquery version "3.1";
import module namespace hc="http://expath.org/ns/http-client";
import module namespace util="http://exist-db.org/xquery/util";
import module namespace xmldb="http://exist-db.org/xquery/xmldb";
(: Downloads a file from a remote HTTP server at $file-url and saves it to the eXist-db $collection.
 : We try hard to recognize XML files and save them with the correct mimetype so that eXist-db can
 : efficiently index and query the files; if it doesn't appear to be XML, though, we just trust
 : the response headers.
 :
 : $file-url    the URL to fetch with an HTTP GET
 : $collection  the database collection the downloaded resource is stored into
 : returns      the result of xmldb:store() when the server answers with status 200, otherwise
 :              an <error> element wrapping the hc:response element that was received
 :)
declare function local:http-download($file-url as xs:string, $collection as xs:string) as item()* {
    let $request := <hc:request href="{$file-url}" method="GET"/>
    let $response := hc:send-request($request)
    let $head := $response[1]
    (: These sample responses from the EXPath HTTP client reveal where the response code, media-type, and filename can be found:
    <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
    <hc:header name="connection" value="close"/>
    <hc:header name="transfer-encoding" value="chunked"/>
    <hc:header name="content-type" value="application/zip"/>
    <hc:header name="content-disposition" value="attachment; filename=xqjson-master.zip"/>
    <hc:header name="date" value="Sat, 06 Jul 2013 05:59:04 GMT"/>
    <hc:body media-type="application/zip"/>
    </hc:response>
    <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
    <hc:header name="date" value="Sat, 06 Jul 2013 06:26:34 GMT"/>
    <hc:header name="server" value="GitHub.com"/>
    <hc:header name="content-type" value="text/plain; charset=utf-8"/>
    <hc:header name="status" value="200 OK"/>
    <hc:header name="content-disposition" value="inline"/>
    <hc:header name="content-transfer-encoding" value="binary"/>
    <hc:header name="etag" value=""a6782b6125583f16632fa103a828fdd6""/>
    <hc:header name="vary" value="Accept-Encoding"/>
    <hc:header name="cache-control" value="private"/>
    <hc:header name="keep-alive" value="timeout=10, max=50"/>
    <hc:header name="connection" value="Keep-Alive"/>
    <hc:body media-type="text/plain"/>
    </hc:response>
    :)
    return
        (: check to ensure the remote server indicates success :)
        if ($head/@status = '200') then
            (: the URL without any query string or fragment, so that suffix checks and the
               fallback filename are not thrown off by "?foo=bar" or "#frag" :)
            let $url-path := replace($file-url, '[?#].*$', '')
            let $is-xml-url := ends-with($url-path, '.xml')
            (: try to get the filename from the content-disposition header: take what follows
               "filename=", drop any further ";"-separated parameters, strip surrounding double
               quotes, and discard any directory part so the header cannot inject a path :)
            let $disposition := string($head/hc:header[@name = 'content-disposition'][1]/@value)
            let $declared-name :=
                replace(
                    replace(
                        normalize-space(tokenize(substring-after($disposition, 'filename='), ';')[1]),
                        '^"(.*)"$',
                        '$1'
                    ),
                    '^.*[/\\]',
                    ''
                )
            let $filename :=
                if ($declared-name ne '') then
                    $declared-name
                else
                    (: use whatever comes after the final / as the file name :)
                    replace($url-path, '^.*/([^/]*)$', '$1')
            (: override the stated media type if the file is known to be .xml :)
            let $media-type := $head/hc:body/@media-type
            let $mime-type :=
                if ($is-xml-url and $media-type = 'text/plain') then
                    'application/xml'
                else
                    $media-type
            (: content-transfer-encoding is a response *header* (see the samples above), not an
               attribute of hc:body :)
            let $content-transfer-encoding := $head/hc:header[@name = 'content-transfer-encoding']/@value
            let $body := $response[2]
            (: if the file is XML and the payload really arrived as binary, convert it to a string;
               a body the HTTP client already decoded is stored untouched :)
            let $file :=
                if (
                    $is-xml-url
                    and $content-transfer-encoding = 'binary'
                    and $body instance of xs:base64Binary
                ) then
                    util:binary-to-string($body)
                else
                    $body
            return
                xmldb:store($collection, $filename, $file, $mime-type)
        else
            <error>
                <message>Oops, something went wrong:</message>
                {$head}
            </error>
};
(: Example invocation: download a sample build.xml and store it in the /db collection. :)
local:http-download(
    'https://raw.github.com/joewiz/xqjson/master/build.xml',
    '/db'
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment