Created
September 6, 2013 18:19
-
-
Save phi-gamma/6467801 to your computer and use it in GitHub Desktop.
Typeset Google spreadsheets directly in LuaTeX
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% http://tex.stackexchange.com/q/131500 | |
\documentclass {scrartcl} | |
\usepackage {luatexbase} | |
\RequireLuaModule {lualibs} | |
\usepackage {luaotfload} %% recommended, in that order! | |
\RequireLuaModule {spreadsheet} | |
\makeatletter
%% \googlespreadsheet[<options>]{<uri>}
%%   <options>  optional comma separated list, handed to Lua as a string
%%   <uri>      address of the spreadsheet, handed to Lua detokenized
%% Without an optional argument an empty option list is supplied.
\protected\def\googlespreadsheet{%
  \@ifnextchar[%
    {\googlespreadsheetopt}%
    {\googlespreadsheetopt[]}%
}
%% Worker macro: expand the option list once, then call the Lua handler.
\def\googlespreadsheetopt[#1]#2{%
  \edef\currentspreadsheetoptions{#1}%
  \directlua {
    packagedata.spreadsheet.urihandler ([[\currentspreadsheetoptions]],
                                        [[\detokenize {#2}]])
  }%
}
\makeatother
\begin{document}
  \input knuth
  %% "center" selects the column type c for every column; "dump" also
  %% writes the generated tabular code out through texio.write_nl
  \begin{table}
    \googlespreadsheet[center,dump]
      {https://docs.google.com/spreadsheet/ccc?key=0Amykmqr4Of-MdEVIUUcyYld3WTJhZnJHRkgwSF9CaUE&usp=sharing}
  \end{table}
  \input knuth
\end{document}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env texlua | |
----------------------------------------------------------------------- | |
-- FILE: spreadsheet.lua | |
-- DESCRIPTION: retrieve and typeset spreadsheets from Google Docs
-- REQUIREMENTS: LuaTeX, LuaSec, lualibs, luaotfload
-- AUTHOR: Philipp Gesang (Phg), <[email protected]> | |
-- VERSION: 1.0 | |
-- CREATED: 2013-09-06 17:26:50+0200 | |
----------------------------------------------------------------------- | |
-- | |
--- Module table. It lives in the global "packagedata" so that the TeX
--- side can reach the handler through \directlua.
packagedata             = packagedata or { }
packagedata.spreadsheet = packagedata.spreadsheet or { }
local spreadsheet       = packagedata.spreadsheet

--- Descriptor handed to luatexbase.provides_module() further down.
spreadsheet.module_info = {
  name        = "spreadsheet",
  version     = 0.0,
  date        = "2013/09/06",
  description = "Typeset spreadsheets from Google Docs",
  author      = "Philipp Gesang",
  copyright   = "Philipp Gesang",
  license     = "BSD 2 clause",
}

--- Load Lualibs with the extended set unless it is configured already.
--- The global "config" need not exist at all, so it has to be tested
--- before it is indexed; otherwise the check itself raises an error.
if not (config and config.lualibs) then
  config                       = config or { }
  config.lualibs               = config.lualibs or { }
  config.lualibs.load_extended = true
  require "lualibs"
end

--- The "caches" table is needed for caches.getwritablepath below. The
--- previous "utilities" table is kept and only receives the "storage"
--- field found in the global after the require (presumably the loaded
--- file replaces the global "utilities" -- confirm against luaotfload).
if not caches then
  local u = utilities
  require "luaotfload-basics-gen"
  u.storage = utilities.storage
  utilities = u
end
local lpeg = require "lpeg" | |
local md5 = require "md5" | |
local socket = require "socket" | |
local ssl = require "ssl" | |
local http = require "socket.http" | |
local https = require "ssl.https" | |
local stringformat = string.format | |
local stringrep = string.rep | |
local tableconcat = table.concat | |
local tabletohash = table.tohash | |
local texiowrite_nl = texio.write_nl | |
local getwritablepath = caches.getwritablepath | |
local md5sumhexa = md5.sumhexa | |
local lfsisfile = lfs.isfile | |
local ioloaddata = io.loaddata | |
local iosavedata = io.savedata | |
local texsprint = tex.sprint | |
local unpack = unpack or table.unpack | |
local tonumber = tonumber | |
local sockettcp = socket.tcp | |
local sslwrap = ssl.wrap | |
local C, Cs, P, R, S = lpeg.C, lpeg.Cs, lpeg.P, lpeg.R, lpeg.S | |
local lpegmatch = lpeg.match | |
local urlescaper = lpeg.patterns.urlescaper | |
local urlsplitter = lpeg.patterns.urlsplitter | |
local rfc4180splitter = utilities.parsers.rfc4180splitter () | |
--- Message functions. With luatexbase present they come from
--- provides_module() (assumed to return the error, warning and info
--- reporters in that order -- confirm against luatexbase-modutils);
--- otherwise all three simply write a new line through texio.
local error, warn, info
if not (luatexbase and luatexbase.provides_module) then
  kpse.set_program_name "luatex"
  error = texiowrite_nl
  warn  = texiowrite_nl
  info  = texiowrite_nl
else
  error, warn, info = luatexbase.provides_module (spreadsheet.module_info)
end
----------------------------------------------------------------------- | |
--- connection handling | |
----------------------------------------------------------------------- | |
--- Settings passed to ssl.wrap() for every connection.
--- NOTE(review): with verify = "none" the peer certificate is presumably
--- not validated -- confirm against the LuaSec documentation.
local ssl_parameters = {
  mode     = "client",
  protocol = "tlsv1_2",
  --protocol = "sslv3",
  verify   = "none",
  options  = "all",
}

--- status lines; http_200 is compared verbatim with the first line of a
--- response
local http_200 = [[HTTP/1.1 200 OK]]
local http_302 = [[HTTP/1.1 302 Moved Temporarily]]

--- request line templates
local tpl_GET  = "GET %s HTTP/1.1"
local tpl_HEAD = "HEAD %s HTTP/1.1"

--- request header lines; those with %s are string.format templates
local header_Host       = "Host: %s"
local header_User_Agent = "User-Agent: hackish CSV retrieval for LuaTeX"
local header_Accept     = "Accept: */*"
local header_Connection = "Connection: Keep-Alive"
local header_Cookie     = "Cookie: %s"
----- header_Referer    = "Referer: %s"

--- patterns applied to single response header lines
local semicolon     = P";"
local p_restof      = P(1)^1 * P(-1) --- at least one character, up to the end
local p_location    = P"Location: "   * C(p_restof)
local p_setcookie   = P"Set-Cookie: " * C(p_restof)
local p_http_header = C((R("az", "AZ") + P"-")^1 * P": " * p_restof)
local p_cookie      = C((1 - semicolon)^1) --- ignore attributes
--- Open a TCP socket to *hostname* on port 443 and wrap it in SSL using
--- ssl_parameters. The results of connect() and dohandshake() are not
--- inspected here; callers notice a dead connection on first use.
--- @param hostname string
--- @return the wrapped connection object
local get_ssl_connection = function (hostname)
  local plain = sockettcp ()
  plain:connect (hostname, 443)
  local secured = sslwrap (plain, ssl_parameters)
  secured:dohandshake ()
  return secured
end
--- Build the text of a GET request.
--- @param resource  string  request target (path plus query)
--- @param hostname  string  value for the Host header
--- @param from      any     unused; kept so existing callers need no change
--- @param cookies   table   optional list of "name=value" strings; a Cookie
---                          header is added when it is non-empty
--- @return string   request line and headers, terminated by an empty line
local make_request = function (resource, hostname, from, cookies)
  --resource = lpegmatch (urlescaper, resource)
  --req[#req + 1] = string.format (tpl_HEAD, resource)
  local req = {
    stringformat (tpl_GET, resource),
    header_User_Agent,
    stringformat (header_Host, hostname),
    header_Accept,
    header_Connection,
  }
  if cookies and next (cookies) then
    req [#req + 1] = stringformat (header_Cookie,
                                   tableconcat (cookies, "; "))
  end
  --- HTTP/1.1 terminates every line with CRLF, not with a bare LF, and
  --- the header section ends with an empty line.
  return tableconcat (req, "\r\n") .. "\r\n\r\n"
end
local max_redirects = 10

--- Read a body that is sent in chunks: each chunk is announced by a line
--- holding its size in hexadecimal, followed by the data and a line end;
--- a size of zero ends the body.
--- @param conn  connection object offering receive()
--- @return string with all chunks joined, or nil if nothing could be read
---         or the connection broke in the middle of a chunk
local read_chunked_body = function (conn)
  local chunks = { }
  while true do
    local line = conn:receive ()
    local hex  = line and line:match "^%s*(%x+)"
    local size = hex and tonumber (hex, 16)
    if not size or size == 0 then
      break
    end
    local chunk = conn:receive (size)
    if not chunk then --- truncated: do not hand out partial data
      return nil
    end
    chunks [#chunks + 1] = chunk
    conn:receive () --- skip the line end that follows the chunk data
  end
  if #chunks == 0 then
    return nil
  end
  return tableconcat (chunks)
end

--- Request *uri* and follow "Location:" redirects until a response with
--- status line "HTTP/1.1 200 OK" arrives or max_redirects is exceeded.
--- Cookies set by intermediate responses are sent along with subsequent
--- requests. The connection is reused for a redirect only if the server
--- answered with "Connection: Keep-Alive" and the host stays the same.
--- NOTE(review): the body of a redirect response is not drained before a
--- kept-alive connection is reused.
--- @param uri string
--- @return the final uri and the response body; nothing on failure
local resolve_indirection = function (uri)
  local from, conn, hostname
  local cookies    = { }
  local cnt        = 0 -- count redirects
  local components = lpegmatch (urlsplitter, uri)
  while uri and components and cnt <= max_redirects do
    if not conn then
      hostname = components [2]
      conn = get_ssl_connection (hostname)
      if not conn then
        warn "> cannot connect, aborting"
        return
      end
    end
    local resource = stringformat ([[/%s?%s]],
                                   components [3] or "",
                                   components [4] or "")
    local request = make_request (resource, hostname, from, cookies)
    local sent, _, last = conn:send (request)
    if not sent then
      --- format first: the message function may not print extra arguments
      warn (stringformat ("> error; bytes sent: %d", last or 0))
      warn "> retrying ... "
      --- "last" is the index of the last byte that went out, so resume
      --- with the byte after it
      sent = conn:send (request, (last or 0) + 1)
      if not sent then
        warn ("> failed, aborting")
        conn:close ()
        return
      end
    end
    local line, err = conn:receive ()
    --info (stringformat (">>> response=%q error=%q", line, err or "<none>"))
    if err then
      conn:close ()
      return
    end
    local found     = line == http_200
    local keepalive = false
    local redirect, chunked, length
    while line do
      redirect = lpegmatch (p_location, line) or redirect
      local cookie = lpegmatch (p_setcookie, line)
      if cookie then
        cookie = lpegmatch (p_cookie, cookie)
        cookies [#cookies + 1] = cookie
      end
      if line == header_Connection then
        keepalive = true
      end
      --- remember how the body is delimited
      local lower = line:lower ()
      if lower:find "^transfer%-encoding:.*chunked" then
        chunked = true
      end
      local len = lower:match "^content%-length:%s*(%d+)"
      if len then
        length = tonumber (len)
      end
      line = conn:receive ()
      if not line or string.is_empty (line) then --- end of header
        line = nil
      end
    end --- while line
    if found then
      local data
      if length and not chunked then
        data = conn:receive (length)
      else
        --- chunked, or no length information at all
        data = read_chunked_body (conn)
      end
      conn:close ()
      if not data then
        warn "> empty or truncated response body"
        return
      end
      return uri, data
    end
    if not redirect then
      --- neither the document nor a new location: nothing left to try
      warn "> neither data nor redirect received, aborting"
      conn:close ()
      return
    end
    from       = uri
    uri        = redirect
    cnt        = cnt + 1
    components = lpegmatch (urlsplitter, uri)
    if not components
    or keepalive == false
    or hostname ~= components [2]
    then
      conn:close ()
      conn = nil
      collectgarbage "collect"
    end
  end -- while uri
  if conn then --- gave up: too many redirects
    conn:close ()
  end
end
----------------------------------------------------------------------- | |
--- url handling | |
----------------------------------------------------------------------- | |
local ampersand = P"&"
--- an "output=<value>" parameter whose value is replaced with "csv"; the
--- value ends at the next parameter or at the start of the fragment
local output    = P"output=" * ((1 - S"&#")^1 / "csv")
local p_output  = (1 - output)^0 * output
local s_output  = Cs((output + 1)^0)

--- Turn a spreadsheet address into one that requests CSV output.
--- @param uri string
--- @return the rewritten uri; false if no uri was given
local rewrite_spreadsheet_uri = function (uri)
  if not uri or uri == "" then
    return false
  end
  info "Generating URL to CSV data."
  --- check if the uri already contains an output parameter
  if lpegmatch (p_output, uri) then
    --- if so, replace all occurrences with "csv" as argument
    return lpegmatch (s_output, uri)
  end
  --- else, append the desired parameter: in front of a fragment, as only
  --- the query is sent to the server, and with "?" if this is the first
  --- parameter
  local base, fragment = uri:match "^([^#]*)(.*)$"
  local separator = base:find ("?", 1, true) and "&" or "?"
  return base .. separator .. "output=csv" .. fragment
end
--- Fetch the document behind *curi*, using a cache file named after the
--- MD5 sum of the uri inside the writable "spreadsheet" cache path.
--- @param curi   string  uri of the CSV data
--- @param force  any     if true-ish, ignore an existing cache file
--- @return "<cache>" or the resolved uri, followed by the data;
---         nil, nil if the document could not be retrieved
local get_raw_doc = function (curi, force)
  info (stringformat ("Retrieving URI %q.", curi))
  local hash      = md5sumhexa (curi)
  local writable  = getwritablepath ("spreadsheet", "")
  local cachefile = writable .. "/" .. hash
  if not force and lfsisfile (cachefile) then
    local data = ioloaddata (cachefile)
    --- an unreadable or empty cache file is treated as a cache miss
    if data and data ~= "" then
      info (stringformat ("Spreadsheet (%d bytes) found in cache.",
                          #data))
      return "<cache>", data
    end
  end
  local resolved, data = resolve_indirection (curi)
  --- the resolver may return a uri without data; "#data" must not be
  --- evaluated in that case
  if not resolved or not data then
    info "Failed to resolve URI, sorry!"
    return nil, nil
  end
  info (stringformat ("Found at %q (%d bytes).",
                      resolved, #data))
  iosavedata (cachefile, data)
  info (stringformat ("Spreadsheet stored in cache at %q.",
                      cachefile))
  return resolved, data
end
----------------------------------------------------------------------- | |
--- csv extraction | |
----------------------------------------------------------------------- | |
--- Run the RFC 4180 splitter over the raw CSV text.
--- @param raw string
--- @return whatever the splitter yields; a message is logged if that is
---         nil or false
local extract_data = function (raw)
  local rows = rfc4180splitter (raw)
  if rows then
    return rows
  end
  info "Cannot parse CSV file."
  return rows
end
----------------------------------------------------------------------- | |
--- latex writer | |
----------------------------------------------------------------------- | |
--- column type per alignment option
local colspec = { center = "c", left = "l", right = "r", default = "l" }

--- Derive the tabular preamble from the data: only the longest row counts.
--- @param rows     table  list of rows, each a list of cells
--- @param options  table  hash of options; center, left, right are
---                        checked in that order
--- @return number of columns, preamble string (one letter per column)
local make_tablespec = function (rows, options)
  local align = "default"
  if options.center then
    align = "center"
  elseif options.left then
    align = "left"
  elseif options.right then
    align = "right"
  end
  local widest = 0
  for i = 1, #rows do
    local width = #rows [i]
    if widest < width then
      widest = width
    end
  end
  local spec = stringrep (colspec [align], widest)
  return widest, spec
end
--- Format one table row with exactly *n* cells.
--- @param row  table   list of cell strings
--- @param n    number  cells to emit; missing ones are left empty
--- @return string  cells joined by " & ", ending in " \\"
local make_row = function (row, n)
  local cells = { }
  for i = 1, n do
    cells [i] = row [i] or "" -- fill up empty cells
  end
  return tableconcat (cells, " & ") .. [[ \\]]
end
local starttable = [[\begin {tabular} {%s}]]
local stoptable  = [[\end {tabular}]]

--- Write the data as a LaTeX tabular.
--- Inside TeX the lines are passed to tex.sprint; with the "dump" option
--- they are written out through texio.write_nl as well. Without
--- tex.sprint they are only written out.
--- @param data     table  list of rows, each a list of cells
--- @param options  table  hash of options (alignment, dump)
--- @return true
local tabular_of = function (data, options)
  local acc = { }
  local ncols, tspec = make_tablespec (data, options)
  acc [#acc + 1] = stringformat (starttable, tspec)
  for i = 1, #data do
    --- pad every row to the number of columns announced in the
    --- preamble; padding to the number of rows cuts off cells or adds
    --- empty ones whenever the sheet is not square
    acc [#acc + 1] = make_row (data [i], ncols)
  end
  acc [#acc + 1] = stoptable
  if texsprint then
    if options.dump then
      texiowrite_nl (tableconcat (acc, "\n"))
    end
    texsprint (unpack (acc))
  else
    texiowrite_nl (tableconcat (acc, "\n"))
  end
  return true
end
----------------------------------------------------------------------- | |
--- option handler | |
----------------------------------------------------------------------- | |
local commasplitter = lpeg.splitat ","

--- Convert a comma separated option list into a hash.
--- Whitespace around option names is removed, so "center, dump" sets the
--- keys "center" and "dump"; empty entries are skipped.
--- @param raw string  option list, may be empty
--- @return table  maps every option name to true
local handle_options = function (raw)
  local options = { }
  if not raw or raw == "" then
    return options
  end
  local items = { lpegmatch (commasplitter, raw) }
  for i = 1, #items do
    local name = items [i]:match "^%s*(.-)%s*$"
    if name ~= "" then
      options [name] = true
    end
  end
  return options
end
----------------------------------------------------------------------- | |
--- main handler | |
----------------------------------------------------------------------- | |
--- Entry point called from TeX: parse the options, rewrite the uri to
--- request CSV, fetch the document, split it and write the table.
--- @param rawoptions string  comma separated option list
--- @param uri        string  address of the spreadsheet
--- @return the result of tabular_of, or false if any step failed
local urihandler = function (rawoptions, uri)
  local options = handle_options (rawoptions)
  local curi    = rewrite_spreadsheet_uri (uri)
  if curi then
    local resolved, raw = get_raw_doc (curi, options.force)
    if resolved then
      local data = extract_data (raw)
      if data then
        return tabular_of (data, options)
      end
    end
  end
  return false
end

--- make the handler reachable from \directlua
packagedata.spreadsheet.urihandler = urihandler
--- test:
--- urihandler [[https://docs.google.com/spreadsheet/ccc?key=0Amykmqr4Of-MdEVIUUcyYld3WTJhZnJHRkgwSF9CaUE&usp=sharing]]

--- Without tex.sprint the file runs as a script: the first command line
--- argument, if present, is taken as the uri and no options are set.
if not texsprint and arg [1] then
  return urihandler ("", arg [1])
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment