Last active
December 15, 2015 20:40
-
-
Save pygy/5320097 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- A more complete version is now available here: | |
-- https://github.com/pygy/LuLPeg | |
-- PureLPeg.lua | |
-- a WIP LPeg implementation in pure Lua, by Pierre-Yves Gérardy | |
-- released under the Romantic WTF Public License (see the end of the file). | |
-- Captures and locales are not yet implemented, but the rest works quite well. | |
-- UTF-8 is supported out of the box | |
-- | |
-- PL.set_charset"UTF-8" | |
-- s = PL.S"ß∂ƒ©˙" | |
-- s:match"©" --> 3 (since © is two bytes wide). | |
-- | |
-- More encodings can be easily added (see the charset section), by adding a | |
-- few appropriate functions. | |
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Shorthands------------------------------------------------------------------ | |
------------------------------------------------------------------------------- | |
local t_concat, t_insert, t_remove
    , t_sort, t_unpack
    = table.concat, table.insert, table.remove
    , table.sort, table.unpack or unpack
local s_byte, s_char, s_sub
    = string.byte, string.char, string.sub
local m_max, m_min
    = math.max, math.min
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Helpers -------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
--- Give `t` weak keys and return it.
-- @param t table to modify in place
-- @return the same table `t`
local
function weakkey (t)
    -- `__mode = "k"` is a field assignment. The previous `__mode == "k"`
    -- was a comparison, which produced the metatable `{false}` and left
    -- every cache strongly keyed.
    return setmetatable(t, {__mode = "k"})
end
--- Apply `func` to each element of the sequence `ary`.
-- Extra arguments are forwarded to `func` after the element.
-- @return a new sequence holding the results, in order
local
function map (ary, func, ...)
    local out, count = {}, #ary
    local i = 0
    while i < count do
        i = i + 1
        out[i] = func(ary[i], ...)
    end
    return out
end
--- Apply `func` to every value of `tbl` (array and hash part alike).
-- Extra arguments are forwarded to `func` after the value.
-- @return a new table with the same keys and the mapped values
local
function map_all (tbl, func, ...)
    local out = {}
    local key, value = next(tbl)
    while key ~= nil do
        out[key] = func(value, ...)
        key, value = next(tbl, key)
    end
    return out
end
--- Left fold of the sequence `ary` with the binary function `func`.
-- @param ary  sequence to reduce
-- @param func function (acc, element) -> new acc
-- @param acc  optional seed; when nil, `ary[1]` seeds the fold and the
--             reduction starts at the second element
-- @return the accumulated value (nil for an empty `ary` without seed)
local
function fold (ary, func, acc)
    local first = 1
    -- Test against nil so that an explicit `false` seed is honoured.
    if acc == nil then
        acc = ary[1]
        first = 2
    end
    for i = first, #ary do
        acc = func(acc, ary[i])
    end
    -- The accumulator is the result; the original returned `ary` by mistake.
    return acc
end
--- Pair up the elements of two sequences by position.
-- The result is as long as the longer input; missing elements are nil.
-- @return sequence of `{a1[i], a2[i]}` pairs
local
function zip(a1, a2)
    local pairs_out = {}
    local longest = m_max(#a1, #a2)
    local i = 1
    while i <= longest do
        pairs_out[i] = {a1[i], a2[i]}
        i = i + 1
    end
    return pairs_out
end
--- Pair up the values of two tables by key.
-- Every key present in either table appears in the result, mapped to
-- `{t1[key], t2[key]}`.
local
function zip_all(t1, t2)
    local out = {}
    -- Keys of the first table.
    for key, left in pairs(t1) do
        out[key] = {left, t2[key]}
    end
    -- Keys that only the second table has.
    for key, right in pairs(t2) do
        if out[key] == nil then
            out[key] = {t1[key], right}
        end
    end
    return out
end
--- Keep the elements of the sequence `ary` for which `func` is truthy.
-- @param ary  sequence to filter
-- @param func predicate called with each element
-- @return a new sequence with the retained elements, order preserved
local
function filter(ary, func)
    -- The parameter was declared as `a1` while the body read the undefined
    -- global `ary`, and the result was never returned.
    local res = {}
    for i = 1, #ary do
        if func(ary[i]) then
            t_insert(res, ary[i])
        end
    end
    return res
end
--- Identity: returns its arguments unchanged.
local function id (...)
    return ...
end
--- Does nothing and returns nothing.
local function nop ()
end
--- Functional form of `a and b`.
local function AND (a, b)
    if a then return b end
    return a
end
--- Functional form of `a or b`.
local function OR (a, b)
    if a then return a end
    return b
end
--- Copy of `tbl`: every key is kept, each value is passed through `id`.
local function copy (tbl)
    return map_all(tbl, id)
end
--- Folds the sequence `ary` with `AND`.
local function all (ary)
    return fold(ary, AND)
end
--- Folds the sequence `ary` with `OR`.
local function any (ary)
    return fold(ary, OR)
end
--- True when `val` is below 128.
local function lt128 (val)
    return val < 128
end
--- Sets, From PiL: | |
-- | |
-- Metatable shared by all sets; `set_isset` recognises sets by it.
local set_mt = {}
--- Build a set from the sequence `t`.
-- @return table whose keys are the elements of `t`, each mapped to true
local
function newset (t)
    local members = setmetatable({}, set_mt)
    for _, element in ipairs(t) do
        members[element] = true
    end
    return members
end
--- Union of two sets.
-- @return a fresh set containing every key of `a` and of `b`
local
function set_union (a,b)
    local union = newset{}
    for member in pairs(a) do
        union[member] = true
    end
    for member in pairs(b) do
        union[member] = true
    end
    return union
end
--- List the members of the set `s`.
-- @return sequence of the keys of `s`, in unspecified order
local
function set_tolist (s)
    local members, count = {}, 0
    for member in pairs(s) do
        count = count + 1
        members[count] = member
    end
    return members
end
--- Tell whether `s` was built by `newset` (i.e. carries `set_mt`).
local
function set_isset (s)
    local mt = getmetatable(s)
    return mt == set_mt
end
--- Ranges | |
-- | |
-- Metatable shared by all ranges; `range_isrange` recognises ranges by it.
local range_mt = {}
--- Build the range `{low, high}` from two bounds given in either order.
local
function newrange (v1, v2)
    local low, high = v1, v2
    if low > high then
        low, high = high, low
    end
    return setmetatable({low, high}, range_mt)
end
--- Tell whether two ranges share at least one value (bounds inclusive).
local
function range_overlap (r1, r2)
    if not (r1[1] <= r2[2]) then
        return false
    end
    return r2[1] <= r1[2]
end
--- Merge two overlapping ranges.
-- @return the smallest range covering both, or nil when they do not overlap
local
function range_merge (r1, r2)
    if not range_overlap(r1, r2) then return nil end
    local low, high
    if r1[1] < r2[1] then low = r1[1] else low = r2[1] end
    if r1[2] > r2[2] then high = r1[2] else high = r2[2] end
    return newrange(low, high)
end
--- Tell whether `r` was built by `newrange` (i.e. carries `range_mt`).
local
function range_isrange (r)
    local mt = getmetatable(r)
    return mt == range_mt
end
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- CharSet handling ----------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
--- UTF-8 | |
-- | |
-- We provide: | |
-- * utf8_validate(subject, start, finish) -- validator | |
-- * utf8_split_int(subject) --> table{int} | |
-- * utf8_split_char(subject) --> table{char} | |
-- * utf8_next_int(subject, index) -- iterator | |
-- * utf8_next_char(subject, index) -- iterator | |
-- * utf8_get_int(subject, index) -- Julia-style iterator | |
-- * utf8_get_char(subject, index) -- Julia-style iterator | |
-- | |
-- See each function for usage. | |
-- Utility function. | |
-- Modified from code by Kein Hong Man <[email protected]>, | |
-- found at http://lua-users.org/wiki/SciteUsingUnicode. | |
--- Decode the lead byte of a UTF-8 sequence.
-- @param byte numeric value of the first byte of a character
-- @return the number of continuation bytes that follow, and the payload
--         bits carried by the lead byte
-- Raises an error for bytes that cannot start a sequence. Lead bytes for
-- 5- and 6-byte sequences (0xF8..0xFD) are accepted here.
local
function utf8_offset (byte)
    if byte < 128 then return 0, byte
    elseif byte < 192 then
        error("Byte values between 0x80 and 0xBF cannot start a multibyte sequence")
    elseif byte < 224 then return 1, byte - 192
    elseif byte < 240 then return 2, byte - 224
    elseif byte < 248 then return 3, byte - 240
    elseif byte < 252 then return 4, byte - 248
    elseif byte < 254 then return 5, byte - 252
    else
        -- The message used to read "OxFF" (capital letter O).
        error("Byte values between 0xFE and 0xFF cannot start a multibyte sequence")
    end
end
--[[
validate a given (sub)string.
returns two values:
* The first is either true, false or nil, respectively on success, error, or
  incomplete subject.
* The second is the index of the last byte of the last valid char.
`start` defaults to 1 and `finish` to the length of `subject`.
--]]
local
function utf8_validate (subject, start, finish)
    start = start or 1
    finish = finish or #subject
    -- `offset`: continuation bytes still expected for the current character.
    -- `char`: index of the lead byte of the current character.
    local offset, char
        = 0
    for i = start,finish do
        -- `b` and `success` were accidental globals; they are now locals.
        local b = s_byte(subject,i)
        if offset == 0 then
            char = i
            local success
            success, offset = pcall(utf8_offset, b)
            if not success then return false, char - 1 end
        else
            -- Continuation bytes must lie in 0x80..0xBF.
            if not (127 < b and b < 192) then
                return false, char - 1
            end
            offset = offset -1
        end
    end
    if offset ~= 0 then return nil, char - 1 end -- Incomplete input.
    return true, finish
end
--[[
Stateless iterator over the code points of a UTF-8 string.
Usage:
    for _end, start, cpt in utf8_next_int, "˙†ƒ˙©√" do
        print(cpt)
    end
`start` and `_end` are the byte bounds of the character, and `cpt` is its
code point.
It produces:
    729
    8224
    402
    729
    169
    8730
--]]
local
function utf8_next_int (subject, i)
    -- `i` is the index of the last byte of the previous character, or nil.
    local first = i and i + 1 or 1
    if first > #subject then return end
    local offset, val = utf8_offset(s_byte(subject, first))
    local last = first + offset
    for pos = first + 1, last do
        local c = s_byte(subject, pos)
        -- Each continuation byte contributes its six low bits.
        val = val * 64 + (c - 128)
    end
    return last, first, val
end
--[[
Stateless iterator over the characters of a UTF-8 string.
Usage:
    for _end, start, char in utf8_next_char, "˙†ƒ˙©√" do
        print(char)
    end
`start` and `_end` are the byte bounds of the character, and `char` is the
character itself, as a string.
It produces:
    ˙
    †
    ƒ
    ˙
    ©
    √
--]]
local
function utf8_next_char (subject, i)
    -- `i` is the index of the last byte of the previous character, or nil.
    local first = i and i + 1 or 1
    if first > #subject then return end
    local last = first + utf8_offset(s_byte(subject, first))
    return last, first, s_sub(subject, first, last)
end
--[[
Takes a UTF-8 string, returns the array of its code points.
--]]
local
function utf8_split_int (subject)
    local points, count = {}, 0
    local last, _, point = utf8_next_int(subject)
    while last ~= nil do
        count = count + 1
        points[count] = point
        last, _, point = utf8_next_int(subject, last)
    end
    return points
end
--[[
Takes a UTF-8 string, returns the array of its characters (as strings).
--]]
local
function utf8_split_char (subject)
    local pieces, count = {}, 0
    local last, _, piece = utf8_next_char(subject)
    while last ~= nil do
        count = count + 1
        pieces[count] = piece
        last, _, piece = utf8_next_char(subject, last)
    end
    return pieces
end
--- Read the code point that starts at byte `i` of `subject`.
-- @return the code point and the index of the byte following the
--         character; nothing when `i` is past the end of `subject`
local
function utf8_get_int(subject, i)
    if i > #subject then return end
    local offset, val = utf8_offset(s_byte(subject, i))
    local last = i + offset
    for pos = i + 1, last do
        local c = s_byte(subject, pos)
        val = val * 64 + ( c - 128 )
    end
    return val, last + 1
end
--- Read the character that starts at byte `i` of `subject`.
-- @return the character (a string) and the index of the byte following
--         it; nothing when `i` is past the end of `subject`
local
function utf8_get_char(subject, i)
    if i > #subject then return end
    local last = i + utf8_offset(s_byte(subject, i))
    return s_sub(subject, i, last), last + 1
end
--- ASCII and binary. | |
-- | |
-- See UTF-8 above for the API docs. | |
--- Check that bytes `start..finish` of `subject` are all 7-bit ASCII.
-- `start` defaults to 1 and `finish` to the length of `subject`.
-- @return true and `finish` on success; false and the index of the last
--         valid byte otherwise
local
function ascii_validate (subject, start, finish)
    start = start or 1
    finish = finish or #subject
    for i = start,finish do
        -- `b` was an accidental global.
        local b = s_byte(subject,i)
        if b > 127 then return false, i - 1 end
    end
    return true, finish
end
--- Check that bytes `start..finish` of `subject` are printable ASCII
-- (0x20..0x7E).
-- `start` defaults to 1 and `finish` to the length of `subject`.
-- @return true and `finish` on success; false and the index of the last
--         valid byte otherwise
local
function printable_ascii_validate (subject, start, finish)
    start = start or 1
    finish = finish or #subject
    for i = start,finish do
        -- `b` was an accidental global.
        local b = s_byte(subject,i)
        -- 127 (DEL) is a control character, so the upper bound is 126.
        if b < 32 or b > 126 then return false, i - 1 end
    end
    return true, finish
end
--- Validator for the binary charset: every byte string is valid.
-- @return true and `finish` (defaulting to the length of `subject`)
local
function binary_validate (subject, start, finish)
    if not finish then
        finish = #subject
    end
    return true, finish
end
--- Stateless iterator over the byte values of `subject`.
-- Usage: `for _end, start, byte in binary_next_int, subject do ... end`
-- @param i index of the previous byte, or nil to start
-- @return end index, start index (equal for single bytes) and the numeric
--         byte value; nothing once the subject is exhausted
local
function binary_next_int (subject, i)
    i = i and i+1 or 1
    -- `>` rather than `>=`, which skipped the last byte.
    if i > #subject then return end
    -- The `int` flavour yields the numeric value (it used to return the
    -- one-character string).
    return i, i, s_byte(subject, i)
end
--- Stateless iterator over the one-byte characters of `subject`.
-- Usage: `for _end, start, char in binary_next_char, subject do ... end`
-- @param i index of the previous byte, or nil to start
-- @return end index, start index (equal for single bytes) and the
--         character as a string; nothing once the subject is exhausted
local
function binary_next_char (subject, i)
    i = i and i+1 or 1
    if i > #subject then return end
    -- The `char` flavour yields a string (it used to return the byte value).
    return i, i, s_sub(subject, i, i)
end
--- Split `subject` into the array of its byte values.
local
function binary_split_int (subject)
    local bytes = {}
    local i, len = 1, #subject
    while i <= len do
        bytes[i] = s_byte(subject, i)
        i = i + 1
    end
    return bytes
end
--- Split `subject` into the array of its one-byte characters.
local
function binary_split_char (subject)
    local pieces = {}
    local i, len = 1, #subject
    while i <= len do
        pieces[i] = s_sub(subject, i, i)
        i = i + 1
    end
    return pieces
end
--- Read the byte value at index `i` of `subject`.
-- @return the byte value (nil past the end) and `i + 1`
local
function binary_get_int(subject, i)
    local value = s_byte(subject, i)
    return value, i + 1
end
--- Read the one-byte character at index `i` of `subject`.
-- @return the character (the empty string past the end) and `i + 1`
local
function binary_get_char(subject, i)
    local piece = s_sub(subject, i, i)
    return piece, i + 1
end
--- The table | |
-- | |
-- Registry of the supported charsets, keyed by name. Each entry bundles the
-- validator, splitters, iterators and getters for that encoding.
local CharSets
do
    -- The byte-wide charsets differ only by their validator; everything
    -- else is the binary implementation.
    local function bytewide (validator)
        return {
            validate = validator,
            split_char = binary_split_char,
            split_int = binary_split_int,
            next_char = binary_next_char,
            next_int = binary_next_int,
            get_char = binary_get_char,
            get_int = binary_get_int
        }
    end
    CharSets = {
        binary = bytewide(binary_validate),
        ASCII = bytewide(ascii_validate),
        ["printable ASCII"] = bytewide(printable_ascii_validate),
        ["UTF-8"] = {
            validate = utf8_validate,
            split_char = utf8_split_char,
            split_int = utf8_split_int,
            next_char = utf8_next_char,
            next_int = utf8_next_int,
            get_char = utf8_get_char,
            get_int = utf8_get_int
        }
    }
end
-- Active charset and its functions. The library starts in "binary" mode;
-- `PL_set_charset` rebinds all of these.
local charset    = "binary"
local validate   = binary_validate
local split_int  = binary_split_int
local split_char = binary_split_char
local next_int   = binary_next_int
local next_char  = binary_next_char
local get_int    = binary_get_int
local get_char   = binary_get_char
-- Set once a charset has been selected; see PL_set_charset.
local charset_locked = false
--- Select the charset used by the library.
-- @param set name of an entry of `CharSets` ("binary", "ASCII",
--            "printable ASCII" or "UTF-8")
-- Raises an error for an unknown name, and on any call made after a charset
-- has been selected ("charsets are forever").
local
function PL_set_charset(set)
    -- The original tried to install this guard as `PL.setCharset`, at a
    -- point where the local `PL` is not declared yet and under a name that
    -- differs from the exported `set_charset`. A flag captured by the
    -- function achieves the intended effect.
    if charset_locked then
        error("Charsets are forever (attempt to redefine the charset).")
    end
    local s = CharSets[set]
    if not s then
        -- Report the requested name; `s` is nil on this path.
        error("Bad Charset: " .. tostring(set))
    end
    -- `charset` holds the NAME: it is compared with "UTF-8" and concatenated
    -- into error messages elsewhere in the file.
    charset, validate, split_int, split_char
        , next_int, next_char, get_int, get_char
        = set, s.validate, s.split_int, s.split_char
        , s.next_int, s.next_char, s.get_int, s.get_char
    charset_locked = true
end
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- The module ----------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- The module table. It doubles as the metatable of pattern objects, hence
-- the `__index` pointing back at itself.
local PL = {
    version = function() return "v0.0.0" end,
    setmaxstack = nop --Just a stub. Is it even necessary for this version?
}
PL.__index = PL
--- Tell whether `pt` is a pattern.
-- @return the string "pattern" when the metatable of `pt` is `PL`,
--         nil otherwise
local
function PL_type(pt)
    return getmetatable(pt) == PL and "pattern" or nil
end
PL.type = PL_type
PL.set_charset = PL_set_charset
PL.charsets = CharSets
--- Lua 5.1/5.2 compat | |
-- | |
--- Turn a constructor table into a pattern object.
-- Default implementation: the table itself becomes the pattern, with `PL`
-- as its metatable.
local
function newpattern(pt)
    return setmetatable(pt,PL)
end
do
    if newproxy then
        -- `newproxy` is available: patterns are userdata proxies and their
        -- fields live in `proxycache`, keyed by the proxy.
        -- NOTE(review): presumably this is the Lua 5.1 path, chosen so that
        -- `#pattern` reaches `__len` -- confirm.
        local proxycache = weakkey{}
        local __index_PL = {__index = PL}
        PL.proxycache = proxycache
        newpattern = function(cons)
            local pt = newproxy()
            setmetatable(cons, __index_PL)
            proxycache[pt]=cons
            debug.setmetatable(pt,PL)
            return pt
        end
        -- Field reads and writes on a proxy are redirected to its table.
        function PL:__index(k)
            return proxycache[self][k]
        end
        function PL:__newindex(k, v)
            proxycache[self][k] = v
        end
    elseif #setmetatable({},{__len = function()return 10 end}) ~= 10 then
        -- Neither proxies nor `__len` on tables. (Message typo fixed:
        -- "metatethod" -> "metamethod".)
        print("Warning: The `__len` metamethod won't work with patterns, "
            .."use `PL.L(pattern)` for lookaheads.")
    end
end
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Constructors --------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Caches used by the constructors so that equal inputs yield the same
-- pattern object. All are created through `weakkey`.
local ptcache        = weakkey{}
local backcache      = weakkey{}
local setcache       = weakkey{}
local rangecache     = weakkey{}
local refcache       = weakkey{}
local repeatcache    = weakkey{}
local unmcache       = weakkey{}
local lookaheadcache = weakkey{}
-- Pattern constructors, keyed by pattern type or Lua type name.
local constructors = {}
-- The interface between the API (lpeg.P, lpeg.V, lpeg.C...), the main
-- pattern cache and the pattern constructors.
-- `typ` selects the constructor; `v` and the extra arguments are handed to
-- it. Raises an error when no constructor exists for `typ`.
local function construct (typ, v, ...)
    -- A cache hit also handles `P(pattern)` -> pattern, since every pattern
    -- built here is registered under itself (see the end of this function).
    local known = ptcache[v]
    if known then return known end
    local build = constructors[typ]
    local pt = build and build(v, ...)
    if not pt then
        error("No pattern constructor for type '".. typ.."'.")
    end
    ptcache[pt] = pt
    return pt
end
-- constant patterns: one shared instance per parameterless pattern type
local truept  = newpattern{type = "true"}
local falsept = newpattern{type = "false"}
local eospt   = newpattern{type = "eos"}
local onept   = newpattern{type = "one"}
-- P"abc": literal string pattern.
-- `data` holds the bytes of `s` (matching is byte-wise), `as_is` the
-- original string. Raises an error when `s` is invalid in the current
-- charset.
constructors["string"] = function(s)
    -- Capture the second result too: the error message needs `index`, which
    -- used to be read from an undefined global.
    local success, index = validate(s)
    if not success then
        error("Character at position ".. index+1
            .." is not a valid "..charset.." one.")
    end
    local pt = newpattern{
        type = "string",
        data = binary_split_int(s),
        as_is = s
    }
    ptcache[s] = pt
    return pt
end
-- P(n): 0 -> always true, 1 -> any single character, -1 -> end of subject,
-- n > 0 -> `n` characters, other n -> negation of `-n` characters.
-- The result is cached under `n`.
constructors["number"] = function(n)
    local function remember (pt)
        ptcache[n] = pt
        return pt
    end
    if n == 0 then return remember(truept) end
    if n == 1 then return remember(onept) end
    if n == -1 then return remember(eospt) end
    if n > 0 then
        return remember(newpattern{
            type = "any",
            data = n
        })
    end
    -- The unary minus goes through the pattern's `__unm` metamethod.
    return remember(-newpattern{
        type = "any",
        data = -n
    })
end
constructors["boolean"] = function(b) | |
return b and truept or falsept | |
end | |
-- Grammar
-- P{...}: builds a grammar pattern from a copy of the rule table `g`.
-- When `g[1]` is a rule name, it is turned into a reference to that rule.
constructors["table"] = function(g)
    g = copy(g)
    -- Compare with the type NAME "string"; the original compared with the
    -- `string` library table, so the branch could never run.
    if type(g[1]) == "string" then g[1] = PL.V(g[1]) end
    return newpattern{
        type = "grammar",
        data = g
    }
end
-- P(f): pattern wrapping the function `f`, cached under `f`.
constructors["function"] = function(f)
    local pt = newpattern{ type = "function", data = f }
    ptcache[f] = pt
    return pt
end
-- B"abc"
-- Look-behind for the literal string `s`, cached per string. Raises an
-- error when `s` is invalid in the current charset.
constructors["back"] = function(_, s)
    local cached = backcache[s]
    if cached then
        return cached
    end
    local success, index = validate(s)
    if not success then
        error("Character at position ".. index+1
            .." is not a valid "..charset.." one.")
    end
    local pt = newpattern{
        type = "back",
        data = binary_split_int(s),
        as_is = s
    }
    backcache[s] = pt
    return pt
end
-- R"az"
-- `r` is either a range object or a string, which is validated and split
-- into code points to build the range. Patterns are cached per pair of
-- bounds.
constructors["range"] = function(_, r)
    local as_is = r
    if not range_isrange(r) then
        local success, index = validate(r)
        if not success then
            error("Character at position ".. index+1
                .." is not a valid "..charset.." one.")
        end
        r = newrange(t_unpack(split_int(r)))
    end
    local cacheKey = r[1]..":"..r[2]
    local pt = rangecache[cacheKey]
    if not pt then
        pt = newpattern{
            type = "range",
            data = r,
            as_is = as_is
        }
        rangecache[cacheKey] = pt
    end
    return pt
end
-- S"abc"
-- `s` is either a set object or a string, which is validated and split into
-- code points. Patterns are cached under the sorted list of members.
constructors["set"] = function(_, s)
    local as_is = s
    if not set_isset(s) then
        local success, index = validate(s)
        if not success then
            error("Character at position ".. index+1
                .." is not a valid "..charset.." one.")
        end
        s = split_int(s)
    else
        s = set_tolist(s)
    end
    -- Sorting makes the cache key independent of the member order.
    t_sort(s)
    local cacheKey = t_concat(s,"$")
    local pt = setcache[cacheKey]
    if not pt then
        pt = newpattern{
            type = "set",
            data = newset(s),
            as_is = as_is
        }
        setcache[cacheKey] = pt
    end
    return pt
end
-- V"name"
-- Reference to the grammar rule `name`; one pattern per name.
constructors["ref"] = function(_, name)
    local cacheKey = "Ref: "..name
    local pt = refcache[cacheKey]
    if not pt then
        pt = newpattern{
            type = "ref",
            data = name
        }
        refcache[cacheKey] = pt
    end
    return pt
end
-- p1 + p2
-- `alt` is the array of alternatives, tried in order.
constructors["alternate"] = function(_, alt)
    local pt = newpattern{ type = "alternate", data = alt }
    return pt
end
-- p1 * p2
-- `seq` is the array of patterns to match one after the other.
constructors["sequence"] = function(_, seq)
    local pt = newpattern{ type = "sequence", data = seq }
    return pt
end
-- pt^n
-- Cached per repetition count `n`, then per repeated pattern `p`.
constructors["repeat"] = function(_, p, n)
    local bucket = repeatcache[n]
    if not bucket then
        bucket = {}
        repeatcache[n] = bucket
    end
    local pt = bucket[p]
    if not pt then
        pt = newpattern{
            type = "repeat",
            data = p,
            times = n
        }
        bucket[p] = pt
    end
    return pt
end
-- -pt
-- Negation of `p`; one pattern per negated pattern.
constructors["unm"] = function(_, p)
    local pt = unmcache[p]
    if not pt then
        pt = newpattern{
            type = "unm",
            data = p
        }
        unmcache[p] = pt
    end
    return pt
end
-- #pt
-- Lookahead on `p`; one pattern per inspected pattern.
constructors["lookahead"] = function(_, p)
    local pt = lookaheadcache[p]
    if not pt then
        pt = newpattern{
            type = "lookahead",
            data = p
        }
        lookaheadcache[p] = pt
    end
    return pt
end
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- API ------------------------------------------------------------------------ | |
------------------------------------------------------------------------------- | |
--- Convert `v` to a pattern.
-- The constructor is selected by the Lua type of `v`.
local
function PL_P (v)
    local kind = type(v)
    return construct(kind, v)
end
PL.P = PL_P
--- Pattern matching any single character of `set`.
-- The empty string yields the always-failing pattern.
local
function PL_S (set)
    if set ~= "" then
        return construct("set", nil, set)
    end
    return PL_P(false)
end
PL.S = PL_S
--- Pattern matching any single character belonging to one of the ranges.
-- @param ... range strings such as "az" (lower and upper bound)
-- @return with no argument, the always-failing pattern; otherwise the
--         ordered choice of all the given ranges
local
function PL_R (...)
    local count = select('#', ...)
    if count == 0 then
        return PL_P(false)
    end
    local pt = construct("range", nil, (...))
    -- Every range after the first used to be ignored. They are now combined
    -- through the `+` metamethod (`PL.__add`), which is in place by the time
    -- this function can be called.
    for i = 2, count do
        pt = pt + construct("range", nil, (select(i, ...)))
    end
    return pt
end
PL.R = PL_R
--- Reference to the grammar rule called `name`.
local
function PL_V (name)
    local ref = construct("ref", nil, name)
    return ref
end
PL.V = PL_V
--- Look-behind pattern for the string `name`.
local
function PL_B (name)
    local back = construct("back", nil, name)
    return back
end
PL.B = PL_B
--- `a * b`: match `a`, then `b`.
-- Both operands are converted with `PL_P`.
-- @return a single pattern when the optimizations below collapse the
--         sequence to one element, a "sequence" pattern otherwise
local
function sequence (a,b)
    a,b = PL_P(a), PL_P(b)
    local seq1, seq2 = {}, {}
    -- A few optimizations:
    -- 1. flatten the sequence (a * b) * (c * d) => a * b * c * d
    for _, p in ipairs{a,b} do
        if p.type == "sequence" then
            for _, q in ipairs(p.data) do
                t_insert(seq1, q)
            end
        else
            t_insert(seq1,p)
        end
    end
    -- 2. Concatenate adjacent `string` and adjacent `any` patterns.
    -- TODO: Repeat patterns?
    seq2[1] = seq1[1]
    for i = 2,#seq1 do
        local p1, p2 = seq2[#seq2], seq1[i]
        if p1.type == "string" and p2.type == "string" then
            seq2[#seq2] = PL_P(p1.as_is .. p2.as_is)
        elseif p1.type == "any" and p2.type == "any" then
            -- `PL_P`, not the undefined global `P`.
            seq2[#seq2] = PL_P(p1.data + p2.data)
        else
            t_insert(seq2,p2)
        end
    end
    if #seq2 == 1 then
        return seq2[1]
    end
    -- Pass an explicit nil: the original passed the global `_`, which
    -- `construct` would look up in the pattern cache if anything had ever
    -- assigned it.
    return construct("sequence", nil, seq2)
end
PL.__mul = sequence
--- `a + b`: ordered choice, `a` is tried before `b`.
-- Both operands are converted with `PL_P`.
-- @return a single pattern when the optimizations below collapse the choice
--         to one element, an "alternate" pattern otherwise
local
function PL_alternate (a,b)
    a,b = PL_P(a), PL_P(b)
    local alt1, alt2 = {}, {}
    -- A few optimizations:
    -- 1. flatten (a + b) + (c + d) => a + b + c + d
    for _, p in ipairs{a,b} do
        if p.type == "alternate" then
            for _, q in ipairs(p.data) do
                t_insert(alt1, q)
            end
        else
            t_insert(alt1,p)
        end
    end
    -- 2. Merge adjacent `set` patterns.
    -- TODO: merge captures who share the same structure?
    -- so that C(P1) + C(P2) become C(P1+P2)?
    alt2[1] = alt1[1]
    for i = 2,#alt1 do
        local p1, p2 = alt2[#alt2], alt1[i]
        -- Merging concatenates the source strings, so it only applies when
        -- both sets were built from strings (`as_is` may also be a set
        -- object, which cannot be concatenated).
        if p1.type == "set" and p2.type == "set"
        and type(p1.as_is) == "string" and type(p2.as_is) == "string" then
            alt2[#alt2] = PL_S(p1.as_is..p2.as_is)
        else
            t_insert(alt2,p2)
        end
    end
    if #alt2 == 1 then
        return alt2[1]
    end
    -- Explicit nil instead of the global `_` (see `construct`: its second
    -- argument is looked up in the pattern cache).
    return construct("alternate", nil, alt2)
end
PL.__add = PL_alternate
--- `#pt`: lookahead on `pt`.
-- Also exported as `PL.L`.
local
function PL_lookahead (pt)
    -- Explicit nil instead of the global `_`: `construct` uses its second
    -- argument as a cache key, so a stray global value could hijack the
    -- result.
    return construct("lookahead", nil, pt)
end
PL.__len = PL_lookahead
PL.L = PL_lookahead
--- `-pt`: negation of `pt`.
local
function PL_unm(pt)
    -- Explicit nil instead of the global `_`: `construct` uses its second
    -- argument as a cache key, so a stray global value could hijack the
    -- result.
    return construct("unm", nil, pt)
end
PL.__unm = PL_unm
--- `a - b`: match `a` only where `b` does not match.
-- Built as `-b * a`, through the pattern operators.
local
function PL_sub (a, b)
    local wanted, unwanted = PL_P(a), PL_P(b)
    return PL_unm(unwanted) * wanted
end
PL.__sub = PL_sub
--- `pt^n`: repetition of `pt`.
-- @param pt the pattern to repeat
-- @param n  a number: at least `n` repetitions when n >= 0, at most `-n`
--           otherwise
-- Raises an error when `n` is a pattern or is not a number.
local
function PL_repeat (pt,n)
    if PL_type(n) == "pattern" then
        error"Pattern encountered at the wrong side of ^."
    end
    -- Reject other non-numbers up front; they would otherwise fail later
    -- with an unrelated message (table indexing or comparison error).
    if type(n) ~= "number" then
        error("The exponent of ^ must be a number, got a " .. type(n) .. ".")
    end
    -- Explicit nil instead of the global `_` (second argument of
    -- `construct` is a cache key).
    return construct("repeat", nil, pt, n)
end
PL.__pow = PL_repeat
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Compilers ------------------------------------------------------------------ | |
------------------------------------------------------------------------------- | |
-- Compilers, keyed by pattern type. Each turns a pattern into a matcher
-- `function (subject, index, state) -> success, index`.
local compilers = {}
--- Return the matcher of `pt`, compiling and memoizing it on first use.
-- Raises "pattern expected" when `pt` is not a pattern.
local function compile(pt, cpstate)
    if PL_type(pt) ~= "pattern" then error("pattern expected") end
    local matcher = pt.compiled
    if not matcher then
        matcher = compilers[pt.type](pt, cpstate)
        pt.compiled = matcher
    end
    return matcher
end
-- Literal string: compares the bytes of the subject, starting at `index`,
-- with the bytes stored in the pattern.
compilers["string"] = function (pt)
    local bytes = pt.data
    local len = #bytes
    return function(subject, index, state)
        local base = index - 1
        for i = 1, len do
            if s_byte(subject, base + i) ~= bytes[i] then
                return false, index
            end
        end
        return true, index + len
    end
end
--- Matcher of P(true): succeeds without consuming anything.
local
function truecompiled (subject, index, state)
    return true, index
end
compilers["true"] = function (pt) return truecompiled end
--- Matcher of P(false): always fails.
local
function falsecompiled (subject, index, state)
    return false, index
end
compilers["false"] = function (pt) return falsecompiled end
--- Matcher of P(-1): succeeds only past the last byte of the subject.
local
function eoscompiled (subject, index, state)
    local at_end = #subject < index
    return at_end, index
end
compilers["eos"] = function (pt) return eoscompiled end
--- Matcher of P(1): consumes one character of the current charset.
-- @return true and the index after the character, or false and `index`
--         when no character can be read at `index`
local
function onecompiled (subject, index, state)
    local char, nindex = get_int(subject, index)
    if char then
        return true, nindex
    end
    -- `false`; the original spelled it `flase`, an undefined global.
    return false, index
end
compilers["one"] = function (pt)
    return onecompiled
end
-- P(n), n > 1: consumes exactly `n` characters.
-- The implementation is chosen when the pattern is compiled, from the
-- charset active at that time.
compilers["any"] = function (pt)
    -- `charset` is meant to hold the charset name; the charset table is
    -- accepted as well so that the UTF-8 path is taken either way.
    if charset == "UTF-8" or charset == CharSets["UTF-8"] then
        local N = pt.data
        return function(subject,index,state)
            -- Walk `N` characters from `index`. The original re-read the
            -- character at `index` on every iteration and so advanced by a
            -- single character whatever `N` was.
            local pos = index
            for _ = 1, N do
                local c, nindex = get_int(subject, pos)
                if not c then return false, index end
                pos = nindex
            end
            return true, pos
        end
    else -- version optimized for byte-width encodings.
        local N = pt.data - 1
        return function(subject, index, state)
            local n = index + N
            if n <= #subject then
                return true, n + 1
            else
                return false, index
            end
        end
    end
end
-- type = "grammar",
-- Compiles every rule, then matches from rule 1. While the start rule runs,
-- the compiled rules sit on top of `state.grammars`.
compilers["grammar"] = function (pt)
    local rules = map_all(pt.data, compile)
    local entry = rules[1]
    return function (subject, index, state)
        local stack = state.grammars
        t_insert(stack, rules)
        local success, nindex = entry(subject, index, state)
        t_remove(stack)
        return success, nindex
    end
end
-- P(f): calls `f(subject, index)` and interprets its first result:
--   * true           -> success, nothing consumed
--   * a number       -> success, that number is the new index; it must lie
--                       in [index, #subject + 1]
--   * false/nil/none -> failure
-- Anything else raises an error.
compilers["function"] = function (pt)
    local func = pt.data
    return function (subject, index)
        local nindex = func(subject, index)
        if nindex == true then
            return true, index
        elseif type(nindex) == "number" then
            -- `#subject + 1` (just past the end) is a legal position; the
            -- original rejected it.
            if index <= nindex and nindex <= #subject + 1 then
                return true, nindex
            else
                error"Index out of bounds returned by match-time capture."
            end
        elseif not nindex then
            return false, index
        else
            error"Match time capture must return a number, a boolean, nil or nothing."
        end
    end
end
-- B"abc": succeeds, consuming nothing, when the bytes just before `index`
-- are those of the pattern.
compilers["back"] = function (pt)
    local bytes = pt.data
    local len = #bytes
    return function (subject, index, state)
        local base = index - len - 1
        for i = 1, len do
            if s_byte(subject, base + i) ~= bytes[i] then
                return false, index
            end
        end
        return true, index
    end
end
-- R"az": consumes one character whose code lies within the range bounds
-- (inclusive).
compilers["range"] = function (pt)
    local low, high = pt.data[1], pt.data[2]
    return function (subject, index, state)
        local char, nindex = get_int(subject, index)
        if not (char and low <= char and char <= high) then
            return false, index
        end
        return true, nindex
    end
end
-- S"abc": consumes one character whose code is a member of the set.
compilers["set"] = function (pt)
    local members = pt.data
    return function (subject, index, state)
        local char, nindex = get_int(subject, index)
        if not members[char] then
            return false, index
        end
        return true, nindex
    end
end
-- V"name": runs the rule `name` of the innermost grammar being matched
-- (the top of `state.grammars`).
-- Raises an error when used outside a grammar or when the rule is missing.
compilers["ref"] = function (pt)
    local name = pt.data
    return function (subject, index, state)
        -- Resolve the rule on every call. Reference patterns are shared by
        -- name and compiled once, so remembering the first rule found (as
        -- the original did) made later grammars with a rule of the same
        -- name run the first grammar's rule.
        local grammars = state.grammars
        local gram = grammars[#grammars]
        if not gram then
            error("Rule '" .. tostring(name) .. "' used outside a grammar.")
        end
        local rule = gram[name]
        if not rule then
            error("Rule '" .. tostring(name) .. "' is undefined in the given grammar.")
        end
        return rule(subject, index, state)
    end
end
-- p1 + p2 + ...: tries each alternative from the SAME starting index and
-- returns the result of the first one that succeeds.
compilers["alternate"] = function (pt)
    local alternatives, n = map(pt.data, compile), #pt.data
    return function (subject, index, state)
        for i = 1, n do
            -- Keep `index` untouched: a failing alternative may report a
            -- position past the start, and the original fed that position
            -- to the next alternative.
            local success, nindex = alternatives[i](subject, index, state)
            if success then
                return true, nindex
            end
        end
        return false, index
    end
end
-- p1 * p2 * ...: matches every element in turn, each one starting where the
-- previous one stopped.
-- On failure the ORIGINAL index is returned, so callers that retry from the
-- reported position (ordered choice, repetition) start at the right place.
compilers["sequence"] = function (pt)
    local sequence, n = map(pt.data, compile), #pt.data
    return function (subject, index, state)
        local pos = index
        for i = 1, n do
            local success, npos = sequence[i](subject, pos, state)
            if not success then
                return false, index
            end
            pos = npos
        end
        return true, pos
    end
end
-- pt^n
--   n >= 0: at least `n` repetitions, then as many more as possible.
--   n <  0: at most `-n` repetitions; always succeeds.
-- The position only advances on a successful iteration; the index reported
-- by a failed iteration is ignored.
compilers["repeat"] = function (pt)
    local matcher, n = compile(pt.data), pt.times
    if n >= 0 then
        return function (subject, index, state)
            local pos = index
            -- Mandatory part.
            for _ = 1, n do
                local success, npos = matcher(subject, pos, state)
                if not success then return false, index end
                pos = npos
            end
            -- Greedy part. Stop as well when an iteration succeeds without
            -- consuming anything, which would otherwise loop forever.
            while true do
                local success, npos = matcher(subject, pos, state)
                if not success or npos == pos then break end
                pos = npos
            end
            return true, pos
        end
    else
        local max = -n
        return function (subject, index, state)
            local pos = index
            for _ = 1, max do
                local success, npos = matcher(subject, pos, state)
                -- Nothing more to gain after the first failure.
                if not success then break end
                pos = npos
            end
            return true, pos
        end
    end
end
-- -pt: succeeds, consuming nothing, exactly when `pt` fails at `index`.
compilers["unm"] = function (pt)
    local inner = compile(pt.data)
    return function (subject, index, state)
        local matched = inner(subject, index, state)
        return not matched, index
    end
end
-- #pt: succeeds, consuming nothing, exactly when `pt` matches at `index`.
compilers["lookahead"] = function (pt)
    local inner = compile(pt.data)
    return function (subject, index, state)
        local matched = inner(subject, index, state)
        return matched, index
    end
end
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Match ---------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
--- Match `pt` against `subject`.
-- @param pt      a pattern, or any value accepted by `PL.P`
-- @param subject the string to match
-- @param index   optional start position (default 1); must be a number in
--                [1, #subject + 1]
-- @param ...     extra arguments, stored in the match state
-- @return the index following the match, or nil when the match fails
local
function PL_match(pt, subject, index, ...)
    pt = PL_P(pt)
    if index == nil then
        index = 1
    elseif type(index) ~= "number" then
        error"The index must be a number"
    -- `#subject + 1` (just past the end) is a valid start; the original
    -- bound rejected it, and with it every explicit index on an empty
    -- subject.
    elseif not (1 <= index and index <= #subject + 1) then
        error("Index "..index.." out of bounds [1.."..(#subject + 1).."].")
    end
    local matcher = compile(pt)
    local state = {grammars = {}, args = {...}}
    local success, nindex = matcher(subject, index, state)
    if success then
        return nindex
    end
    return nil
end
PL.match = PL_match
------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Print ---------------------------------------------------------------------- | |
------------------------------------------------------------------------------- | |
-- Printers, keyed by pattern type. Each prints the pattern with the given
-- indentation (`offset`) and `prefix`. `PL_print`, the dispatcher, is
-- forward-declared here and defined after the printers.
local printers, PL_print = {}
do
    -- One output line: indentation, prefix, then the pattern's text.
    local function emit (offset, prefix, text)
        print(offset..prefix..text)
    end
    printers["string"] = function (pt, offset, prefix)
        emit(offset, prefix, "P\""..pt.as_is.."\"")
    end
    printers["true"] = function (pt, offset, prefix)
        emit(offset, prefix, "P( true )")
    end
    printers["false"] = function (pt, offset, prefix)
        emit(offset, prefix, "P( false )")
    end
    printers["eos"] = function (pt, offset, prefix)
        emit(offset, prefix, "P( -1 )")
    end
    printers["one"] = function (pt, offset, prefix)
        emit(offset, prefix, "P( 1 )")
    end
    printers["any"] = function (pt, offset, prefix)
        emit(offset, prefix, "P( "..pt.data.." )")
    end
end
-- Grammar: prints a header line, then each rule indented by one more space
-- and prefixed with its key.
printers["grammar"] = function (pt, offset, prefix)
    print(offset..prefix.."Grammar")
    for k, rule in pairs(pt.data) do
        local label = ( type(k)~="string"
            and type(k)..": " .. k
            or "\""..k.."\"" )
        -- `label .. " = "`: the original wrote `prefix " = "`, which is a
        -- call on a string and raised an error.
        PL_print(rule, offset.." ", label .. " = ")
    end
end
-- P(f): prints the function's `tostring` form.
printers["function"] = function (pt, offset, prefix)
    -- A function value cannot be concatenated directly; the original raised
    -- an error here.
    print(offset..prefix.."P( "..tostring(pt.data).." )")
end
-- B, R and S: printed as the constructor name applied to the value the
-- pattern was built from (`as_is`).
printers["back"] = function (pt, offset, prefix)
    local text = "B( "..pt.as_is.." )"
    print(offset..prefix..text)
end
printers["range"] = function (pt, offset, prefix)
    local text = "R( "..pt.as_is.." )"
    print(offset..prefix..text)
end
printers["set"] = function (pt, offset, prefix)
    local text = "S( "..pt.as_is.." )"
    print(offset..prefix..text)
end
-- V"name": rule names that are strings are printed quoted, other keys are
-- printed as `<type>: <value>`.
printers["ref"] = function (pt, offset, prefix)
    local name = pt.data
    -- Test the rule name itself; the original tested an undefined `k`, so
    -- every reference was printed as "nil: <name>".
    local val = ( type(name)~="string"
        and type(name)..": " .. tostring(name)
        or "\""..name.."\"" )
    print(offset..prefix.."V( "..val.." )")
end
-- Ordered choice: prints "+", then each alternative one level deeper.
printers["alternate"] = function (pt, offset, prefix)
    print(offset..prefix.."+")
    local deeper = offset.." "
    map(pt.data, PL_print, deeper, "")
end
-- Sequence: prints "*", then each element one level deeper.
printers["sequence"] = function (pt, offset, prefix)
    print(offset..prefix.."*")
    local deeper = offset.." "
    map(pt.data, PL_print, deeper, "")
end
-- Repetition, negation and lookahead print their operand on the same line,
-- preceded by an operator marker.
-- The marker is appended to the prefix received from the caller; the
-- original replaced that prefix, which would have dropped e.g. the rule
-- name supplied by the grammar printer.
printers["repeat"] = function (pt, offset, prefix)
    PL_print(pt.data, offset, prefix..pt.times.." ^ ")
end
printers["unm"] = function (pt, offset, prefix)
    PL_print(pt.data, offset, prefix.."- ")
end
printers["lookahead"] = function (pt, offset, prefix)
    PL_print(pt.data, offset, prefix.."# ")
end
--- Print `pt` with the given indentation and prefix, dispatching on the
-- pattern type.
function PL_print (pt, offset, prefix)
    local printer = printers[pt.type]
    return printer(pt, offset, prefix)
end
--- Public entry point: print `pt` with no indentation and no prefix.
function PL.print (pt)
    local printer = printers[pt.type]
    return printer(pt, "", "")
end
return PL
-- The Romantic WTF public license. | |
-- -------------------------------- | |
-- a.k.a. version "<3" or simply v3 | |
-- | |
-- | |
-- Dear user, | |
-- | |
-- The PureLPeg proto-library | |
-- | |
-- \ | |
-- '.,__ | |
-- \ / | |
-- '/,__ | |
-- / | |
-- / | |
-- / | |
-- has been / released | |
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | |
-- under the Romantic WTF Public License. | |
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~`,´ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | |
-- I hereby grant you an irrevocable license to | |
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | |
-- do what the gentle caress you want to | |
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | |
-- with this lovely | |
-- ~ ~ ~ ~ ~ ~ ~ ~ | |
-- / thing... | |
-- / ~ ~ ~ ~ | |
-- / Love, | |
-- # / '.' | |
-- ####### · | |
-- ##### | |
-- ### | |
-- # | |
-- | |
-- -- Pierre-Yves | |
-- | |
-- | |
-- P.S.: Even though I poured my heart into this work, | |
-- I _cannot_ provide any warranty regarding | |
-- its fitness for _any_ purpose. You | |
-- acknowledge that I will not be held liable | |
-- for any damage its use could incur. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment