Skip to content

Instantly share code, notes, and snippets.

@timm
Last active June 7, 2024 04:06
Show Gist options
  • Save timm/5472f90e896d57e7e86611c238942e4e to your computer and use it in GitHub Desktop.
Save timm/5472f90e896d57e7e86611c238942e4e to your computer and use it in GitHub Desktop.
ruler.lua

ruler.lua

Make Rules from Bayes Classifier

This code

  • functions should be 5 lines long, or less
  • in function args, 2 spaces denote "start of optionals" and 4 spaces denote "start of locals"
  • portable style (so you can recode this in your favorite language) no lua metatables.
  • x = anything
  • fun or hook = function
  • n = number
  • s = string
  • t = table
  • u = a list generated from t
  • it= iterator; e.g itsRows
  • isX = a boolean
  • function XX(...) end is a constructor of things of type xx
  • function _XX(...) end is a primitive constructor that should only ever be called by the real constructor XX(...)

Types

-- - lists have numeric integers.
-- - dicts have key indexes.
-- - table = list | dict
-- - number = float | int
-- - atom = str | bool | number
-- - COL = NUM | SYM
-- - row = list[atom]
-- - rows = list[row]
-- - klasses = dict[src,rows]

  • real, int, string, bool
  • num = NUM
  • sym = SYM
  • col = NUM | SYM
  • cols = COLS
  • data = DATA
#!/usr/bin/env lua
-- <!-- vim: set ts=2 sw=2 sts=2 et: -->
-- Helper library (string, list, maths and settings utilities).
local l = require"lib"
-- Settings table built from the help text below. Per the docs of lib's
-- `settings`, each `--key ... = default` line yields `the[key] = coerce(default)`.
local the = l.settings[[
ruler.lua find coll stuff
USAGE:
./ruler.lua [OPTIONS] -R [start]
SETTINGS:
-s --seed = 123456891
-b --bins = 7
-n --ndecs = 3
-R --run = nothing
-t --train = ../ezr/data/misc/auto93.csv]]
-- Local shortcuts for library names used in this file.
local big, is,o, oo = l.big, l.is, l.o, l.oo
-- One table per type name, built by the library's `is` (presumably it makes
-- each table callable as a constructor, since NUM() / SYM() are called below -- confirm).
-- NOTE(review): COL is created with is"SYM"; this looks like a copy-paste slip for is"COL" -- confirm.
local NUM, SYM, COL, DATA, COLS = is"NUM", is"SYM", is"SYM", is"DATA", is"COLS"
-- ------------------------------------------------
-- Primitive constructor: a fresh, empty summary of a numeric column.
-- `at` is the index used to read this column's cell from a row; `n`, `mu`,
-- `m2`, `sd`, `lo`, `hi` are the running statistics maintained by `NUM:add`;
-- `want` is the goal value compared against in `DATA:chebyshev`.
function NUM.new() --> num
  local num = {at = 0, txt = "", n = 0, want = 1}
  num.mu, num.m2, num.sd = 0, 0, 0
  num.lo, num.hi = big, -big   -- start inverted so the first value sets both
  return num
end
-- Primitive constructor: a fresh, empty summary of a symbolic column.
-- `has` maps each symbol to its count; `mode`/`most` track the most frequent
-- symbol and its count (see `SYM:add`).
function SYM.new() --> sym
  local sym = {at = 0, txt = "", n = 0}
  sym.has = {}
  sym.most, sym.mode = 0, nil
  return sym
end
-- Primitive constructor: an empty DATA (no rows, no column headers yet).
-- `callback` defaults to a no-op; `DATA:add` calls it as `callback(data, row)`
-- before every data row is absorbed.
-- (The previous version read an undeclared global `fun` here. `new` takes no
-- arguments, so that lookup could only ever hit the global table.)
function DATA.new() --> data
  return {rows = {}, cols = nil, callback = function(...) end}
end
-- Primitive constructor: an empty set of columns.
-- `names` holds the header strings, `all` every column, and `x` / `y` the
-- two groups that `COLS:init` sorts columns into.
function COLS.new() --> cols
  local cols = {}
  cols.names, cols.all = {}, {}
  cols.x, cols.y = {}, {}
  return cols
end
-- ------------------------------------------------
-- Turn a list of header names `ts` into column summaries.
--   * names starting with an upper-case letter become NUMs, the rest SYMs;
--   * a trailing `-` sets `want` to 0 (otherwise 1);
--   * a trailing `!`, `+` or `-` files the column under `self.y`, else `self.x`;
--   * a trailing `!` also marks the column as `self.klass`.
-- Fixes over the previous version:
--   * position and name are stored in `at` / `txt` (the fields the
--     constructors declare and that `DATA:add` / `DATA:chebyshev` read); the
--     old code wrote `n` / `s`, clobbering the item count and leaving `at` at 0;
--   * `push` is taken from the library table (no global `push` is defined);
--   * the goal-marker test is anchored to the end of the name, like the other
--     two marker tests, so a hyphen inside a name no longer counts;
--   * headers are walked with `ipairs` so columns are created in header order.
function COLS:init(ts,    col) --> cols
  self.names = ts
  for n, s in ipairs(self.names) do
    col = s:find"^[A-Z]" and NUM() or SYM()
    col.at, col.txt, col.want = n, s, (s:find"%-$" and 0 or 1)
    l.push(self.all, l.push(s:find"[!+-]$" and self.y or self.x, col))
    if s:find"!$" then self.klass = col end
  end
end
-- Fill a DATA from `x`: a csv file name (string) or a table of rows.
-- The first row seen becomes the column header (see `DATA:add`).
--   isSorted : when truthy, rows are sorted by ascending `chebyshev` distance.
--   fun      : optional callback run on every data row before it is added.
--   data     : unused slot, kept so the signature is unchanged.
-- Fixes over the previous version:
--   * rows are sorted via `self.rows` (the old code indexed the nil local `data`);
--   * the csv reader comes from the library table (`csv` was an undefined global);
--   * a missing `fun` no longer overwrites the no-op callback with nil, which
--     would make `DATA:add` fail on the first data row.
function DATA:init(x,  isSorted,fun,    data) --> data
  self.callback = fun or self.callback or function(...) end
  if type(x) == "string" then
    for t in l.csv(x) do self:add(t) end
  else
    for _, t in pairs(x or {}) do self:add(t) end
  end
  if isSorted then
    table.sort(self.rows, function(t1, t2)
      return self:chebyshev(t1) < self:chebyshev(t2)
    end)
  end
end
-- ----------------------------------------------------------
-- Absorb one row `t`.
-- The very first row is treated as the header and only creates the columns.
-- Every later row is passed to the callback, folded into each column summary
-- and appended to `self.rows`.
-- Fixes over the previous version: columns are updated with a method call
-- (`col:add(v)`; the old `col.add(v)` passed the cell value as `self`), and
-- `push` is taken from the library table instead of an undefined global.
function DATA:add(t) --> nil
  if self.cols then
    self.callback(self, t)
    for _, col in pairs(self.cols.all) do col:add(t[col.at]) end
    l.push(self.rows, t)
  else
    self.cols = COLS(t)
  end
end
-- Feed every value of table `t` to `col:add`, then hand `col` back so the
-- call can be chained.
function adds(col,t) --> num | sym
  for _, item in pairs(t) do
    col:add(item)
  end
  return col
end
-- Fold one number `n` into the running statistics; "?" (missing) is ignored.
-- Count, min and max are updated directly; mean (`mu`), sum of squared
-- deviations (`m2`) and standard deviation (`sd`) use a Welford-style
-- incremental update, so no values need to be stored.
-- Fix: the count update read from the undefined name `sel`; it now uses `self`.
function NUM:add(n,    d) --> nil
  if n ~= "?" then
    self.n  = self.n + 1
    self.lo = math.min(self.lo, n)
    self.hi = math.max(self.hi, n)
    d       = n - self.mu                      -- distance to the old mean
    self.mu = self.mu + d/self.n
    self.m2 = self.m2 + d*(n - self.mu)        -- old distance times new distance
    self.sd = self.n < 2 and 0 or (self.m2/(self.n - 1))^0.5
  end
end
-- Count one symbol `s`; "?" (missing) is ignored.
-- Keeps `mode` / `most` pointing at the most frequent symbol seen so far.
-- Fix: the item count `n` is now incremented, matching `NUM:add`; before,
-- the `n` field created by `SYM.new` was never updated.
function SYM:add(s) --> nil
  if s ~= "?" then
    self.n = self.n + 1
    self.has[s] = 1 + (self.has[s] or 0)
    if self.has[s] > self.most then
      self.mode, self.most = s, self.has[s]
    end
  end
end
-- -------------------------------------------------
-- Central tendency of a NUM: its running mean.
function NUM:mid()
  return self.mu
end
-- Central tendency of a SYM: the most frequent symbol seen so far.
function SYM:mid()
  return self.mode
end
-- Spread of a NUM: its running standard deviation.
function NUM:div()
  return self.sd
end
-- Spread of a SYM: the entropy of its symbol counts.
-- The lib.lua shipped with this file names the helper `entropy`; `ent` is
-- tried first in case another version of the library still provides it.
function SYM:div()
  return (l.ent or l.entropy)(self.has)
end
-- Rescale `x` against the lo..hi range seen so far; "?" passes through untouched.
-- The tiny `1/big` term keeps the denominator non-zero when lo == hi.
function NUM:norm(x)
  if x == "?" then return x end
  local span = self.hi - self.lo + 1/big
  return (x - self.lo) / span
end
-- Chebyshev distance of row `t` to the goals: the largest gap, over all y
-- columns, between the column's `want` and the row's normalised value.
-- `n` is a local slot; any value passed in is discarded.
function DATA:chebyshev(t,    n) --> real
  n = 0
  for _, col in pairs(self.cols.y) do
    local gap = math.abs(col.want - col:norm(t[col.at]))
    if n < gap then n = gap end
  end
  return n
end
-- -------------------------------------------------
-- Table of runnable actions / demos; it is handed to `main` at the end of the file.
local eg = {}
-- "-v" : print the version banner.
eg["-v"] = function()
  print("ruler.lua version 0.1")
end
-- Demo: summarise 10,000 squared random numbers in a NUM and print it.
-- Fix: values are added with `num:add`; the old code called `toCol`, which
-- is not defined in this file or in the lib.lua listed with it.
function eg.num(    num)
  num = NUM()
  for _ = 1, 10^4 do num:add(math.random()^2) end
  oo(num)
end
-- Demo: read the training file and print every 40th row with its row number.
-- Fix: the reader is taken from the library table (`l.csv`); a bare `csv`
-- was never declared, so the old call hit an undefined global.
function eg.csv(    n)
  n = 0
  for t in l.csv(the.train) do
    n = n + 1
    if n % 40 == 0 then print(n, o(t)) end
  end
end
-- -------------------------------------------------
-- Module-or-script switch. The probe asks for a local four stack levels up;
-- presumably that level exists only when this file is loaded via `require`
-- (confirm on the Lua version in use). If so, export the public names,
-- otherwise run the command-line entry point.
-- Fixes: `lib` now exports the library table `l` (the old `lib=lib` read an
-- undefined global and exported nil), and DATA is exported with its siblings.
-- NOTE(review): `main` is not defined in this file nor in the lib.lua listed
-- with it -- confirm where it is expected to come from.
if pcall(debug.getlocal, 4, 1)
then return {lib=l, the=the, NUM=NUM, SYM=SYM, COL=COL, DATA=DATA, COLS=COLS}
else main(eg,the)
end
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>lib.lua</title>
<link rel="stylesheet" href="pycco.css">
</head>
<body>
<div id='container'>
<div id="background"></div>
<div class='section'>
<div class='docs'><h1>lib.lua</h1></div>
</div>
<div class='clearall'>
<div class='section' id='section-0'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-0'>#</a>
</div>
<!-- vim: set ts=2 sw=2 sts=2 et: -->
<p>lib.lua : msc lua tricks <br />
(c) Tim Menzies <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#116;&#105;&#109;&#109;&#64;&#105;&#101;&#101;&#101;&#46;&#111;&#114;&#103;">&#116;&#105;&#109;&#109;&#64;&#105;&#101;&#101;&#101;&#46;&#111;&#114;&#103;</a> BSD2 </p>
</div>
<div class='code'>
<div class="highlight"><pre><span></span><span class="kd">local</span> <span class="n">l</span><span class="o">=</span><span class="p">{}</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-1'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-1'>#</a>
</div>
<hr />
<h2>My types</h2>
<p><code>number</code> = <code>float</code> | <code>init</code>
<code>atom</code> = <code>str</code> | <code>bool</code> | <code>number</code></p>
<p><code>list</code>s have numeric integers.<br />
<code>dict</code>s have key indexes.</p>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-2'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-2'>#</a>
</div>
<hr />
<h2>Constants</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="n">l</span><span class="p">.</span><span class="n">big</span> <span class="o">=</span> <span class="mf">1E30</span> <span class="c1">-- -&gt; float</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-3'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-3'>#</a>
</div>
<hr />
<h2>Linting</h2>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-4'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-4'>#</a>
</div>
<p><code>rogues() -&gt; nil</code> <br />
Report anything not declared &ldquo;local&rdquo;.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">rogues</span><span class="p">()</span> <span class="c1">-- () -&gt; nil</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">x</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">_ENV</span><span class="p">)</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="ow">not</span> <span class="n">l</span><span class="p">.</span><span class="n">b4</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="kr">then</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Rogue?&quot;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-5'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-5'>#</a>
</div>
<p>Cache what is needed for <code>rogues()</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="n">l</span><span class="p">.</span><span class="n">b4</span><span class="o">=</span><span class="p">{};</span> <span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">_</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">_ENV</span><span class="p">)</span> <span class="kr">do</span> <span class="n">l</span><span class="p">.</span><span class="n">b4</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">=</span><span class="n">k</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-6'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-6'>#</a>
</div>
<hr />
<h2>String stuff</h2>
<h3>Thing to string</h3>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-7'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-7'>#</a>
</div>
<p><code>fmt(str,x1,x2,...) -&gt; str</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="n">l</span><span class="p">.</span><span class="n">fmt</span> <span class="o">=</span> <span class="nb">string.format</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-8'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-8'>#</a>
</div>
<p><code>o(any) -&gt; str</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">o</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">u</span><span class="p">)</span>
<span class="kr">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="o">~=</span> <span class="s2">&quot;table&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="nb">tostring</span><span class="p">(</span><span class="n">l</span><span class="p">.</span><span class="n">rnd</span><span class="p">(</span><span class="n">t</span><span class="p">))</span> <span class="kr">end</span>
<span class="n">u</span><span class="o">=</span><span class="p">{};</span> <span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">v</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="kr">do</span>
<span class="n">v</span><span class="o">=</span><span class="n">l</span><span class="p">.</span><span class="n">o</span><span class="p">(</span><span class="n">v</span><span class="p">)</span>
<span class="n">u</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">u</span><span class="p">]</span> <span class="o">=</span> <span class="o">#</span><span class="n">t</span><span class="o">&gt;</span><span class="mi">0</span> <span class="ow">and</span> <span class="n">v</span> <span class="ow">or</span> <span class="n">fmt</span><span class="p">(</span><span class="s2">&quot;:%s %s&quot;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">if</span> <span class="o">#</span><span class="n">t</span><span class="o">==</span><span class="mi">0</span> <span class="kr">then</span> <span class="nb">table.sort</span><span class="p">(</span><span class="n">u</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="s2">&quot;(&quot;</span><span class="o">..</span> <span class="nb">table.concat</span><span class="p">(</span><span class="n">u</span><span class="p">,</span><span class="s2">&quot; &quot;</span><span class="p">)</span> <span class="o">..</span><span class="s2">&quot;)&quot;</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-9'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-9'>#</a>
</div>
<h3>String to thing</h3>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-10'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-10'>#</a>
</div>
<p><code>coerce(str) --&gt; atom</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">coerce</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">fun</span><span class="p">)</span>
<span class="n">fun</span> <span class="o">=</span> <span class="kr">function</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">if</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;nil&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="kc">nil</span> <span class="kr">else</span>
<span class="kr">return</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;true&quot;</span> <span class="ow">or</span> <span class="p">(</span><span class="n">s</span> <span class="o">~=</span><span class="s2">&quot;false&quot;</span> <span class="ow">and</span> <span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="kc">false</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="nb">math.tointeger</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">tonumber</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="n">fun</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="n">match</span><span class="s1">&#39;^%s*(.*%S)&#39;</span><span class="p">)</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-11'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-11'>#</a>
</div>
<p><code>values(str) --&gt; list[atom]</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">values</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="n">t</span><span class="o">=</span><span class="p">{}</span>
<span class="kr">for</span> <span class="n">s1</span> <span class="kr">in</span> <span class="n">s</span><span class="p">:</span><span class="n">gmatch</span><span class="p">(</span><span class="s2">&quot;([^,]+)&quot;</span><span class="p">)</span> <span class="kr">do</span> <span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">t</span><span class="p">]</span><span class="o">=</span><span class="n">coerce</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-12'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-12'>#</a>
</div>
<p><code>csv(?str) -&gt; iterator -&gt; list[atom]</code> <br />
If <code>file</code> is nil, then read from standard input.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">csv</span><span class="p">(</span> <span class="n">file</span><span class="p">,</span> <span class="n">fun</span><span class="p">,</span><span class="n">stream</span><span class="p">)</span>
<span class="n">stream</span> <span class="o">=</span> <span class="n">file</span> <span class="ow">and</span> <span class="nb">io.input</span><span class="p">(</span><span class="n">file</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">io.stdin</span>
<span class="kr">return</span> <span class="kr">function</span><span class="p">(</span> <span class="n">s</span><span class="p">)</span>
<span class="n">s</span><span class="o">=</span><span class="nb">io.read</span><span class="p">()</span>
<span class="kr">if</span> <span class="n">s</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">l</span><span class="p">.</span><span class="n">values</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">else</span> <span class="nb">io.close</span><span class="p">(</span><span class="n">stream</span><span class="p">)</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-13'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-13'>#</a>
</div>
<hr />
<h2>Settings</h2>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-14'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-14'>#</a>
</div>
<p><code>settings(str) --&gt; dict[str,atom]</code> <br />
Makes one <code>t[key]=coerce(default)</code> for all lines with <code>--key...=default</code>.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">settings</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="n">t</span> <span class="o">=</span> <span class="p">{}</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">s1</span> <span class="kr">in</span> <span class="n">s</span><span class="p">:</span><span class="n">gmatch</span><span class="p">(</span><span class="s2">&quot;[-][-]([%S]+)[^=]+=[%s]*([%S]+)&quot;</span><span class="p">)</span> <span class="kr">do</span> <span class="n">t</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">=</span><span class="n">coerce</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-15'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-15'>#</a>
</div>
<p><code>cli(dict) -&gt; dict</code><br />
The value of <code>dict</code>&rsquo;s at <code>key</code> is updated<br>by command line flag <code>-k</code> or <code>--key</code>.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">cli</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">s</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="kr">do</span>
<span class="n">s</span> <span class="o">=</span> <span class="nb">tostring</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">argv</span><span class="p">,</span><span class="n">arg1</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="n">arg1</span><span class="o">==</span><span class="s2">&quot;-&quot;</span><span class="o">..</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="n">sub</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">))</span> <span class="ow">or</span> <span class="n">arg1</span><span class="o">==</span><span class="s2">&quot;--&quot;</span><span class="o">..</span><span class="n">k</span> <span class="kr">then</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;true&quot;</span> <span class="ow">and</span> <span class="s2">&quot;false&quot;</span> <span class="ow">or</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;false&quot;</span> <span class="ow">and</span> <span class="s2">&quot;true&quot;</span> <span class="ow">or</span> <span class="n">arg</span><span class="p">[</span><span class="n">argv</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span>
<span class="n">t</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">l</span><span class="p">.</span><span class="n">coerce</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-16'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-16'>#</a>
</div>
<hr />
<h2>Maths</h2>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-17'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-17'>#</a>
</div>
<p><code>phi(number,number,number) -&gt; float</code><br />
Lin (1989)&rsquo;s approximation to the Gaussian cdf.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">phi</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">mu</span><span class="p">,</span><span class="n">sd</span><span class="p">,</span> <span class="n">z</span><span class="p">,</span><span class="n">cdf</span><span class="p">)</span>
<span class="n">cdf</span> <span class="o">=</span> <span class="kr">function</span><span class="p">(</span><span class="n">z</span><span class="p">)</span> <span class="kr">return</span> <span class="mi">1</span> <span class="o">-</span> <span class="mf">0.5</span><span class="o">*</span><span class="mf">2.718</span><span class="o">^</span><span class="p">(</span><span class="o">-</span><span class="mf">0.717</span> <span class="o">*</span> <span class="n">z</span> <span class="o">-</span> <span class="mf">0.416</span> <span class="o">*</span> <span class="n">z</span> <span class="o">*</span> <span class="n">z</span><span class="p">)</span> <span class="kr">end</span>
<span class="n">z</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="n">mu</span><span class="p">)</span><span class="o">/</span><span class="n">sd</span>
<span class="kr">return</span> <span class="n">z</span> <span class="o">&gt;=</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">cdf</span><span class="p">(</span><span class="n">z</span><span class="p">)</span> <span class="ow">or</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">cdf</span><span class="p">(</span><span class="o">-</span><span class="n">z</span><span class="p">)</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-18'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-18'>#</a>
</div>
<p><code>rnd(atom,?int) -&gt; number</code> <br />
Non-numerics round to themselves. If it can be an int, then return int.
Else return a float rounded to <code>ndecs</code> decimal places.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">rnd</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">ndecs</span><span class="p">,</span> <span class="n">mult</span><span class="p">)</span>
<span class="kr">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">~=</span> <span class="s2">&quot;number&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">n</span> <span class="kr">end</span>
<span class="kr">if</span> <span class="nb">math.floor</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">==</span> <span class="n">n</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">n</span> <span class="kr">end</span>
<span class="n">mult</span> <span class="o">=</span> <span class="mi">10</span><span class="o">^</span><span class="p">(</span><span class="n">ndecs</span> <span class="ow">or</span> <span class="n">the</span><span class="p">.</span><span class="n">ndecs</span><span class="p">)</span>
<span class="kr">return</span> <span class="nb">math.floor</span><span class="p">(</span><span class="n">n</span> <span class="o">*</span> <span class="n">mult</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">/</span> <span class="n">mult</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-19'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-19'>#</a>
</div>
<hr />
<h2>Lists</h2>
</div>
<div class='code'>
<div class="highlight"><pre></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-20'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-20'>#</a>
</div>
<p><code>push(list, any) -&gt; any</code> </p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">push</span><span class="p">(</span><span class="n">t</span><span class="p">,</span><span class="n">x</span><span class="p">)</span>
<span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="p">;</span> <span class="kr">return</span> <span class="n">x</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-21'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-21'>#</a>
</div>
<p><code>has(list[any]) -&gt; iterator -&gt; any</code></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">function</span> <span class="nc">l</span><span class="p">.</span><span class="nf">has</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span><span class="n">i</span><span class="p">)</span>
<span class="n">i</span><span class="p">,</span><span class="n">n</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="o">#</span><span class="n">t</span>
<span class="kr">return</span> <span class="kr">function</span><span class="p">()</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span>
<span class="kr">if</span> <span class="n">i</span><span class="o">&lt;=</span><span class="n">n</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">t</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kd">local</span> <span class="n">m</span><span class="o">=</span><span class="p">{}</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-22'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-22'>#</a>
</div>
<hr />
</div>
<div class='code'>
<div class="highlight"><pre><span class="kr">return</span> <span class="n">l</span>
</pre></div>
</div>
</div>
<div class='clearall'></div>
</div>
</body>
-- <!-- vim: set ts=2 sw=2 sts=2 et: -->
-- lib.lua : misc lua tricks
-- (c) Tim Menzies <timm@ieee.org> BSD2
-- `l` is the module table; every helper below is attached to it.
local l={}
-- --------------------------------------------------------------------------------------
-- ## My types
-- - `list`s have numeric integers
-- - `dict`s have key indexes
-- - `table` = `list` | `dict`
-- - `number` = `float` | `int`
-- - `atom` = `str` | `bool` | `number`
-- --------------------------------------------------------------------------------------
-- ## Constants
-- `big` : float. A very large number, used as a stand-in for infinity.
l.big = 1E30
-- --------------------------------------------------------------------------------------
-- ## Linting
-- `rogues() -> nil`
-- Print every name in the current environment that is missing from the
-- `l.b4` snapshot, i.e. anything that was not declared "local".
function l.rogues()
  for name, value in pairs(_ENV) do
    local known = l.b4[name]
    if not known then
      print("Rogue?", name, type(value))
    end
  end
end
-- Snapshot of the names present in the environment at this point of loading;
-- `rogues()` reports whatever shows up later.
l.b4 = {}
for name in pairs(_ENV) do
  l.b4[name] = name
end
-- --------------------------------------------------------------------------------------
-- ## Lists
-- `sort(t:list, ?f:fun) -> list`
-- Sort `t` in place (using comparator `f` when given) and return that same list.
function l.sort(t,f)
  table.sort(t, f)
  return t
end
-- `by(k:any) -> fun`
-- Build a comparator (for `sort`) that orders tables by ascending field `k`.
function l.by(k)
  local function lessByKey(a, b)
    return a[k] < b[k]
  end
  return lessByKey
end
-- `push(t:list, x:any) -> any`
-- Append `x` to the end of `t`; return `x` (not `t`) so calls can be nested.
function l.push(t,x)
  local slot = #t + 1
  t[slot] = x
  return x
end
-- `shuffle(t:list) -> list`
-- Return a new list holding `t`'s contents in random order; `t` is untouched.
function l.shuffle(t,    u,j)
  u = {}
  for _, item in pairs(t) do u[#u + 1] = item end
  -- Walk from the back, swapping each slot with a random slot at or before it.
  local i = #u
  while i >= 2 do
    j = math.random(i)
    u[i], u[j] = u[j], u[i]
    i = i - 1
  end
  return u
end
-- `slice(t:list, ?go:number=1, ?stop:number=#list, ?inc:number=1) -> list`
-- Return `t[go..stop]` (inclusive) in steps of `inc`.
-- A negative `go` or `stop` is replaced by `#t` plus that value.
-- Bounds and step are floored, so floats are accepted.
function l.slice(t, go, stop, inc,    u)
  if go   and go   < 0 then go   = #t + go   end
  if stop and stop < 0 then stop = #t + stop end
  local first = (go   or 1)  // 1
  local last  = (stop or #t) // 1
  local step  = (inc  or 1)  // 1
  u = {}
  for j = first, last, step do
    u[#u + 1] = t[j]
  end
  return u
end
-- `removes(t:list,n:float) -> list`
-- Pop the last `n` items (floored) from `t`; return them as a list, last item first.
function l.removes(t,n,    u)
  u = {}
  local count = n // 1
  for _ = 1, count do
    u[#u + 1] = table.remove(t)
  end
  return u
end
-- `has(t:list[any]) -> iterator -> any`
-- Iterator over the values `t[1] .. t[#t]`; the length is read once, up front.
function l.has(t,    n,i)
  n = #t
  i = 0
  return function()
    i = i + 1
    if i > n then return end
    return t[i]
  end
end
local m={}
-- --------------------------------------------------------------------------------------
-- ## Maths
-- `phi(x:number, mu:number, sd:number) -> float`
-- Lin (1989)'s approximation to the Gaussian cdf.
-- The approximation is defined for z >= 0; negative z uses symmetry.
function l.phi(x,mu,sd,    z,cdf)
  cdf = function(z)
    local exponent = -0.717 * z - 0.416 * z * z
    return 1 - 0.5 * 2.718^exponent
  end
  z = (x - mu) / sd
  if z >= 0 then return cdf(z) end
  return 1 - cdf(-z)
end
-- `rnd(n:atom, ?ndecs:int=3) -> atom`
-- Non-numbers and whole numbers come back unchanged.
-- Anything else is rounded to `ndecs` decimal places.
function l.rnd(n, ndecs,    mult)
  local isNumber = type(n) == "number"
  if not isNumber or math.floor(n) == n then return n end
  mult = 10^(ndecs or 3)
  return math.floor(n * mult + 0.5) / mult
end
-- `entropy(t:table) -> float`
-- Shannon entropy (in bits) of the counts stored as the values of `t`.
-- `t` may be a list of counts or a dict such as symbol -> count.
-- Fixes over the previous version:
--   * the per-item terms are summed (before, `e` was overwritten on each
--     step, so only the last term survived);
--   * values are visited with `pairs`, so dict-shaped count tables work (the
--     list-only iterator saw nothing in them, giving N == 0);
--   * zero counts are skipped, as log(0) is undefined.
function l.entropy(t,    e,N)
  N = 0
  for _, n in pairs(t) do N = N + n end
  e = 0
  for _, n in pairs(t) do
    if n > 0 then
      local p = n / N
      e = e - p * math.log(p, 2)
    end
  end
  return e
end
-- --------------------------------------------------------------------------------------
-- ## String stuff
-- ### Thing to string
-- `fmt(str,any1,any2,...) -> str`
l.fmt = string.format
-- `o(t:any) -> str`
-- Render anything as a string. Non-tables go through `rnd` then `tostring`.
-- Tables are rendered recursively inside parentheses: tables with a
-- non-zero length show just their values; all others show sorted
-- `:key value` pairs.
function l.o(t,    u)
  if type(t) ~= "table" then return tostring(l.rnd(t)) end
  local isList = #t > 0
  u = {}
  for k,v in pairs(t) do
    local s = l.o(v)
    u[#u+1] = isList and s or l.fmt(":%s %s", k, s) end
  if not isList then table.sort(u) end
  return "(" .. table.concat(u," ") .. ")" end
-- ### String to thing
-- `coerce(s:str) --> atom`
-- Turn a string into the thing it names: an integer, else a float, else
-- (after trimming surrounding white space) `nil` for "nil", a boolean for
-- "true"/"false", else the trimmed string. A blank string becomes `false`.
function l.coerce(s,    fun)
  fun = function(s1)
    if s1 == "nil"  then return nil  end
    if s1 == "true" then return true end
    if s1 == nil or s1 == "false" then return false end
    return s1 end
  return math.tointeger(s) or tonumber(s) or fun(s:match'^%s*(.*%S)') end
-- `values(s:str) --> list[atom]`
-- Split `s` on commas and `coerce` each piece. Empty fields (two commas in
-- a row) produce no entry at all, so later cells shift left.
function l.values(s,    t)
  t = {}
  s:gsub("([^,]+)", function(s1) t[#t+1] = l.coerce(s1) end)
  return t end
-- `csv(?file:str) -> iterator -> list[atom]`
-- Iterate over the lines of `file`, returning `values(line)` for each one.
-- If `file` is nil, then read from stdin. Raises an error if `file` cannot
-- be opened.
-- Each iterator reads from its own handle (not from the process-wide
-- default input), so several csv iterators can be live at once. The handle
-- is closed at end of file; stdin is never closed.
function l.csv(  file,    fun,stream)
  stream = file and assert(io.open(file)) or io.stdin
  return function(    s)
    if not stream then return end      -- already exhausted
    s = stream:read()
    if s then return l.values(s) end
    if file then stream:close() end
    stream = nil end end
-- --------------------------------------------------------------------------------------
-- ## Settings
-- `settings(s:str) --> dict[str,atom]`
-- Parse a help string: every line of the form `--key ... = default`
-- yields one entry `t[key] = coerce(default)`.
function l.settings(s,    t)
  t = {}
  local function keep(k,s1) t[k] = l.coerce(s1) end
  s:gsub("[-][-]([%S]+)[^=]+=[%s]*([%S]+)", keep)
  return t end
-- `cli(t:dict) -> dict`
-- Update `t` from the command line. For each `key` in `t`, a flag `-k`
-- (first letter of the key) or `--key` changes `t[key]`: booleans are
-- flipped (no argument consumed), anything else takes the following
-- command-line word, passed through `coerce`. A value-taking flag with
-- nothing after it is ignored. Returns `t`.
function l.cli(t)
  for key in pairs(t) do
    local name = tostring(key)
    for n,flag in ipairs(arg or {}) do
      if flag == "-"..name:sub(1,1) or flag == "--"..name then
        -- Decide on the *current value* (not the key name) whether to flip.
        local s = tostring(t[key])
        s = s=="true" and "false" or s=="false" and "true" or arg[n+1]
        if s ~= nil then t[key] = l.coerce(s) end end end end
  return t end
-- --------------------------------------------------------------------------------------
-- ## And finally...
-- Share and enjoy.
return l
#-
#- ._ _ _.| _ ._ _ _| _.
#- | | |(_||<(/_ | | |\/ (_|(_|\/
#- / /
# Fragments of HTML and CSS that the doc rules splice into the generated pages.
# A backslash before a hash keeps make from reading the hash as a comment start.
BASE    = https://gist.github.com/timm/5472f90e896d57e7e86611c238942e4e\#file-
HUME    = <a href="$(BASE)ruler-md">home</a>
CONTRIB = <a href="CONRIBUTE.md">contribute</a>
RULER   = <a href="ruler.html">ruler</a>
LIB     = <a href="lib.html">lib</a>
MENU    = $(HUME) | $(CONTRIB) | $(RULER) | $(LIB)
# Exactly one closing quote after the file name (a doubled quote made the tag malformed).
IMAGE   = <img src="icon.png" align=right width=150>
CSS     = p { text-align: right;} .docs code {xfont-weight: bold; font-size: x-small; color: \#954121;}
#-------------------------------------------------------------------------
SHELL := bash
MAKEFLAGS += --warn-undefined-variables
.SILENT:
#-------------------------------------------------------------------------
# help: first awk prints a "make [WHAT]" heading, then one "target : text" line
# for every line of $(MAKEFILE_LIST) that starts in column one and carries a
# double-hash description. Second awk prints the banner lines of Makefile
# (those holding the hash-dash marker), with the marker removed.
help : ## show help
awk 'BEGIN {FS = ":.*?## "; print "\nmake [WHAT]" } \
/^[^[:space:]].*##/ {printf " \033[36m%-15s\033[0m : %s\n", $$1, $$2} ' \
$(MAKEFILE_LIST)
awk 'sub(/#\- /,"") { printf "\033[36m%-15s\033[0m \n", $$0}' Makefile
# pull: fetch and merge upstream changes.
pull:
git pull
# push: prompt for a commit message, commit all tracked changes, push, show status.
# The doubled dollar passes a literal dollar to the shell (the shell variable x).
push:
echo -n "git comment why> "; read x; git commit -am "$$x"; git push; git status
# doco: build the HTML pages for ruler.lua and lib.lua.
doco: ~/tmp/ruler.html ~/tmp/lib.html
# Pattern rule: run pycco on the .lua file, append $(CSS) to pycco.css, use sed
# to put $(MENU), a rule and $(IMAGE) in front of the first h1 tag on a line,
# copy the icon beside the page, then open the page.
# NOTE(review): the empty-string argument to "sed -i" is the BSD/macOS form and
# "open" looks macOS-specific; confirm before running elsewhere.
~/tmp/%.html: %.lua ## .lua --> .html
pycco -d ~/tmp $<
echo "$(CSS)" >> ~/tmp/pycco.css
sed -i '' 's?<h1>?$(MENU)<hr>$(IMAGE)&?' $@
cp icon.png ~/tmp
open $@
# Pattern rule: pretty-print a .lua file to PostScript with a2ps (3 columns,
# letter paper, line numbers, no borders/titles/footers), convert that to PDF
# with ps2pdf, delete the intermediate PostScript file, then open the PDF.
%.pdf : %.lua ## .lua --> .pdf
#mkdir -p $(dirname $@)
	echo "pdf-ing $@ ... "
	a2ps \
	-Br \
	--chars-per-line 100 \
	--file-align=fill \
	--line-numbers=1 \
	--borders=no \
	--pro=color \
	--left-title="" \
	--columns 3 \
	-M letter \
	--footer="" \
	--right-footer="" \
	-o $@.ps $<
	ps2pdf $@.ps $@; rm $@.ps
	open $@
#!/usr/bin/env bash
# Print the command-line words as a small figlet banner, each non-blank banner
# line wrapped as a Lua comment holding an HTML comment, followed by a
# "-- ## <words>" heading.
what="$*"
# "$what" is quoted in -v so that multi-word titles stay one gawk assignment.
figlet -W -f mini "$what" | gawk -v what="$what" 'NF>0 {print "-- <!-- " $0 " -->"} END {print "-- ## "what "\n" }'
#!/usr/bin/env lua
-- <!-- vim: set ts=2 sw=2 sts=2 et: -->
-- Names of every global present at start-up; `l.rogues()` reports any global
-- that is not listed here.
local b4={}; for k,_ in pairs(_ENV) do b4[k]=k end
-- Global settings.
local the={bins=2}
-- `l` holds the library functions; the other names are class tables. Each one
-- must already be a table when the `function X:method` definitions below run
-- (a nil here makes the file fail at load time).
local l,SYM,NUM,COLS,CENTROID = {},{},{},{},{}
-- Stand-in for "infinity" when initialising lo/hi trackers.
local big = 1E30
-- `SYM(s:str, n:int) -> sym`
-- Fresh summary of a symbolic column named `s` at position `n`: no items
-- seen yet, empty counts in `has`, no mode.
function SYM:new(s,n)
  local sym = {at=n, txt=s, n=0, has={}, most=0, mode=nil}
  return sym end
-- `NUM(s:str, n:int) -> num`
-- Fresh summary of a numeric column named `s` at position `n`. `want` is 0
-- when the name ends in "-", else 1. `lo`/`hi` start at +/-`big` so the
-- first value added replaces them.
function NUM:new(s,n)
  local endsInMinus = (s or ""):find"-$"
  return {at=n, txt=s, n=0, sum=0, lo=big, hi=-big,
          want = endsInMinus and 0 or 1, ys={}} end
-- `COL(n:int, s:str) -> NUM | SYM`
-- Make the summary for column `n` named `s`: names that start with an upper
-- case letter get a NUM, all others a SYM. (The pattern needs at least one
-- capital; `^[A-Z]*` matched every name, so no column was ever a SYM.)
local function COL(n,s)
  return ((s or ""):find"^[A-Z]" and NUM or SYM)(s,n) end
-- `COLS(names:list[str]) -> cols`
-- Build one column summary per name (kept in `all`). Columns whose name ends
-- in "X" are skipped; of the rest, names ending in "!", "+" or "-" go into
-- the `y` list and everything else into the `x` list.
function COLS:new(names)
  self.name = names
  self.x, self.y = {}, {}           -- must exist before anything is pushed
  self.all = l.kap(names, COL)
  for _,col in ipairs(self.all) do
    if not col.txt:find"X$" then
      l.push(col.txt:find"[!+-]$" and self.y or self.x, col) end end end
-- `sym:add(x:atom, ?n:int=1) -> atom`
-- Count `n` more sightings of `x` and refresh the mode if `x` is now the most
-- frequent value. The unknown marker "?" is ignored. Always returns `x`.
function SYM:add(x,  n)
  if x == "?" then return x end
  n = n or 1
  local seen = n + (self.has[x] or 0)
  self.n = self.n + n
  self.has[x] = seen
  if seen > self.most then self.mode, self.most = x, seen end
  return x end
-- `num:add(x:number, ?n:int=1) -> number`
-- Fold `n` copies of `x` into the running count and sum, and stretch
-- `lo`/`hi` to cover `x`. The unknown marker "?" is ignored. Returns `x`.
function NUM:add(x,  n)
  if x == "?" then return x end
  n = n or 1
  self.n   = self.n + n
  self.lo  = math.min(x, self.lo)
  self.hi  = math.max(x, self.hi)
  self.sum = self.sum + (x*n)
  return x end
-- `sym:with(other:sym) -> sym`
-- Return a new SYM (same name and position as `self`) that merges `self`
-- and `other`: `n` is the sum of both, and each symbol seen in `self` gets
-- the n-weighted mean of its two counts.
-- SYMs carry no `lo`/`hi`, so (unlike NUMs) there is no range to merge.
-- NOTE(review): symbols that appear only in `other` are not carried over and
-- `mode`/`most` are not recomputed -- confirm whether that is intended.
function SYM:with(other)
  local it = SYM(self.txt, self.at)
  it.n = self.n + other.n
  for k,c1 in pairs(self.has) do
    local c2 = other.has[k] or 0
    it.has[k] = (it.has[k] or 0) + (c1*self.n + c2*other.n)/(self.n + other.n) end
  return it end
-- `CENTROID(t:list, ?n:int=1)`
-- Store the cells `t` and the number of rows `n` that they stand for.
function CENTROID:new(t,  n)
  self.cells = t
  self.n = n or 1 end
-- `centroid:combine(other:centroid) -> row`
-- Merge two centroids cell by cell using `l.combine`, weighting each side by
-- the number of rows it stands for. The merge walks `self.cells` (walking
-- `self` itself only visited the `n` and `cells` fields, never the cell values).
-- NOTE(review): the result is built with `ROW`, which is not defined in the
-- visible part of this file -- confirm it should not be `CENTROID`.
function CENTROID:combine(other,    t)
  t = {}
  for k,a in pairs(self.cells) do
    t[k] = l.combine(a, other.cells[k], self.n, other.n) end
  return ROW(t, self.n + other.n) end
-- NOTE(review): this assignment has no `local`, so it creates a global named
-- `all`; nothing in the visible part of this file reads it, and `l.rogues()`
-- will list it as a rogue. Confirm whether it is still needed.
all={n=0, mu=0, m2=0, lo=nil, hi=nil, names=nil, rows={}}
-- --------------------------------------------------------------------------------------
l.fmt = string.format
l.cat = table.concat
-- `map(t:table, fun:fun) -> list`
-- Call `fun(value)` on every value of `t`; collect the non-nil results.
function l.map(t,fun,    u)
  u = {}
  for _,v in pairs(t) do
    u[#u+1] = fun(v) end
  return u end
-- `kap(t:table, fun:fun) -> list`
-- Call `fun(key, value)` on every entry of `t`; collect the non-nil results.
function l.kap(t,fun,    u)
  u = {}
  for k,v in pairs(t) do
    u[#u+1] = fun(k,v) end
  return u end
-- `push(t:list, x:any) -> any`
-- Append `x` to the end of `t`; return `x`.
-- (The parameter list was written `(t.x)`, which does not parse.)
function l.push(t,x) t[1+#t]=x; return x end
-- `combine(a:atom, b:atom, m:number, n:number) -> atom`
-- Merge two cell values that stand for `m` and `n` rows respectively.
-- An unknown ("?") side yields the other side; numbers yield their weighted
-- mean; anything else yields `a` when `m > n` (and `a` is truthy), else `b`.
function l.combine(a,b,m,n)
  if     a == "?"            then return b
  elseif b == "?"            then return a
  elseif type(a) == "number" then return (m*a + n*b)/(m+n) end
  if m > n and a then return a end
  return b end
-- `sort(t:list, ?fun:fun) -> list`
-- Sort `t` in place (with comparator `fun`, if given) and return `t`.
function l.sort(t,fun)
  table.sort(t, fun)
  return t end
-- `o(t:any) -> str`
-- Render anything as a string. Non-tables use `tostring`. Tables are shown
-- recursively as `name(...)`, where `name` is the table's `_name` (if any):
-- tables with a non-zero length list their values; all others list sorted
-- `:key value` pairs.
function l.o(t,    list,dict)
  if type(t) ~= "table" then return tostring(t) end
  list = function(t1,u)
    for _,v in pairs(t1) do u[#u+1] = l.o(v) end
    return u end
  dict = function(t1,u)
    for k,v in pairs(t1) do u[#u+1] = l.fmt(":%s %s",k,l.o(v)) end
    return l.sort(u) end
  local show = #t==0 and dict or list
  return (t._name or "") .. "(" .. l.cat(show(t,{}), " ") .. ")" end
-- `rogues()`
-- Print a "Rogue?" line (name and type) for every global that was not
-- already present in the start-up snapshot `b4`.
function l.rogues()
  for name,value in pairs(_ENV) do
    if not b4[name] then
      print("Rogue?", name, type(value)) end end end
-- `obj(s:str, t:table)`
-- Turn the table `t` into a class called `s`: instances look methods up in
-- `t`, print via `l.o`, and are created by calling `t(...)`. That call makes
-- an empty instance `i`, runs `t.new(i, ...)`, and returns whatever `new`
-- returned (or `i` if it returned nothing), with `t` as its metatable.
function l.obj(s,t)
  t._name, t.__index, t.__tostring = s, t, l.o
  local function create(_, ...)
    local i = setmetatable({}, t)
    return setmetatable(t.new(i, ...) or i, t) end
  setmetatable(t, {__call = create}) end
-- --------------------------------------------------------------------------------------
-- Start-up actions, selected by name from the command line.
local eg = {}
-- Show the program name and version.
eg.help = function() print("near.lua v0.01") end
-- Show the current settings.
eg.the  = function() print(l.o(the)) end
-- Build and show one small row.
eg.num  = function() print(ROW{20,10,30}) end
-- --------------------------------------------------------------------------------------
-- Make every class table that exists callable as a constructor. Names that
-- are nil at this point simply drop out of the table constructor.
for s,t in pairs{SYM=SYM, NUM=NUM, COLS=COLS, CENTROID=CENTROID, ROW=ROW} do l.obj(s,t) end
-- Run the action named by the first command-line argument; with no argument,
-- or with a name that is not a known action, fall back to "help".
local action = eg[arg and arg[1] or "help"] or eg.help
action()
-- Report any globals this file created by accident.
l.rogues()

.

Notes

  • Explanation is really inference. Explanation does not just augment "it", it can replace "it".
  • Epsilons are large. If you do enough repeated trials, you realize how unstable our conclusions are: e.g. run the whole inference 20 times, each time using 90% of the data. Often, minor improvements are spurious since (if you do the stats) there is a large set of solutions that are statistically indistinguishable from the best solution. So one tactic here is to get close, then stop early.
    • see the Hamlet equation: big impact on inference
    • note, not for safety critical apps
  • Very little is key. much of the signal in just a few variables
/*--------------------- Layout and Typography ----------------------------*/
body {
font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif;
font-size: 16px;
line-height: 24px;
color: #252519;
margin: 0; padding: 0;
background: #f5f5ff;
}
a {
color: #261a3b;
}
a:visited {
color: #261a3b;
}
p {
margin: 0 0 15px 0;
}
h1, h2, h3, h4, h5, h6 {
margin: 40px 0 15px 0;
}
h2, h3, h4, h5, h6 {
margin-top: 0;
}
#container {
background: white;
}
#container, div.section {
position: relative;
}
#background {
position: absolute;
top: 0; left: 580px; right: 0; bottom: 0;
background: #f5f5ff;
border-left: 1px solid #e5e5ee;
z-index: 0;
}
/* Drop-down "jump to" box. The unprefixed properties follow the vendor-prefixed
   ones so that browsers without the old prefixes still draw the shadow and
   the rounded corner. */
#jump_to, #jump_page {
  background: white;
  -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777;
  box-shadow: 0 0 25px #777;
  -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px;
  border-bottom-left-radius: 5px;
  font: 10px Arial;
  text-transform: uppercase;
  cursor: pointer;
  text-align: right;
}
#jump_to, #jump_wrapper {
position: fixed;
right: 0; top: 0;
padding: 5px 10px;
}
#jump_wrapper {
padding: 0;
display: none;
}
#jump_to:hover #jump_wrapper {
display: block;
}
#jump_page {
padding: 5px 0 3px;
margin: 0 0 25px 25px;
}
#jump_page .source {
display: block;
padding: 5px 10px;
text-decoration: none;
border-top: 1px solid #eee;
}
#jump_page .source:hover {
background: #f5f5ff;
}
#jump_page .source:first-child {
}
div.docs {
float: left;
max-width: 500px;
min-width: 500px;
min-height: 5px;
padding: 10px 25px 1px 50px;
vertical-align: top;
text-align: left;
}
.docs pre {
margin: 15px 0 15px;
padding-left: 15px;
}
.docs p tt, .docs p code {
background: #f8f8ff;
border: 1px solid #dedede;
font-size: 12px;
padding: 0 0.2em;
}
.octowrap {
position: relative;
}
/* The "#" anchor beside each doc section: invisible until the section is
   hovered, then faded in. The standard `transition` follows the prefixed one
   so the fade also works in browsers that ignore `-webkit-transition`. */
.octothorpe {
  font: 12px Arial;
  text-decoration: none;
  color: #454545;
  position: absolute;
  top: 3px; left: -20px;
  padding: 1px 2px;
  opacity: 0;
  -webkit-transition: opacity 0.2s linear;
  transition: opacity 0.2s linear;
}
div.docs:hover .octothorpe {
opacity: 1;
}
div.code {
margin-left: 580px;
padding: 14px 15px 16px 50px;
vertical-align: top;
}
.code pre, .docs p code {
font-size: 12px;
}
pre, tt, code {
line-height: 18px;
font-family: Monaco, Consolas, "Lucida Console", monospace;
margin: 0; padding: 0;
}
div.clearall {
clear: both;
}
/*---------------------- Syntax Highlighting -----------------------------*/
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
body .hll { background-color: #ffffcc }
body .c { color: #408080; font-style: italic } /* Comment */
body .err { border: 1px solid #FF0000 } /* Error */
body .k { color: #954121 } /* Keyword */
body .o { color: #666666 } /* Operator */
body .cm { color: #408080; font-style: italic } /* Comment.Multiline */
body .cp { color: #BC7A00 } /* Comment.Preproc */
body .c1 { color: #408080; font-style: italic } /* Comment.Single */
body .cs { color: #408080; font-style: italic } /* Comment.Special */
body .gd { color: #A00000 } /* Generic.Deleted */
body .ge { font-style: italic } /* Generic.Emph */
body .gr { color: #FF0000 } /* Generic.Error */
body .gh { color: #000080; font-weight: bold } /* Generic.Heading */
body .gi { color: #00A000 } /* Generic.Inserted */
body .go { color: #808080 } /* Generic.Output */
body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
body .gs { font-weight: bold } /* Generic.Strong */
body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
body .gt { color: #0040D0 } /* Generic.Traceback */
body .kc { color: #954121 } /* Keyword.Constant */
body .kd { color: #954121; font-weight: bold } /* Keyword.Declaration */
body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */
body .kp { color: #954121 } /* Keyword.Pseudo */
body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */
body .kt { color: #B00040 } /* Keyword.Type */
body .m { color: #666666 } /* Literal.Number */
body .s { color: #219161 } /* Literal.String */
body .na { color: #7D9029 } /* Name.Attribute */
body .nb { color: #954121 } /* Name.Builtin */
body .nc { color: #0000FF; font-weight: bold } /* Name.Class */
body .no { color: #880000 } /* Name.Constant */
body .nd { color: #AA22FF } /* Name.Decorator */
body .ni { color: #999999; font-weight: bold } /* Name.Entity */
body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
body .nf { color: #0000FF } /* Name.Function */
body .nl { color: #A0A000 } /* Name.Label */
body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
body .nt { color: #954121; font-weight: bold } /* Name.Tag */
body .nv { color: #19469D } /* Name.Variable */
body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
body .w { color: #bbbbbb } /* Text.Whitespace */
body .mf { color: #666666 } /* Literal.Number.Float */
body .mh { color: #666666 } /* Literal.Number.Hex */
body .mi { color: #666666 } /* Literal.Number.Integer */
body .mo { color: #666666 } /* Literal.Number.Oct */
body .sb { color: #219161 } /* Literal.String.Backtick */
body .sc { color: #219161 } /* Literal.String.Char */
body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */
body .s2 { color: #219161 } /* Literal.String.Double */
body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
body .sh { color: #219161 } /* Literal.String.Heredoc */
body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
body .sx { color: #954121 } /* Literal.String.Other */
body .sr { color: #BB6688 } /* Literal.String.Regex */
body .s1 { color: #219161 } /* Literal.String.Single */
body .ss { color: #19469D } /* Literal.String.Symbol */
body .bp { color: #954121 } /* Name.Builtin.Pseudo */
body .vc { color: #19469D } /* Name.Variable.Class */
body .vg { color: #19469D } /* Name.Variable.Global */
body .vi { color: #19469D } /* Name.Variable.Instance */
body .il { color: #666666 } /* Literal.Number.Integer.Long */
-- luarocks install ruler-0.0-1.rockspec
-- (the rockspec file name must be <package>-<version>.rockspec)
package = "ruler"
version = "0.0-1"
source = {
  url = "https://gist.githubusercontent.com/timm/5472f90e896d57e7e86611c238942e4e/raw/876d6c8b9257a9fb530c3cd3e9690a7cf559f4c7/ruler.lua"
}
description = {
  summary = "Simple explainable AI",
  detailed = "Builds a Bayes classifier, then extracts rules from differences in class distributions",
  license = "BSD",
  homepage = "https://gist.github.com/timm/5472f90e896d57e7e86611c238942e4e"
}
build = {
  type = "builtin",
  -- NOTE(review): the module and script are still called `my_script`
  -- (left as-is so existing `require "my_script"` calls keep working);
  -- consider renaming to `ruler`.
  modules = {
    my_script = "ruler.lua"
  },
  -- Command-line scripts belong under `install.bin`; a `bin` key directly
  -- under `build` is not a field of the builtin build type.
  install = {
    bin = {
      my_script = "ruler.lua"
    }
  }
}
#!/usr/bin/env gawk -f
# <!-- vim: set ts=2 sw=2 sts=2 et: -->
BEGIN { FS=","
the["seed"] = 123456891
the["data"] = "../../../erz/data/misc/auto93.csv"
}
#NR==1 { split($0,names,",")
# Initialise column i as a numeric column: zero count, mean, m2 and sd, and
# lo/hi set so that the first value added replaces them. lo and hi are
# per-column arrays (add2num indexes them by i), so they are set as lo[i] and
# hi[i]; assigning them as plain scalars made the later array use fatal.
function num(i) { n[i] = mu[i] = m2[i] = sd[i] = 0; lo[i] = 1E30; hi[i] = -1E30 }
# Initialise column i as a symbolic column: zero count, and an empty table of
# symbol counts at has[i]. Creating has[i] is what marks i as symbolic.
function sym(i) { n[i] = 0; has[i][""] = 0; delete has[i][""] }
# Add value x to column i, using the symbolic updater for columns registered
# by sym() and the numeric updater for all others.
function add(i,x) { if (i in has) add2sym(i,x); else add2num(i,x) }
# Count one more sighting of symbol x in column i (unknown "?" is ignored).
function add2sym(i,x) { if (x != "?") { n[i]++; has[i][x]++ } }
# Fold number x into column i: widen lo/hi, bump the count, then update the
# running mean, m2 (sum of squared differences) and standard deviation
# incrementally. The unknown marker "?" is ignored.
function add2num(i,x,    delta) {
  if (x == "?") return
  lo[i]  = min(lo[i], x)
  hi[i]  = max(hi[i], x)
  n[i]  += 1
  delta  = x - mu[i]
  mu[i] += delta / n[i]
  m2[i] += delta * (x - mu[i])
  if (n[i] < 2) sd[i] = 0
  else          sd[i] = (m2[i] / (n[i] - 1))^0.5 }
# -------------------------------------------------
# Force a to be an array (by creating, then deleting, a dummy element).
# Elements already in a are kept. The second argument is not used.
function new(a,i) {
  a[1] = ""
  delete a[1] }
# Smaller of a and b (b when they are equal).
function min(a,b) {
  if (a < b) return a
  return b }
# Larger of a and b (b when they are equal).
function max(a,b) {
  if (a > b) return a
  return b }
# Print the pretty string that o() builds for x.
function oo(x) {
  print o(x) }
# Approximate Gaussian cumulative distribution at x, for mean mu and standard
# deviation sd. The tiny constant stops a zero sd from dividing by zero.
# Negative z values are handled by symmetry.
function cdf(x,mu,sd,    z) {
  z = (x-mu) / (sd + 1E-30)
  if (z >= 0) return cdf1(z)
  return 1 - cdf1(-z) }
# The approximation itself, for z >= 0.
function cdf1(z) {
  return 1 - 0.5*2.718^(-0.717 * z - 0.416 * z * z) }
# Generate a pretty string from x. Scalars are returned as they are. Arrays
# are shown in parentheses: as plain values when the first key visited looks
# numeric, else as ":key value" pairs. An empty array gives "()".
# k is declared local so recursive calls cannot clobber a caller's loop key.
function o(x,    k) {
  if (typeof(x) != "array") return x
  for (k in x) return "(" (k+0==k ? o1(x) : o2(x)) ")"
  return "()" }
# Generate a pretty string from an array with numeric keys: the values only,
# separated by single spaces. k is local so nested calls cannot overwrite it.
function o1(a,    s,sep,k) {
  for (k in a) { s = s sep o(a[k]); sep = " " }
  return s }
# Generate a pretty string from an array with named keys: ":key value" pairs
# separated by single spaces. k is local so nested calls cannot overwrite it.
function o2(a,    s,sep,k) {
  for (k in a) { s = s sep ":"k " " o(a[k]); sep = " " }
  return s }
#-------------------------------------------------
# Entry point: hand the first command-line word to main().
BEGIN { main(ARGV[1]) }
# Seed the random number generator from the["seed"], build the function name
# "eg_" s, print that name, and, if a function of that name is listed in
# FUNCTAB, call it indirectly (the @fun() form). Unknown names do nothing more.
function main(s, fun) {
srand(the["seed"])
fun = "eg_" s
print(fun)
if (fun in FUNCTAB) { @fun() }}
# Example: split a small header line into column names and create one column
# per name -- numeric when the name starts with an upper-case letter, symbolic
# otherwise. The test must look at the name, names[i]; the index `i` is just
# "1", "2", ... and can never match /^[A-Z]/.
# `i` and `names` are locals.
function eg_one(    i,names) {
  split("asdas,asdasd",names,/,/)
  for(i in names) {
    if (names[i] ~ /^[A-Z]/) num(i)
    else                     sym(i) }}
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>ruler.lua</title>
<link rel="stylesheet" href="pycco.css">
</head>
<body>
<div id='container'>
<div id="background"></div>
<div class='section'>
<div class='docs'><a href="https://gist.github.com/timm/5472f90e896d57e7e86611c238942e4e#file-ruler-md">home</a> | <a href="https://gist.github.com/timm/5472f90e896d57e7e86611c238942e4e#file-CONRIBUTE.md">contribute</a> <hr><img src="icon.png" align=right width=250><h1>ruler.lua</h1></div>
</div>
<div class='clearall'>
<div class='section' id='section-0'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-0'>#</a>
</div>
<!-- vim: set ts=2 sw=2 sts=2 et: -->
</div>
<div class='code'>
<div class="highlight"><pre><span></span><span class="kd">local</span> <span class="n">the</span><span class="p">,</span><span class="n">help</span> <span class="o">=</span> <span class="p">{},</span><span class="s">[[</span>
<span class="s">ruler.lua v0.1: an experiment in Chebyshev waiting</span>
<span class="s">(c) Tim Menzies &lt;[email protected]&gt; BSD2</span>
<span class="s">USAGE: ./ruler.lua [OPTIONS]</span>
<span class="s">SETTINGS:</span>
<span class="s"> -b --bins = 7</span>
<span class="s"> -h --help = false</span>
<span class="s"> -n --ndecs = 3</span>
<span class="s"> -R --Run = nothing</span>
<span class="s"> -s --seed = 1234567891</span>
<span class="s"> -t --train = ../ezr/data/misc/auto93.csv</span>
<span class="s"> -v --version = false]]</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-1'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-1'>#</a>
</div>
<hr />
<!-- | o |_ -->
<!-- | | |_) -->
<h2>lib</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kd">local</span> <span class="n">b4</span><span class="o">=</span><span class="p">{};</span> <span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">_</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">_ENV</span><span class="p">)</span> <span class="kr">do</span> <span class="n">b4</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">=</span><span class="n">k</span> <span class="kr">end</span>
<span class="kd">local</span> <span class="kr">function</span> <span class="nf">rogues</span><span class="p">()</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">x</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">_ENV</span><span class="p">)</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="ow">not</span> <span class="n">b4</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="kr">then</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Rogue?&quot;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kd">local</span> <span class="n">big</span><span class="p">,</span><span class="n">fmt</span><span class="p">,</span><span class="n">coerce</span><span class="p">,</span><span class="n">words</span><span class="p">,</span><span class="n">csv</span><span class="p">,</span><span class="n">settings</span><span class="p">,</span><span class="n">cli</span><span class="p">,</span><span class="n">phi</span><span class="p">,</span><span class="n">rnd</span><span class="p">,</span><span class="n">o</span><span class="p">,</span><span class="n">push</span><span class="p">,</span><span class="n">has</span>
<span class="n">big</span> <span class="o">=</span> <span class="mf">1E30</span>
<span class="n">fmt</span> <span class="o">=</span> <span class="nb">string.format</span>
<span class="kr">function</span> <span class="nf">coerce</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">fun</span><span class="p">)</span>
<span class="n">fun</span> <span class="o">=</span> <span class="kr">function</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">if</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;nil&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="kc">nil</span> <span class="kr">else</span>
<span class="kr">return</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;true&quot;</span> <span class="ow">or</span> <span class="p">(</span><span class="n">s</span> <span class="o">~=</span><span class="s2">&quot;false&quot;</span> <span class="ow">and</span> <span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="kc">false</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="nb">math.tointeger</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">tonumber</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="ow">or</span> <span class="n">fun</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="n">match</span><span class="s1">&#39;^%s*(.*%S)&#39;</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">words</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="n">t</span><span class="o">=</span><span class="p">{}</span>
<span class="kr">for</span> <span class="n">s1</span> <span class="kr">in</span> <span class="n">s</span><span class="p">:</span><span class="n">gmatch</span><span class="p">(</span><span class="s2">&quot;([^,]+)&quot;</span><span class="p">)</span> <span class="kr">do</span> <span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">t</span><span class="p">]</span><span class="o">=</span><span class="n">coerce</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">csv</span><span class="p">(</span> <span class="n">sFile</span><span class="p">,</span> <span class="n">fun</span><span class="p">,</span><span class="n">stream</span><span class="p">)</span>
<span class="n">stream</span> <span class="o">=</span> <span class="n">sFile</span> <span class="ow">and</span> <span class="nb">io.input</span><span class="p">(</span><span class="n">sFile</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">io.stdin</span>
<span class="kr">return</span> <span class="kr">function</span><span class="p">(</span> <span class="n">s</span><span class="p">)</span>
<span class="n">s</span><span class="o">=</span><span class="nb">io.read</span><span class="p">()</span>
<span class="kr">if</span> <span class="n">s</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">words</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">else</span> <span class="nb">io.close</span><span class="p">(</span><span class="n">stream</span><span class="p">)</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">settings</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="n">t</span> <span class="o">=</span> <span class="p">{}</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">s1</span> <span class="kr">in</span> <span class="n">s</span><span class="p">:</span><span class="n">gmatch</span><span class="p">(</span><span class="s2">&quot;[-][-]([%S]+)[^=]+=[%s]*([%S]+)&quot;</span><span class="p">)</span> <span class="kr">do</span> <span class="n">t</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">=</span><span class="n">coerce</span><span class="p">(</span><span class="n">s1</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">cli</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">s</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="kr">do</span>
<span class="n">s</span> <span class="o">=</span> <span class="nb">tostring</span><span class="p">(</span><span class="n">k</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">argv</span><span class="p">,</span><span class="n">arg1</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="n">arg1</span><span class="o">==</span><span class="s2">&quot;-&quot;</span><span class="o">..</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="n">sub</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">))</span> <span class="ow">or</span> <span class="n">arg1</span><span class="o">==</span><span class="s2">&quot;--&quot;</span><span class="o">..</span><span class="n">k</span> <span class="kr">then</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;true&quot;</span> <span class="ow">and</span> <span class="s2">&quot;false&quot;</span> <span class="ow">or</span> <span class="n">s</span><span class="o">==</span><span class="s2">&quot;false&quot;</span> <span class="ow">and</span> <span class="s2">&quot;true&quot;</span> <span class="ow">or</span> <span class="n">arg</span><span class="p">[</span><span class="n">argv</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span>
<span class="n">t</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">coerce</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">phi</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">mu</span><span class="p">,</span><span class="n">sd</span><span class="p">,</span> <span class="n">z</span><span class="p">,</span><span class="n">cdf</span><span class="p">)</span>
<span class="n">cdf</span> <span class="o">=</span> <span class="kr">function</span><span class="p">(</span><span class="n">z</span><span class="p">)</span> <span class="kr">return</span> <span class="mi">1</span> <span class="o">-</span> <span class="mf">0.5</span><span class="o">*</span><span class="mf">2.718</span><span class="o">^</span><span class="p">(</span><span class="o">-</span><span class="mf">0.717</span> <span class="o">*</span> <span class="n">z</span> <span class="o">-</span> <span class="mf">0.416</span> <span class="o">*</span> <span class="n">z</span> <span class="o">*</span> <span class="n">z</span><span class="p">)</span> <span class="kr">end</span>
<span class="n">z</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="n">mu</span><span class="p">)</span><span class="o">/</span><span class="n">sd</span>
<span class="kr">return</span> <span class="n">z</span> <span class="o">&gt;=</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">cdf</span><span class="p">(</span><span class="n">z</span><span class="p">)</span> <span class="ow">or</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">cdf</span><span class="p">(</span><span class="o">-</span><span class="n">z</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">rnd</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">ndecs</span><span class="p">,</span> <span class="n">mult</span><span class="p">)</span>
<span class="kr">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">~=</span> <span class="s2">&quot;number&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">n</span> <span class="kr">end</span>
<span class="kr">if</span> <span class="nb">math.floor</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">==</span> <span class="n">n</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">n</span> <span class="kr">end</span>
<span class="n">mult</span> <span class="o">=</span> <span class="mi">10</span><span class="o">^</span><span class="p">(</span><span class="n">ndecs</span> <span class="ow">or</span> <span class="n">the</span><span class="p">.</span><span class="n">ndecs</span><span class="p">)</span>
<span class="kr">return</span> <span class="nb">math.floor</span><span class="p">(</span><span class="n">n</span> <span class="o">*</span> <span class="n">mult</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">/</span> <span class="n">mult</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">o</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">u</span><span class="p">)</span>
<span class="kr">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="o">~=</span> <span class="s2">&quot;table&quot;</span> <span class="kr">then</span> <span class="kr">return</span> <span class="nb">tostring</span><span class="p">(</span><span class="n">rnd</span><span class="p">(</span><span class="n">t</span><span class="p">))</span> <span class="kr">end</span>
<span class="n">u</span><span class="o">=</span><span class="p">{};</span> <span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">v</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="kr">do</span> <span class="n">u</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">u</span><span class="p">]</span> <span class="o">=</span> <span class="o">#</span><span class="n">t</span><span class="o">&gt;</span><span class="mi">0</span> <span class="ow">and</span> <span class="n">o</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="ow">or</span> <span class="n">fmt</span><span class="p">(</span><span class="s2">&quot;:%s %s&quot;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">o</span><span class="p">(</span><span class="n">v</span><span class="p">))</span> <span class="kr">end</span>
<span class="kr">if</span> <span class="o">#</span><span class="n">t</span><span class="o">==</span><span class="mi">0</span> <span class="kr">then</span> <span class="nb">table.sort</span><span class="p">(</span><span class="n">u</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="s2">&quot;(&quot;</span><span class="o">..</span> <span class="nb">table.concat</span><span class="p">(</span><span class="n">u</span><span class="p">,</span><span class="s2">&quot; &quot;</span><span class="p">)</span> <span class="o">..</span><span class="s2">&quot;)&quot;</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">push</span><span class="p">(</span><span class="n">t</span><span class="p">,</span><span class="n">x</span><span class="p">)</span>
<span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="o">+#</span><span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="p">;</span> <span class="kr">return</span> <span class="n">x</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">has</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span><span class="n">i</span><span class="p">)</span>
<span class="n">i</span><span class="p">,</span><span class="n">n</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="o">#</span><span class="n">t</span>
<span class="kr">return</span> <span class="kr">function</span><span class="p">()</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span>
<span class="kr">if</span> <span class="n">i</span><span class="o">&lt;=</span><span class="n">n</span> <span class="kr">then</span> <span class="kr">return</span> <span class="n">t</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-2'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-2'>#</a>
</div>
<hr />
<!-- _ _ | ._ _ ._ _ -->
<!-- (_ (_) | |_| | | | | | _> -->
<h2>columns</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kd">local</span> <span class="n">NUM</span><span class="p">,</span> <span class="n">SYM</span><span class="p">,</span> <span class="n">isNum</span><span class="p">,</span> <span class="n">mid</span><span class="p">,</span> <span class="n">div</span><span class="p">,</span> <span class="n">norm</span><span class="p">,</span> <span class="n">_add2Sym</span><span class="p">,</span> <span class="n">_add2Num</span><span class="p">,</span> <span class="n">add</span>
<span class="kr">function</span> <span class="nf">SYM</span><span class="p">(</span><span class="n">s</span><span class="p">,</span><span class="n">n</span><span class="p">)</span> <span class="kr">return</span> <span class="p">{</span><span class="n">at</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">txt</span><span class="o">=</span><span class="n">s</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">seen</span><span class="o">=</span><span class="p">{},</span> <span class="n">most</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">model</span><span class="o">=</span><span class="kc">nil</span><span class="p">}</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">NUM</span><span class="p">(</span><span class="n">s</span><span class="p">,</span><span class="n">n</span><span class="p">)</span> <span class="kr">return</span> <span class="p">{</span><span class="n">at</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">txt</span><span class="o">=</span><span class="n">s</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">mu</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">m2</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">sd</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">lo</span><span class="o">=</span><span class="n">big</span><span class="p">,</span> <span class="n">hi</span><span class="o">=-</span><span class="n">big</span><span class="p">,</span>
<span class="n">want</span> <span class="o">=</span> <span class="p">(</span><span class="n">s</span> <span class="ow">or</span> <span class="s2">&quot;&quot;</span><span class="p">):</span><span class="n">find</span><span class="s2">&quot;-$&quot;</span> <span class="ow">and</span> <span class="mi">0</span> <span class="ow">or</span> <span class="mi">1</span><span class="p">}</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">isNum</span><span class="p">(</span><span class="n">num</span><span class="p">)</span> <span class="kr">return</span> <span class="n">num</span><span class="p">.</span><span class="n">mu</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">mid</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="kr">return</span> <span class="n">isNum</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="ow">and</span> <span class="n">col</span><span class="p">.</span><span class="n">mu</span> <span class="ow">or</span> <span class="n">col</span><span class="p">.</span><span class="n">mode</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">div</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="kr">return</span> <span class="n">isNum</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="ow">and</span> <span class="n">col</span><span class="p">.</span><span class="n">sd</span> <span class="ow">or</span> <span class="n">entropy</span><span class="p">(</span><span class="n">col</span><span class="p">.</span><span class="n">seen</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">norm</span><span class="p">(</span><span class="n">num</span><span class="p">,</span><span class="n">x</span><span class="p">)</span>
<span class="kr">return</span> <span class="n">x</span><span class="o">==</span><span class="s2">&quot;?&quot;</span> <span class="ow">and</span> <span class="n">x</span> <span class="ow">or</span> <span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="n">num</span><span class="p">.</span><span class="n">lo</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">num</span><span class="p">.</span><span class="n">hi</span> <span class="o">-</span> <span class="n">num</span><span class="p">.</span><span class="n">lo</span> <span class="o">+</span> <span class="mi">1</span><span class="o">/</span><span class="n">big</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">_add2Sym</span><span class="p">(</span><span class="n">sym</span><span class="p">,</span><span class="n">x</span><span class="p">)</span>
<span class="n">sym</span><span class="p">.</span><span class="n">seen</span><span class="p">[</span><span class="n">x</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">+</span> <span class="p">(</span><span class="n">sym</span><span class="p">.</span><span class="n">seen</span><span class="p">[</span><span class="n">x</span><span class="p">]</span> <span class="ow">or</span> <span class="mi">0</span><span class="p">)</span>
<span class="kr">if</span> <span class="n">sym</span><span class="p">.</span><span class="n">seen</span><span class="p">[</span><span class="n">x</span><span class="p">]</span> <span class="o">&gt;</span> <span class="n">sym</span><span class="p">.</span><span class="n">most</span> <span class="kr">then</span> <span class="n">sym</span><span class="p">.</span><span class="n">most</span><span class="p">,</span><span class="n">sym</span><span class="p">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">sym</span><span class="p">.</span><span class="n">seen</span><span class="p">[</span><span class="n">x</span><span class="p">],</span> <span class="n">x</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">_add2Num</span><span class="p">(</span><span class="n">num</span><span class="p">,</span><span class="n">n</span><span class="p">,</span> <span class="n">d</span><span class="p">)</span>
<span class="n">num</span><span class="p">.</span><span class="n">lo</span> <span class="o">=</span> <span class="nb">math.min</span><span class="p">(</span><span class="n">num</span><span class="p">.</span><span class="n">lo</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
<span class="n">num</span><span class="p">.</span><span class="n">hi</span> <span class="o">=</span> <span class="nb">math.max</span><span class="p">(</span><span class="n">num</span><span class="p">.</span><span class="n">hi</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
<span class="n">d</span> <span class="o">=</span> <span class="n">n</span> <span class="o">-</span> <span class="n">num</span><span class="p">.</span><span class="n">mu</span>
<span class="n">num</span><span class="p">.</span><span class="n">mu</span> <span class="o">=</span> <span class="n">num</span><span class="p">.</span><span class="n">mu</span> <span class="o">+</span> <span class="n">d</span><span class="o">/</span><span class="n">num</span><span class="p">.</span><span class="n">n</span>
<span class="n">num</span><span class="p">.</span><span class="n">m2</span> <span class="o">=</span> <span class="n">num</span><span class="p">.</span><span class="n">m2</span> <span class="o">+</span> <span class="n">d</span><span class="o">*</span><span class="p">(</span><span class="n">n</span> <span class="o">-</span> <span class="n">num</span><span class="p">.</span><span class="n">mu</span><span class="p">)</span>
<span class="n">num</span><span class="p">.</span><span class="n">sd</span> <span class="o">=</span> <span class="n">num</span><span class="p">.</span><span class="n">n</span> <span class="o">&lt;</span> <span class="mi">2</span> <span class="ow">and</span> <span class="mi">0</span> <span class="ow">or</span> <span class="p">(</span><span class="n">num</span><span class="p">.</span><span class="n">m2</span><span class="o">/</span><span class="p">(</span><span class="n">num</span><span class="p">.</span><span class="n">n</span> <span class="o">-</span> <span class="mi">1</span><span class="p">))</span><span class="o">^</span><span class="mf">0.5</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">add</span><span class="p">(</span><span class="n">col</span><span class="p">,</span><span class="n">x</span><span class="p">)</span>
<span class="kr">if</span> <span class="n">x</span><span class="o">~=</span> <span class="s2">&quot;?&quot;</span> <span class="kr">then</span>
<span class="n">col</span><span class="p">.</span><span class="n">n</span> <span class="o">=</span> <span class="n">col</span><span class="p">.</span><span class="n">n</span> <span class="o">+</span> <span class="mi">1</span>
<span class="p">(</span><span class="n">isNum</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="ow">and</span> <span class="n">_add2Num</span> <span class="ow">or</span> <span class="n">_add2Sym</span><span class="p">)(</span><span class="n">col</span><span class="p">,</span><span class="n">x</span><span class="p">)</span> <span class="kr">end</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-3'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-3'>#</a>
</div>
<hr />
<!-- _| _. _|_ _. -->
<!-- (_| (_| |_ (_| -->
<h2>data</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kd">local</span> <span class="n">_DATA</span><span class="p">,</span><span class="n">COLS</span><span class="p">,</span><span class="n">chebyshev</span><span class="p">,</span> <span class="n">ordered</span><span class="p">,</span> <span class="n">stats</span><span class="p">,</span> <span class="n">_head</span><span class="p">,</span> <span class="n">_body</span><span class="p">,</span> <span class="n">DATA</span><span class="p">,</span> <span class="n">clone</span>
<span class="kr">function</span> <span class="nf">_DATA</span><span class="p">()</span> <span class="kr">return</span> <span class="p">{</span><span class="n">rows</span><span class="o">=</span><span class="p">{},</span> <span class="n">cols</span><span class="o">=</span><span class="p">{}}</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">COLS</span><span class="p">(</span><span class="n">names</span><span class="p">)</span> <span class="kr">return</span> <span class="p">{</span><span class="n">names</span><span class="o">=</span><span class="n">names</span><span class="p">,</span> <span class="n">all</span><span class="o">=</span><span class="p">{},</span> <span class="n">x</span><span class="o">=</span><span class="p">{},</span> <span class="n">y</span><span class="o">=</span><span class="p">{}</span> <span class="p">}</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">chebyshev</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">t</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span> <span class="c1">-- je bush off&quot;</span>
<span class="n">n</span><span class="o">=</span><span class="mi">0</span>
<span class="kr">for</span> <span class="n">y</span> <span class="kr">in</span> <span class="n">has</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">y</span><span class="p">)</span> <span class="kr">do</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">math.max</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="nb">math.abs</span><span class="p">(</span><span class="n">y</span><span class="p">.</span><span class="n">want</span> <span class="o">-</span> <span class="n">norm</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">t</span><span class="p">[</span><span class="n">y</span><span class="p">.</span><span class="n">at</span><span class="p">])))</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">n</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">ordered</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="nb">table.sort</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="n">rows</span><span class="p">,</span> <span class="kr">function</span><span class="p">(</span><span class="n">t1</span><span class="p">,</span><span class="n">t2</span><span class="p">)</span> <span class="kr">return</span> <span class="n">chebyshev</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">t1</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">chebyshev</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">t2</span><span class="p">)</span> <span class="kr">end</span><span class="p">)</span>
<span class="kr">return</span> <span class="n">data</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">stats</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">s</span><span class="p">,</span><span class="n">f</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="n">t</span><span class="o">=</span><span class="p">{};</span> <span class="kr">for</span> <span class="n">col</span> <span class="kr">in</span> <span class="n">has</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">[</span><span class="n">s</span> <span class="ow">or</span> <span class="s2">&quot;y&quot;</span><span class="p">])</span> <span class="kr">do</span>
<span class="n">t</span><span class="p">[</span><span class="n">col</span><span class="p">.</span><span class="n">txt</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">f</span> <span class="ow">or</span> <span class="n">mid</span><span class="p">)(</span><span class="n">col</span><span class="p">)</span> <span class="kr">end</span><span class="p">;</span> <span class="kr">return</span> <span class="n">t</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">_head</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">row</span><span class="p">,</span> <span class="n">hook</span><span class="p">)</span>
<span class="n">data</span><span class="p">.</span><span class="n">hook</span><span class="p">,</span> <span class="n">data</span><span class="p">.</span><span class="n">cols</span> <span class="o">=</span> <span class="n">hook</span><span class="p">,</span> <span class="n">COLS</span><span class="p">(</span><span class="n">row</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">n</span><span class="p">,</span><span class="n">s</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">row</span><span class="p">)</span> <span class="kr">do</span>
<span class="n">push</span><span class="p">(</span> <span class="n">s</span><span class="p">:</span><span class="n">find</span><span class="s2">&quot;[!+-]$&quot;</span> <span class="ow">and</span> <span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">y</span> <span class="ow">or</span> <span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">x</span><span class="p">,</span>
<span class="n">push</span><span class="p">(</span> <span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">all</span><span class="p">,</span>
<span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="n">find</span><span class="s2">&quot;^[A-Z]&quot;</span> <span class="ow">and</span> <span class="n">NUM</span> <span class="ow">or</span> <span class="n">SYM</span><span class="p">)(</span><span class="n">s</span><span class="p">,</span><span class="n">n</span><span class="p">)))</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">data</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">_body</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">itsRows</span><span class="p">,</span> <span class="n">isOrdered</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">t</span> <span class="kr">in</span> <span class="n">itsRows</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="n">data</span><span class="p">.</span><span class="n">hook</span> <span class="kr">then</span> <span class="n">data</span><span class="p">.</span><span class="n">hook</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">t</span><span class="p">)</span> <span class="kr">end</span>
<span class="n">push</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="n">rows</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">col</span> <span class="kr">in</span> <span class="n">has</span><span class="p">(</span><span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">all</span><span class="p">)</span> <span class="kr">do</span> <span class="n">add</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">t</span><span class="p">[</span><span class="n">col</span><span class="p">.</span><span class="n">at</span><span class="p">])</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">return</span> <span class="n">isOrdered</span> <span class="ow">and</span> <span class="n">ordered</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="ow">or</span> <span class="n">data</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nf">DATA</span><span class="p">(</span><span class="n">itsRows</span><span class="p">,</span> <span class="n">isOrdered</span><span class="p">,</span><span class="n">hook</span><span class="p">)</span>
<span class="kr">return</span> <span class="n">_body</span><span class="p">(</span><span class="n">_head</span><span class="p">(</span><span class="n">_DATA</span><span class="p">(),</span> <span class="n">itsRows</span><span class="p">(),</span><span class="n">hook</span><span class="p">),</span> <span class="c1">-- initialize using row1 of `itsRows</span>
<span class="n">itsRows</span><span class="p">,</span> <span class="c1">-- fill in using other rows</span>
<span class="n">isOrdered</span><span class="p">)</span> <span class="kr">end</span> <span class="c1">-- maybe sort</span>
<span class="kr">function</span> <span class="nf">clone</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">ts</span><span class="p">,</span><span class="n">isOrdered</span><span class="p">)</span>
<span class="kr">return</span> <span class="n">_body</span><span class="p">(</span><span class="n">_head</span><span class="p">(</span><span class="n">_DATA</span><span class="p">(),</span> <span class="n">data</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">names</span><span class="p">),</span> <span class="c1">-- initialize use data&#39;s column names</span>
<span class="n">has</span><span class="p">(</span><span class="n">ts</span> <span class="ow">or</span> <span class="p">{}),</span> <span class="c1">-- fill in using `ts`</span>
<span class="n">isOrdered</span><span class="p">)</span> <span class="kr">end</span> <span class="c1">-- maybe sort</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-4'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-4'>#</a>
</div>
<hr />
<!-- _| _ ._ _ _ _ -->
<!-- (_| (/_ | | | (_) _> -->
<h2>demos</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kd">local</span> <span class="n">eg</span><span class="o">=</span><span class="p">{}</span>
<span class="kr">function</span> <span class="nc">eg</span><span class="p">.</span><span class="nf">the</span><span class="p">()</span> <span class="nb">print</span><span class="p">(</span><span class="n">o</span><span class="p">(</span><span class="n">the</span><span class="p">))</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nc">eg</span><span class="p">.</span><span class="nf">csv</span><span class="p">()</span> <span class="kr">for</span> <span class="n">row</span> <span class="kr">in</span> <span class="n">csv</span><span class="p">(</span><span class="n">the</span><span class="p">.</span><span class="n">train</span><span class="p">)</span> <span class="kr">do</span> <span class="nb">print</span><span class="p">(</span><span class="n">o</span><span class="p">(</span><span class="n">row</span><span class="p">))</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nc">eg</span><span class="p">.</span><span class="nf">data</span><span class="p">(</span> <span class="n">d</span><span class="p">)</span>
<span class="n">d</span><span class="o">=</span> <span class="n">DATA</span><span class="p">(</span><span class="n">csv</span><span class="p">(</span><span class="n">the</span><span class="p">.</span><span class="n">train</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="n">o</span><span class="p">(</span><span class="n">d</span><span class="p">.</span><span class="n">cols</span><span class="p">.</span><span class="n">x</span><span class="p">[</span><span class="mi">4</span><span class="p">]))</span>
<span class="nb">print</span><span class="p">(</span><span class="n">o</span><span class="p">(</span><span class="n">stats</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="s2">&quot;y&quot;</span><span class="p">,</span><span class="n">mid</span><span class="p">)))</span>
<span class="nb">print</span><span class="p">(</span><span class="o">#</span><span class="n">d</span><span class="p">.</span><span class="n">rows</span><span class="p">)</span> <span class="kr">end</span>
<span class="kr">function</span> <span class="nc">eg</span><span class="p">.</span><span class="nf">data</span><span class="p">(</span> <span class="n">d</span><span class="p">)</span>
<span class="kr">for</span> <span class="n">k</span><span class="p">,</span><span class="n">t</span> <span class="kr">in</span> <span class="nb">pairs</span><span class="p">(</span><span class="n">DATA</span><span class="p">(</span><span class="n">csv</span><span class="p">(</span><span class="n">the</span><span class="p">.</span><span class="n">train</span><span class="p">),</span><span class="kc">true</span><span class="p">).</span><span class="n">rows</span><span class="p">)</span> <span class="kr">do</span>
<span class="kr">if</span> <span class="p">(</span><span class="n">k</span> <span class="o">%</span> <span class="mi">40</span><span class="p">)</span> <span class="o">==</span><span class="mi">1</span> <span class="kr">then</span> <span class="nb">print</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">o</span><span class="p">(</span><span class="n">t</span><span class="p">))</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span></pre></div>
</div>
</div>
<div class='clearall'></div>
<div class='section' id='section-5'>
<div class='docs'>
<div class='octowrap'>
<a class='octothorpe' href='#section-5'>#</a>
</div>
<hr />
<!-- ._ _ _. o ._ -->
<!-- | | | (_| | | | -->
<h2>main</h2>
</div>
<div class='code'>
<div class="highlight"><pre><span class="kd">local</span> <span class="kr">function</span> <span class="nf">main</span><span class="p">()</span>
<span class="n">the</span> <span class="o">=</span> <span class="n">cli</span><span class="p">(</span><span class="n">the</span><span class="p">)</span>
<span class="kr">if</span> <span class="n">the</span><span class="p">.</span><span class="n">help</span> <span class="kr">then</span> <span class="nb">print</span><span class="p">(</span><span class="n">help</span><span class="p">)</span> <span class="kr">else</span>
<span class="kr">if</span> <span class="n">the</span><span class="p">.</span><span class="n">version</span> <span class="kr">then</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;ruler.lua v0.1&quot;</span><span class="p">)</span> <span class="kr">else</span>
<span class="nb">math.randomseed</span><span class="p">(</span><span class="n">the</span><span class="p">.</span><span class="n">seed</span><span class="p">)</span>
<span class="kr">if</span> <span class="n">eg</span><span class="p">[</span><span class="n">the</span><span class="p">.</span><span class="n">Run</span><span class="p">]</span> <span class="kr">then</span> <span class="n">eg</span><span class="p">[</span><span class="n">the</span><span class="p">.</span><span class="n">Run</span><span class="p">]()</span> <span class="kr">else</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;?&quot;</span><span class="p">)</span> <span class="kr">end</span>
<span class="n">rogues</span><span class="p">()</span> <span class="kr">end</span> <span class="kr">end</span> <span class="kr">end</span>
<span class="n">the</span> <span class="o">=</span> <span class="n">settings</span><span class="p">(</span><span class="n">help</span><span class="p">)</span>
<span class="kr">if</span> <span class="nb">pcall</span><span class="p">(</span><span class="nb">debug.getlocal</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="kr">then</span> <span class="kr">return</span> <span class="p">{</span><span class="n">DATA</span><span class="o">=</span><span class="n">DATA</span><span class="p">,</span> <span class="n">the</span><span class="o">=</span><span class="n">the</span><span class="p">,</span> <span class="n">csv</span><span class="o">=</span><span class="n">csv</span><span class="p">}</span>
<span class="kr">else</span> <span class="n">main</span><span class="p">()</span>
<span class="kr">end</span>
</pre></div>
</div>
</div>
<div class='clearall'></div>
</div>
</body>
#!/usr/bin/env lua
-- <!-- vim: set ts=2 sw=2 sts=2 et: -->
-- ### My types:
--
-- - `list`s have numeric integers.
-- - `dict`s have key indexes.
-- - `table` = `list` | `dict`
-- - `number` = `float` | `int`
-- - `atom` = `str` | `bool` | `number`
-- - `COL` = `NUM` | `SYM`
-- - `row` = `list[atom]`
-- - `rows` = `list[row]`
-- - `klasses` = `dict[src,rows]`
local m = {} -- this module
local help = [[
ruler.lua : v0.1: an experiment in Chebyshev weighting
(c) Tim Menzies <[email protected]> BSD2
USAGE: ./ruler.lua [OPTIONS]
SETTINGS:
-b --bins = 7
-h --help = false
-l --label = 4
-L --Label = 20
-n --ndecs = 3
-R --Run = nothing
-s --seed = 1234567891
-t --train = ../ezr/data/misc/auto93.csv
-v --version = false]]
local l = require"lib"
local the = l.settings(help) -- "the" is of type dict[str,atom]
local big,by,csv,has,o,push,sort = l.big,l.by,l.csv,l.has,l.o,l.push,l.sort
-- ---------------------------------------------------------------------------------------
-- ## Columns
-- ### Structs
-- `SYM(s:str, n:int) -> SYM`
-- Make an empty summary of a stream of symbols; `s` is the column name
-- and `n` is the column position.
function m.SYM(s,n)
  local sym = {n=0, most=0, mode=nil}
  sym.at, sym.txt = n, s
  sym.seen, sym.ys = {}, {}
  return sym end
-- `NUM(s:str, n:int) -> NUM`
-- Make an empty summary of a stream of numbers. `want` is the goal value:
-- 0 when the column name ends in "-", else 1.
function m.NUM(s,n)
  local goal = 1
  if (s or ""):find"-$" then goal = 0 end
  return {at=n, txt=s, n=0, mu=0, m2=0, sd=0, lo=big, hi=-big, want=goal, ys={}} end
-- `isNum(col:COL) -> number | nil`
-- Returns the column's `mu` field: a number (hence truthy) for NUMs and
-- nil for SYMs, which are built without that field.
function m.isNum(col)
  local mu = col.mu
  return mu end
-- ### Query
-- `mid(col:COL) -> atom`
-- Central tendency of a distribution: mean for NUMs, mode for SYMs.
function m.mid(col)
  if m.isNum(col) then return col.mu end
  return col.mode end
-- `div(col:COL) -> float`
-- Diversity away from central tendency: standard deviation for NUMs,
-- entropy of the symbol counts for SYMs.
function m.div(col)
  if m.isNum(col) then return col.sd end
  -- This module never defines `m.entropy`, so calling it crashed on every SYM.
  -- Prefer it if someone adds it later, else use the helper from `lib`
  -- (assumes `lib` exports `entropy` -- TODO confirm).
  return (m.entropy or l.entropy)(col.seen) end
-- `norm(num:NUM, x:number) -> 0..1 | ?`
-- Rescale `x` using the `lo`..`hi` range seen so far; "?" passes through untouched.
function m.norm(num,x)
  if x=="?" then return x end
  local span = num.hi - num.lo + 1/big -- the 1/big term avoids divide-by-zero when hi==lo
  return (x - num.lo)/span end
-- `_binNum(num:NUM, n:number) -> int`
-- Private. Map `n` to the range 1..the.bins using `l.phi(n, mu, sd)`
-- (presumably a cumulative distribution returning 0..1 -- confirm in `lib`).
local function _binNum(num,n)
  -- The original read `col.mu`/`col.sd`, but no `col` is in scope here;
  -- the summary being binned is the `num` argument.
  return math.min(the.bins, 1 + (l.phi(n,num.mu,num.sd) * the.bins)//1) end
-- `bin(col:COL, x:atom) -> atom`
-- Discretize `x`. Unknowns ("?") and symbols are returned unchanged;
-- numbers are normalized to 0..1 and mapped to an integer in 1..the.bins.
function m.bin(col, x)
  if x=="?" or not m.isNum(col) then return x end
  -- `norm` is a field of this module (there is no global `norm`), and Lua has
  -- no backslash line continuation, so the expression is simply written out.
  return math.min(the.bins, 1 + (m.norm(col,x) * the.bins) // 1) end
-- ### Update
-- `_add2Sym(sym:SYM, x:atom) -> nil`
-- Private. Count one more sighting of `x` and refresh the mode if `x` is
-- now the most frequent symbol. Called by `add2col()`.
local function _add2Sym(sym,x)
  local count = (sym.seen[x] or 0) + 1
  sym.seen[x] = count
  if count > sym.most then sym.most, sym.mode = count, x end end
-- `_add2Num(num:NUM, n:number) -> nil`
-- Private. Fold `n` into the running lo, hi, mean, m2 and sd.
-- Expects `num.n` to have been incremented already. Called by `add2col()`.
local function _add2Num(num,n,     d)
  d = n - num.mu -- distance from the old mean
  num.lo, num.hi = math.min(num.lo, n), math.max(num.hi, n)
  num.mu = num.mu + d/num.n
  num.m2 = num.m2 + d*(n - num.mu) -- uses both the old and the new mean
  if num.n < 2
  then num.sd = 0
  else num.sd = (num.m2/(num.n - 1))^0.5 end end
-- `add2col(col:COL, x:atom) -> nil`
-- Update a COL with `x`, ignoring the unknown marker `"?"`.
function m.add2col(col,x)
  if x == "?" then return end
  col.n = col.n + 1
  local update = m.isNum(col) and _add2Num or _add2Sym
  update(col,x) end
-- `adds2col(col:COL, t:table, ?f:fun) -> COL`
-- Add every item of `t` to `col`, first passing each through `f`
-- (default: identity). Returns `col`.
function m.adds2col(col,t,  f)
  f = f or function(x) return x end
  -- `add2col` lives in this module table; the bare name was an undefined global.
  for _,x in pairs(t) do m.add2col(col,f(x)) end
  return col end
-- ---------------------------------------------------------------------------------------
-- ## Data
-- ### Structs
-- `_DATA() -> DATA`
-- Private. Make the empty container: `rows` of data, summarized in `cols` (columns).
-- Called by `DATA()` and `clone()`.
local function _DATA()
  local data = {}
  data.rows, data.cols = {}, {}
  return data end
-- `COLS(names: list[str]) -> COLS`
-- Holder for the different roles of our columns: independents go in `x`,
-- dependents in `y`, and every column is also kept in `all`.
function m.COLS(names)
  local cols = {names=names}
  cols.all, cols.x, cols.y = {}, {}, {}
  return cols end
-- ### Queries
-- `chebyshev(data:DATA, t:list[atom]) -> float`
-- Max distance (normalized 0..1) of any goal to the most desired value of that column.
-- Goals whose value in `t` is unknown ("?") are skipped.
function m.chebyshev(data,t,     n) -- je bush off"
  n = 0
  for y in has(data.cols.y) do
    local v = m.norm(y, t[y.at])
    -- `m.norm` passes "?" straight through; subtracting it from a number would raise.
    if v ~= "?" then n = math.max(n, math.abs(y.want - v)) end end
  return n end
-- `ordered(data:DATA) -> DATA`
-- Sort `data.rows` in place by ascending Chebyshev distance
-- (rows nearest the goals come first), then return `data`.
function m.ordered(data)
  local function nearer(t1,t2)
    return m.chebyshev(data,t1) < m.chebyshev(data,t2) end
  table.sort(data.rows, nearer)
  return data end
-- `stats(data:DATA, ?cols:str="y", ?fun:fun=mid) -> dict`
-- Return a dict, keyed by column name, holding `fun` applied to each of the
-- `cols` columns and rounded to `the.ndecs` places.
function m.stats(data,  cols,fun,     t)
  local summarize = fun or m.mid
  t = {}
  for col in has(data.cols[cols or "y"]) do
    local v = summarize(col)
    t[col.txt] = l.rnd(v, the.ndecs) end
  return t end
-- ### Update
-- `_head(data:DATA, row:row, ?hook:fun) -> DATA`
-- Private: Build one COL per header name. Names starting with an upper-case
-- letter become NUMs, the rest SYMs. Every column is stored in `all`; names
-- ending in `!`, `+` or `-` are also filed under `y`, everything else under `x`.
local function _head(data,row,  hook)
  data.hook = hook
  data.cols = m.COLS(row)
  for at,name in pairs(row) do
    local make = name:find"^[A-Z]" and m.NUM or m.SYM
    local col  = push(data.cols.all, make(name,at))
    local role = name:find"[!+-]$" and data.cols.y or data.cols.x
    push(role, col) end
  return data end
-- `_body(data:DATA, itsRows:iterator -> row, ?isOrdered:bool=false) -> DATA`
-- Private: Store each row (after showing it to `data.hook`, if any) and
-- update every column summary with it. Optionally sort the rows at the end.
local function _body(data,itsRows,  isOrdered)
  for row in itsRows do
    local hook = data.hook
    if hook then hook(data,row) end
    push(data.rows, row)
    for col in has(data.cols.all) do m.add2col(col, row[col.at]) end end
  if isOrdered then return m.ordered(data) end
  return data end
-- ### Create
-- `DATA(itsRows:iterator -> row, ?isOrdered:bool=false, ?hook:fun) -> DATA`
-- Make a new DATA: the first item pulled from `itsRows` names the columns,
-- the remaining items become rows, and the rows are optionally sorted.
function m.DATA(itsRows,  isOrdered,hook)
  local data  = _DATA()
  local names = itsRows() -- consumes row 1, so `_body` only sees the data rows
  _head(data, names, hook)
  return _body(data, itsRows, isOrdered) end
-- `clone(data:DATA, ?rows:rows, ?isOrdered:bool) -> DATA`
-- Make a new DATA with the same column names as `data` (but fresh
-- summaries), fill it from `rows` (default: none), then maybe sort.
function m.clone(data,  rows,isOrdered)
  local copy = _head(_DATA(), data.cols.names)
  local it   = has(rows or {})
  return _body(copy, it, isOrdered) end
--------------------------------------
-- ## Optimise
-- `_now(data:DATA) -> nil`
-- Private. Print the mean and standard deviation of the Chebyshev distances
-- of all rows in `data`.
local function _now(data,     num)
  -- Both helpers are reached through `m`: bare `adds2col` is not defined
  -- anywhere, and the local alias `NUM` is only declared further down the file.
  num = m.adds2col(m.NUM(), data.rows, function(row) return m.chebyshev(data,row) end)
  print(num.mu, num.sd) end
-- `bins(data:DATA) -> list[dict], float`
-- For every known x-value of every row, record `{x, y, txt, at}` where `y` is
-- 1 minus the row's Chebyshev distance (so larger is better). Returns those
-- records after `sort(t, by"y")`, plus one tenth of the first record's `y`
-- (0 when there are no records).
function m.bins(data,     max,t,y,x)
  -- The original began by filling a per-column table `w`, but `w` (and its
  -- index `i`) were undeclared globals, so `w[i] = {}` raised on the first
  -- column. Nothing ever read `w`, so that dead set-up loop is dropped.
  max,t = 0,{}
  for _,row in pairs(data.rows) do
    y = 1 - m.chebyshev(data,row) -- larger values are better
    max = math.max(max,y)
    print("!!", y,max,o(row))
    for _,col in pairs(data.cols.x) do
      x = row[col.at]
      if x ~= "?" then
        push(t, {x=x, y=y, txt=col.txt, at=col.at}) end end end
  t = sort(t, by"y")
  -- Guard the empty case: with no rows there is no t[1] to index.
  return t, (t[1] and t[1].y/10 or 0) end
-- function m.chebyshevs1(data,todo, hook, bins)
-- bins = _chebyshevBins(data,{})
-- table.sort(todo, function(a,b) return _lt(data,bins,a) < _lt(data,bins,yb) end)
-- add2Data(data, table.remove(todo))
-- if hook then hook(data) end end
-- return data,todo end
--
-- function m.rules(data, score, hook, todo,data1,bins)
-- todo = l.shuffle(data.rows)
-- data1 = clone(data, l.removes(todo, the.label))
-- for _ = 1,the.Last - the.label do data1, todo = m.chebyshevs1(data1,todo,hook) end
-- return data1.rows end
--
--------------------------------------
-- ## Demos
-- Start-up actions that can be called with `./ruler.lua -R xx`.
m.eg ={}
local eg,DATA,COLS,NUM,SYM = m.eg, m.DATA, m.COLS,m.NUM,m.SYM
-- Show the current settings.
function eg.the()
  local shown = o(the)
  print(shown) end
-- Print every row of the training file.
function eg.csv()
  local rows = csv(the.train)
  for row in rows do print(o(row)) end end
-- Load the training data, then show the 4th x column, the central
-- tendencies of the y columns, and the number of rows.
function eg.stats(     d)
  d = DATA(csv(the.train))
  local fourth  = d.cols.x[4]
  local summary = m.stats(d, "y", m.mid)
  print(o(fourth))
  print(o(summary))
  print(#d.rows) end
-- Load the training data sorted by Chebyshev distance, then print every
-- 40th row (keys 1, 41, 81, ...).
function eg.data()
  local sorted = DATA(csv(the.train), true)
  for k,t in pairs(sorted.rows) do
    local wanted = (k % 40) == 1
    if wanted then print(k, o(t)) end end end
-- Load the training data, compute `m.bins`, and print every 20th record.
function eg.bins(     d)
  d = DATA(csv(the.train))
  -- Everything below is now local; the original leaked `i`, `t`, `s`, `t1`
  -- and `s1` into the global table, where `rogues()` would report them.
  local t = m.bins(d)
  for i,one in pairs(t) do
    -- NOTE(review): `one` is a single record, so `s1` is always nil and the
    -- middle column prints "nil"; kept as in the original pending a decision
    -- on what was meant to be shown there.
    local t1,s1 = one
    if (i%20)==1 then print(i,s1,o(t1)) end end end
---------------------------------------
-- ## main
-- `main() -> nil`
-- Update the settings from the command line, then either show the help
-- text, show the version, or seed the random numbers and run the demo named
-- by `the.Run` (printing "?" if there is no such demo) before `l.rogues()`.
function m.main()
  the = l.cli(the)
  if the.help    then print(help); return end
  if the.version then print("ruler.lua v0.1"); return end
  math.randomseed(the.seed)
  local action = eg[the.Run]
  if action then action() else print("?") end
  l.rogues() end
-- Always create the settings (re-parsed from the `help` text, so this
-- repeats the assignment made near the top of the file).
the = l.settings(help)
-- If we are the top-level control, then call `main()`.
-- `debug.getlocal` raises an error when its stack level is out of range, so
-- the `pcall` fails when there is no level-4 caller -- presumably that is the
-- case only when this file is run directly rather than loaded via `require`.
if not pcall(debug.getlocal, 4, 1) then return m.main() end
-- Share and enjoy.
-- Otherwise export the module, with the settings and help text attached.
m.the=the; m._help=help; return m
#!/usr/bin/env lua
-- <!-- vim: set ts=2 sw=2 sts=2 et: -->
local l,the,help = {}, {}, [[
ruler.lua : v0.1: an experiment in Chebyshev weighting
(c) Tim Menzies <[email protected]> BSD2
USAGE: ./ruler.lua [OPTIONS]
SETTINGS:
-b --bins = 7
-h --help = false
-l --label = 4
-L --Label = 20
-n --ndecs = 3
-R --Run = nothing
-s --seed = 1234567891
-t --train = ../ezr/data/misc/auto93.csv
-v --version = false]]
local b4={}; for k,_ in pairs(_ENV) do b4[k]=k end
local NUM,SYM,DATA,COLS = {},{},{},{}
local big = 1E30
-- Make an empty symbol summary for the column named `s` at position `n`.
function SYM:new(s,n)
  local fresh = {n=0, most=0, mode=nil, seen={}}
  fresh.at, fresh.txt = n, s
  return fresh end
-- Make an empty numeric summary for the column named `s` at position `n`.
-- `want` is the goal value: 0 when the name ends in "-", else 1.
function NUM:new(s,n)
  local goal = 1
  if (s or ""):find"-$" then goal = 0 end
  return {at=n, txt=s, n=0, mu=0, m2=0, lo=big, hi=-big, want=goal} end
-- Make one column per name and file each under its role(s).
-- Names starting with an upper-case letter become NUMs, the rest SYMs.
function COLS:new(names)
  self.names, self.x, self.y, self.all = names,{},{},{}
  for n,s in pairs(self.names) do
    -- "^[A-Z]" needs one capital; the old "^[A-Z]*" also matched the empty
    -- string, so every column became a NUM. The method that files a column
    -- is `fill` (there is no `place`).
    self:fill( (s:find"^[A-Z]" and NUM or SYM)(s,n) ) end end
-- File `col` in `all` and, unless its name ends in "X" (skip it), in `y`
-- (names ending in `!`, `+` or `-`) or `x`. A name ending in "!" also
-- becomes `self.klass`.
function COLS:fill(col)
  local s = col.txt -- the column's name decides its role
  l.push(self.all, col)
  if not s:find"X$" then
    l.push(s:find"[!+-]$" and self.y or self.x, col)
    if s:find"!$" then self.klass = col end end end
-- Fill this DATA from the iterator `it` (via `DATA:add`, passing `hook`
-- along), then, if `isOrdered`, sort the rows by ascending `DATA:want`.
function DATA:new(it,  isOrdered,hook)
  self.rows, self.cols = self.rows or {}, self.cols or nil
  for t in it do self:add(t, hook) end
  if isOrdered then
    -- The rows live on `self`; no variable called `data` exists in this scope.
    table.sort(self.rows, function(a,b) return self:want(a) < self:want(b) end) end end
-- Make a new DATA with the same column names as this one, then load it
-- with `rows` (default: none). Extra arguments are passed on to `DATA:new`.
function DATA:clone(rows, ...)
  -- `has` is only reachable as `l.has`. A one-item list holding the names
  -- acts as the header row.
  local t = DATA(l.has({self.cols.names}))
  t:new(l.has(rows or {}), ...) -- `rows or {}` so a call without rows gives an empty clone
  return t end
-- Central tendency: mode for symbols, mean for numbers.
function SYM:mid()
  return self.mode end
function NUM:mid()
  return self.mu end
-- Diversity: entropy for symbols, sample standard deviation for numbers
-- (0 until at least two numbers have been seen).
function SYM:div()
  return l.entropy(self.seen) end
function NUM:div()
  if self.n < 2 then return 0 end
  return (self.m2/(self.n - 1))^0.5 end
-- Add one row. The very first row seen is the header and only creates the
-- columns; each later row is shown to `hook` (if any), stored, and used to
-- update every column summary.
function DATA:add(t,  hook)
  if self.cols
  then if hook then hook(self,t) end -- pass this DATA; `data` was undefined here
       l.push(self.rows, t)          -- `push` is only reachable as `l.push`
       for _,col in pairs(self.cols.all) do col:add( t[col.at] ) end
  else self.cols=COLS(t) end end
-- Count one more sighting of `x` (unless it is the unknown marker "?"),
-- updating the mode when `x` becomes the most frequent symbol.
function SYM:add(x)
  if x == "?" then return end
  self.n = self.n + 1
  local count = 1 + (self.seen[x] or 0)
  self.seen[x] = count
  if count > self.most then
    self.mode, self.most = x, count end end
-- Fold the number `x` into the running count, lo, hi, mean and m2
-- (unless it is the unknown marker "?").
function NUM:add(x,     d)
  if x ~= "?" then
    self.n  = self.n + 1
    -- The incoming value is `x`; the original read an undefined `n` below.
    self.lo = math.min(x, self.lo)
    self.hi = math.max(x, self.hi)
    d       = x - self.mu               -- distance from the old mean
    self.mu = self.mu + d/self.n
    self.m2 = self.m2 + d*(x - self.mu) end end
-- Rescale `x` using the `lo`..`hi` range seen so far; "?" passes through untouched.
function NUM:norm(x)
  if x=="?" then return x end
  local span = self.hi - self.lo + 1/big -- the 1/big term avoids divide-by-zero when hi==lo
  return (x - self.lo)/span end
-- Score a row as 1 minus the largest distance, over the y columns, between
-- the column's goal (`want`) and the row's normalized value in that column.
function DATA:want(row,     d)
  d=0
  for _,y in pairs(self.cols.y) do
    -- Index the row by the goal column's own position; `c` was never defined.
    d = math.max(d, math.abs(y.want - y:norm(row[y.at]))) end
  return 1 - d end
-- ---------------------------------------------------------------------------------------
-- Symbols are their own bin.
function SYM:bin(x)
  return x end
-- Numbers are normalized, then mapped to an integer bin in 1..the.bins;
-- "?" passes through untouched.
function NUM:bin(x)
  if x=="?" then return x end
  local b = 1 + (self:norm(x) * the.bins) // 1
  return math.min(the.bins, b) end
-- ---------------------------------------------------------------------------------------
l.fmt = string.format
-- Render anything as a string. Non-tables are rounded (if numeric) and
-- stringified. Tables with a non-empty array part print their values only;
-- other tables print sorted ":key value" pairs. Nested tables recurse.
function l.o(t,     u)
  if type(t) ~= "table" then return tostring(l.rnd(t)) end
  local isList = #t > 0
  u = {}
  for k,v in pairs(t) do
    local s = l.o(v)
    if not isList then s = l.fmt(":%s %s", k, s) end
    u[#u + 1] = s end
  if not isList then table.sort(u) end
  return "(" .. table.concat(u," ") .. ")" end
-- Return an iterator over the items t[1]..t[#t] (the length is read once, up front).
function l.has(t,     n,i)
  n, i = #t, 0
  local function nextItem()
    i = i + 1
    if i > n then return end
    return t[i] end
  return nextItem end
local m={}
-- Append `x` to the list `t`, returning `x` (so calls can be nested).
function l.push(t,x)
  local last = #t + 1
  t[last] = x
  return x end
-- Add every item of `t` to `col` via `col:add`, optionally mapping each
-- item through `f` first. Returns `t`.
function l.adds(col,t,  f)
  for _,x in pairs(t) do
    local item = f and f(x) or x
    col:add(item) end
  return t end
-- Round `n` to `ndecs` decimal places (default 3). Non-numbers and
-- whole numbers are returned untouched.
function l.rnd(n,  ndecs,     mult)
  if type(n) ~= "number" or math.floor(n) == n then return n end
  mult = 10^(ndecs or 3)
  local scaled = math.floor(n * mult + 0.5)
  return scaled / mult end
-- Entropy (in bits) of the counts held in `t`, a dict mapping each symbol
-- to the number of times it was seen. Returns 0 for an empty dict.
function l.entropy(t,     e,N)
  -- `t` is keyed by symbol, so it must be walked with `pairs`; the array
  -- iterator `l.has` sees no items in such a table.
  N=0; for _,n in pairs(t) do N = N+n end
  e=0
  for _,n in pairs(t) do
    -- Accumulate over all symbols (the original kept only the last term).
    -- Zero counts contribute nothing and would otherwise give log(0).
    if n > 0 then e = e - n/N * math.log(n/N,2) end end
  return e end
-- Map the strings "nil", "true" and "false" to nil, true and false; any
-- other truthy value is returned unchanged, and a falsy one becomes false.
local function _coerce1(s)
  if s=="nil"   then return nil   end
  if s=="true"  then return true  end
  if s=="false" then return false end
  return s or false end
-- Turn a string into an integer, a float, a boolean, nil, or (failing all
-- of those) the same string with outer whitespace trimmed.
function l.coerce(s)
  local n = math.tointeger(s) or tonumber(s)
  if n then return n end
  local trimmed = s:match'^%s*(.*%S)'
  return _coerce1(trimmed) end
-- Parse a help string into a settings dict: every "--key ... = value" found
-- yields `t[key] = l.coerce(value)`.
function l.settings(s,     t)
  local pattern = "[-][-]([%S]+)[^=]+=[%s]*([%S]+)"
  t = {}
  for key,value in s:gmatch(pattern) do
    t[key] = l.coerce(value) end
  return t end
-- Update the settings `t` from the command line (the global `arg`).
-- A setting `key` is selected by `--key` or by `-k` (its first letter).
-- Boolean settings need no argument: naming the flag flips the current value.
-- Any other setting takes its new value from the next command-line word.
function l.cli(t)
  for key, v in pairs(t) do
    local s = tostring(key)
    v = tostring(v) -- current value as text, so booleans can be recognised
    for argv,arg1 in pairs(arg) do
      if arg1=="-"..(s:sub(1,1)) or arg1=="--"..s then
        -- The original tested the key's name here instead of its value, so
        -- booleans never flipped and always consumed the next word.
        v = v=="true" and "false" or v=="false" and "true" or arg[argv+1]
        -- A value flag given as the last word has nothing to read; leave
        -- the setting alone rather than crash inside `l.coerce`.
        if v ~= nil then t[key] = l.coerce(v) end end end end
  return t end
-- Turn the table `t` into a class named `s`: instances look up methods in
-- `t`, print via `l.o`, and are made by calling `t(...)`, which runs
-- `t.new(instance, ...)` and uses whatever that returns (or else the instance).
function l.obj(s, t)
  t._name, t.__index, t.__tostring = s, t, l.o
  local function construct(_, ...)
    local i = setmetatable({}, t)
    return setmetatable(t.new(i,...) or i, t) end
  return setmetatable(t, {__call = construct}) end
-- Report every global that was not present when this file started loading
-- (those names were recorded in `b4`).
function l.rogues()
  for k,v in pairs(_ENV) do
    local known = b4[k]
    if not known then print("Rogue?",k,type(v)) end end end
-- Build the settings from the help text and the command line, seed the
-- random number generator, run the demo named by `the.Run` (printing "?" if
-- `eg` has no such demo), then report any rogue globals.
function l.runs(eg)
  the = l.settings(help)
  the = l.cli(the)
  math.randomseed(the.seed)
  -- The default for `--Run` is "nothing", which names no demo; calling
  -- `eg[the.Run]()` unguarded raised "attempt to call a nil value".
  local action = eg[the.Run]
  if action then action() else print("?") end
  l.rogues() end
-- ---------------------------------------------------------------------------------------
local eg={}
-- Show the current settings.
function eg.the()
  local shown = l.o(the)
  print(shown) end
-- Show a freshly made NUM.
function eg.num()
  local num = NUM()
  print(num) end
-- Build columns from three sample names and print each one.
function eg.cols(     c)
  local names = {"name", "age+", "weight-"}
  c = COLS(names).all
  for _,col in pairs(c) do
    print(l.o(col)) end end
-- ---------------------------------------------------------------------------------------
-- Turn each of the four tables into a callable class (see `l.obj`). This must
-- happen before any demo runs, since the demos construct objects by calling
-- `NUM()`, `COLS()` and so on.
for name,t in pairs{DATA=DATA, COLS=COLS, NUM=NUM, SYM=SYM} do l.obj(name,t) end
-- Parse the settings and run the demo named on the command line.
l.runs(eg)

todo

need rrp

simpler rules. just greedy search

  • set minwant in settings and merge if less than minwant
    • rule generation via greedy search

todo

simpler rules. just greedy search

  • set minwant in settings and merge if less than minwant
    • rule generation via greedy search
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment