Skip to content

Instantly share code, notes, and snippets.

@timm
Last active March 31, 2023 13:44
Show Gist options
  • Save timm/437a34c23975f93512d885ea2c311d80 to your computer and use it in GitHub Desktop.
Save timm/437a34c23975f93512d885ea2c311d80 to your computer and use it in GitHub Desktop.
multi-objective semi-supervised explanations (in julia)

juju

Url: http://tiny.cc/jujus

Noun:

  • The power associated with a juju
  • A charm superstitiously believed to embody magical powers
#!/usr/bin/env bash
# Run julia with the flags below, forwarding every command-line argument.
# "$@" (instead of an unquoted $*) hands each argument over exactly as it was
# typed, so values containing spaces or glob characters are not re-split.
julia -O0 --compile=min --startup=no "$@"
# vim: set et sts=2 sw=2 ts=2 :
include("lib.jl")
# Parse the help text into `the` (a named tuple of option defaults) and
# `help` (the text itself). Every option line must keep the shape
#   -x --name description = default
# because `settings` extracts the long flag name and the token after "=".
the,help = settings("
JUJU: multi-objective semi-supervised explanations in O(log(N)) time
(c) 2023 Tim Menzies <[email protected]> BSD-2 license
USAGE:
julia juju.jl [OPTIONS]
OPTIONS:
-c --cohen trivial is up to sd*cohen = 0.35
-f --file where to get data = ../data/auto93.csv
-g --go start up action = nothing
-h --help show help = false
-s --seed random number seed = 937162211")
# ROW: one record of data.
#   cells : the cell values, looked up by a column's `pos`
#   id    : integer identifier (defaults to 0)
#   klass : defaults to `nothing`
@with_kw mutable struct ROW
  cells = []
  id = 0
  klass = nothing
end
# Calling a ROW with a column returns the row's cell at that column's
# position (`col.pos`). The lookup is silent: the stray debugging
# println("asdas", ...) that ran on every access has been removed.
(i::ROW)(col) = i.cells[col.pos]
# DATA: a table of rows plus summaries of its columns.
#   rows : the ROWs stored so far
#   cols : column summaries; stays `nothing` until the first (header) row
#          has been passed to `row`
@with_kw mutable struct DATA
  rows = []
  cols = nothing
end
# NUM: incremental summary of a numeric column.
#   pos, txt : column position and name
#   n        : how many values have been added
#   w        : weight, set by COL to -1 when the name ends in "-", else 1
#   lo, hi   : smallest / largest value seen (start at +/- 10^64)
#   mu, m2   : running mean and running sum of squared deviations,
#              from which `div` derives the standard deviation
@with_kw mutable struct NUM
  pos = 0
  txt = ""
  n = 0
  w = 1
  lo = 10.0^64
  hi = -10.0^64
  mu = 0
  m2 = 0
end
# SYM: incremental summary of a symbolic column.
#   pos, txt : column position and name
#   n        : how many values have been added
#   w        : weight (same role as in NUM)
#   has      : value => count seen so far
#   most     : the highest count in `has`
#   mode     : the value owning that highest count
@with_kw mutable struct SYM
  pos = 0
  txt = ""
  n = 0
  w = 1
  has = Dict()
  most = 0
  mode = nothing
end
# Build the summary object for one column.
#   pos   : column position
#   txt   : column name; a leading upper-case letter selects NUM, anything
#           else SYM; a trailing "-" gives the column weight -1 (else 1)
#   inits : optional items to add immediately
#   by    : function applied to each item of `inits` before it is added
# Returns the populated NUM or SYM instance.
function COL(pos, txt, inits=[], by=identity)
  kind = occursin(r"^[A-Z]", txt) ? NUM : SYM
  # Keep hold of the constructed object: updating and returning the bare type
  # would leave callers with nothing that can store `pos`, counts, etc.
  col = kind(pos=pos, txt=txt, w= occursin(r"-$",txt) ? -1 : 1)
  for y in inits
    inc!(col, by(y))
  end
  return col
end
# Turn a list of column names into column summaries.
# Returns a named tuple:
#   names : the names as given
#   all   : one summary per name, in order
#   x     : summaries whose name does not end in "!", "-" or "+"
#   y     : summaries whose name ends in "!", "-" or "+"
#   klass : the summary whose name ends in "!" (last one wins), or `nothing`
# Names ending in "X" are kept in `all` but left out of x, y and klass.
function COLS(a)
  everything, indep, dep = [], [], []
  goal = nothing
  for (pos, txt) in enumerate(a)
    col = COL(pos, txt)
    push!(everything, col)
    occursin(r"X$", txt) && continue
    bucket = occursin(r"[!\-\+]$", txt) ? dep : indep
    push!(bucket, col)
    if occursin(r"!$", txt)
      goal = col
    end
  end
  return (names=a, all=everything, x=indep, y=dep, klass=goal)
end
# Add `x` to summary `i`, counting it `inc` times. The marker "?" is ignored
# (nothing is updated and `nothing` is returned); otherwise the count `n` is
# bumped and the type-specific `inc1!` does the rest.
function inc!(i, x, inc=1)
  x == "?" && return nothing
  i.n += inc
  return inc1!(i, x, inc)
end
# Fold the number `n` into a NUM: widen lo/hi, then update the running mean
# `mu` and the running sum of squared deviations `m2`. Relies on `i.n`
# having been incremented already (done by `inc!`). Returns the new `m2`.
function inc1!(i::NUM, n, _)
  i.lo, i.hi = min(i.lo, n), max(i.hi, n)
  delta = n - i.mu
  i.mu += delta / i.n
  i.m2 += delta * (n - i.mu)   # uses the mean *after* its update
  return i.m2
end
# Fold the symbol `s` into a SYM: raise its count by `inc` and, when that
# count now exceeds `most`, record `s` as the new mode.
function inc1!(i::SYM, s, inc=1)
  seen = get(i.has, s, 0) + inc
  i.has[s] = seen
  if seen > i.most
    i.mode, i.most = s, seen
  end
end
# Present a value from column `i` for reporting.
#   NUM : `x` rounded to `digits` decimal places (default 2); the requested
#         precision is honoured rather than always rounding to 2
#   SYM : `x` returned untouched; `digits` is accepted (and ignored) so both
#         methods can be called the same way, with or without it
rnd(i::NUM, x, digits=2) = round(x; digits=digits)
rnd(i::SYM, x, digits=2) = x
# Central tendency of a column: the mean of a NUM, the mode of a SYM.
function mid(i::NUM)
  return i.mu
end
function mid(i::SYM)
  return i.mode
end
# Diversity of a column.
#   SYM : entropy (base 2) of the value counts in `has`
#   NUM : sample standard deviation derived from `m2`; 0 when `m2` is
#         negative or fewer than two values have been seen
function div(i::SYM)
  total = i.n
  return -sum(cnt/total * log2(cnt/total) for cnt in values(i.has))
end
function div(i::NUM)
  (i.m2 < 0 || i.n < 2) && return 0
  return (i.m2 / (i.n - 1))^0.5
end
# Rescale `n` by the column's observed range: (n - lo) / (hi - lo).
# The marker "?" is passed through unchanged; the tiny 1E-16 keeps the
# division defined when hi equals lo.
function norm(i::NUM, n)
  n == "?" && return n
  return (n - i.lo) / (i.hi - i.lo + 1E-16)
end
# Create a DATA, load it from `on` (see `holds1` for the accepted kinds of
# source), then add every item of `also` as a further row. Returns the DATA.
function holds(on, also=[])
  data = DATA()
  holds1(data, on)
  foreach(x -> row(data, x), also)
  return data
end
# Load DATA `i` from a source, chosen by the type of `on`:
#   String : treated as a file name and read with `csv`, one row per record
#   DATA   : only its column names are passed on (as the header row)
#   other  : iterated, each item being handed to `row`
function holds1(i::DATA, on::String)
  return [row(i, cells) for cells in csv(on)]
end
function holds1(i::DATA, on::DATA)
  return row(i, on.cols.names)
end
function holds1(i::DATA, on)
  return [row(i, item) for item in on]
end
# Counter used to give each new ROW its `id`.
_id=0
# Feed one list of cells `a` into DATA `i`. The first list received becomes
# the header (it is turned into column summaries via COLS); every later list
# is wrapped in a ROW carrying the next `_id` and stored.
function row(i::DATA, a)
  # `=== nothing` is an identity test, so it cannot be affected by any `==`
  # method defined for whatever `cols` holds.
  if i.cols === nothing
    i.cols = COLS(a)
  else
    global _id = _id + 1
    row(i, ROW(cells=a, id=_id))
  end
end
# Store an existing ROW in DATA `i` and update every x column, then every
# y column, with that row's cell for the column.
function row(i::DATA, row1::ROW)
  push!(i.rows, row1)
  for col in Iterators.flatten((i.cols.x, i.cols.y))
    inc!(col, row1(col))
  end
end
# Summarise DATA `i` as a Dict mapping each column's name to `fn(col)`
# passed through `rnd` with `digits`, plus the key "n" holding the number
# of rows.
#   cols   : which columns to report (default: the y columns)
#   fn     : statistic computed per column (default: `mid`)
#   digits : precision handed to `rnd`
function stats(i::DATA; cols=i.cols.y, fn=mid, digits=3)
  report = Dict(col.txt => rnd(col, fn(col), digits) for col in cols)
  report["n"] = length(i.rows)
  return report
end
# Compare two rows over all y columns of DATA `i`. For each column the
# normalised values are differenced in both directions, weighted by the
# column's `w`, exponentiated and accumulated as a loss. Returns true when
# r1's average loss is smaller than r2's.
function better(i::DATA, r1::ROW, r2::ROW)
  goals = i.cols.y
  n = length(goals)
  loss1 = loss2 = 0
  for col in goals
    a = norm(col, r1(col))
    b = norm(col, r2(col))
    loss1 -= exp(col.w * (a - b) / n)
    loss2 -= exp(col.w * (b - a) / n)
  end
  return loss1/n < loss2/n
end
# function chop(a,x)
# xs = COL(txt=x.txt,pos=x.pos, inits=a, by=x)
# eps = the.cohen * div(xs)
# m = length(a)
# tmp,out, n = xy(), m / the.div.divs
# last=nothing
# for (i,one) in enumerate(a)
# if tmp.x.n>=n && m - i > n && one(x) != last(x)
# if x(one) - x(tmp[1]) > eps
# push!(out, tmp)
# tmp = xy() end end
# add!(tmp, one)
# last =one end
# if length(tmp.x.n) > 0 push!(out,tmp) end
# out end
#
# function bins(lst, x, y)
# function xy() (rows= [],
# x = COL(pos=x.pos,txt=x.txt),
# y = COL(pos-y.ps,txt=y.txt)) end
# function add!(xy,row)
# inc!(xy.x,row(x))
# inc!(xy.y,row(y))
# push!(xy.rows, row) end
# function merge(a)
# tmp, out, j, m = [], [], 1, length(a)
# while j <= m
# one = a[j]
# if j < m
# two = a[j+1]
# three = [ one ; two ]
# n1, n2, n3= length(one), length(two), length(three)
# sd1,sd2,sd3= sd(one,y), sd(two,y), sd(three,y)
# sd12 = n1/n3*sd1 + n2/n3*sd2
# if abs(sd1 - sd2) < 0.01 || sd12*the.div.trivial >sd3
# one = three
# j += 1 end end
# push!(tmp,one)
# j += 1 end
# return length(tmp) == length(a) ? a : merge(tmp)
# end #---------------------------
# merge(chop( sort([z for z in lst if x(z) != "?"], by=x) ))
# end
# vim: set et sts=2 sw=2 ts=2 :
using Parameters
using ResumableFunctions
# Seed of the home-made random number generator; `ranf` overwrites it on
# every call.
rseed = 937162211
# Random integer derived from `ranf(nlo, nhi)`: add one half, then truncate
# with `int`.
rani(nlo, nhi) = int(ranf(nlo, nhi) + .5)
# Random float in [nlo, nhi): advance the multiplicative congruential
# generator (multiplier 16807, modulus 214748347) and scale the new seed.
# NOTE(review): the modulus resembles 2147483647 with digits missing —
# confirm before changing, since that would alter every seeded sequence.
function ranf(nlo=0, nhi=1)
  global rseed
  rseed = (16807 * rseed) % 214748347
  span = nhi - nlo
  return nlo + span * rseed / 214748347
end
# Largest integer not exceeding `n`.
function int(n)
  return floor(Int, n)
end
# Pick one item of `a` at an index drawn by `rani(1, length(a))`.
# (Defined in this file's own namespace under the name `any`.)
function any(a)
  return a[rani(1, length(a))]
end
# Pick `n` items of `a`, each drawn independently with `any`.
function many(a, n)
  return [any(a) for _ in 1:n]
end
# Convert a string to the first type that parses it: Int32, then Float64,
# then Bool. When none of them fits, `x` itself is returned unchanged.
function coerce(x)
  for T in (Int32, Float64, Bool)   # tuple: no array allocated per call
    y = tryparse(T, x)
    # `tryparse` signals failure with `nothing`; test by identity.
    y === nothing || return y
  end
  return x
end
# Lazily read the file `sfile`, yielding one vector of coerced cells per
# non-empty line. Spaces, tabs, newlines and anything from "#" to the end of
# the line are stripped before the line is split on ",".
@resumable function csv(sfile)
  src = open(sfile)
  while ! eof(src)
    line = replace(readline(src), r"([ \t\n]|#.*)"=>"")
    if sizeof(line) != 0
      @yield map(coerce,split(line,",")) end end
  # Release the file handle once every line has been consumed.
  close(src)
end
# Build the option set described by the help text `s`.
#   s      : help text; each option line looks like
#            "-x --name description = default"
#   update : when true, defaults may be overridden from ARGS
# Returns (options::NamedTuple, s). Throws ArgumentError when a flag that
# needs a value is the last command-line argument.
function settings(s; update=false) #-> settings::NamedTuple,help::String
  # Resolve one option: start from default `v`, apply any matching flag in
  # ARGS ("-" + first letter of the name, or "--" + name), then coerce.
  function cli(k,v)
    for (i,flag) in enumerate(ARGS)
      if update && (flag=="-"*k[1] || flag=="--"*k)
        if v == "true"          # boolean options are flipped by their flag
          v = "false"
        elseif v == "false"
          v = "true"
        else                    # any other option takes the next argument
          i < length(ARGS) ||
            throw(ArgumentError("flag $flag expects a value after it"))
          v = ARGS[i+1]
        end
      end
    end
    Symbol(k) => coerce(v)
  end
  #---------------------------
  pat = r"\n *-[^-]+--(\S+)[^=]+= *(\S+)"
  d = Dict(cli(k,String(v)) for (k,v) in eachmatch(pat,s))
  ((;d...), s) # Julia idiom for coercing a dictionary to a named tuple
end
# oo: print the string that `o` builds for `x`.
oo(x,d=2) = println(o(x,d))
# o: render a value as a compact string; `d` is the number of decimals used
# for numbers.
#   strings, Bool, Char : shown as they are
#   numbers             : rounded to `d` digits
#   arrays              : "[a, b, ...]"
#   named tuples        : "{k=v, ...}" in field order
#   dicts               : "{k=v, ...}" sorted by key
#   anything else       : "Type{field=value, ...}" with fields sorted by
#                         name; fields starting with "_" are left out
o(i::AbstractString,_) = i
o(i::Bool ,_) = string(i)
o(i::Char ,_) = string(i)
o(i::Number ,d=2) = string(round(i;digits=d))
o(i::Array ,d=2) = "["*join((o(x,d) for x in i), ", ")*"]"
o(i::NamedTuple,d=2) = "{"*join(("$k="*o(v,d) for (k,v) in pairs(i)),", ")*"}"
# Sort the collected pairs by key: Base has no `sort` method for a Dict, so
# this form does not depend on another package supplying one.
o(i::Dict ,d=2) = "{"*join(("$k="*o(v,d) for (k,v) in sort(collect(i); by=first)), ", ")*"}"
o(i::Any ,d=2) = begin
  shown = sort([f for f in fieldnames(typeof(i)) if "$f"[1] != '_'])
  "$(typeof(i)){" * join(("$f=$(o(getfield(i,f),d))" for f in shown), ", ") * "}"
end
# Run the given test functions and return how many failed.
# Settings are first refreshed from the command line. A function runs when
# its name equals `the.go` or when `the.go` is "all". Before each run the
# random seed is reset to `the.seed`; after it, `the` is restored from a
# copy taken up front. A test fails when it returns `false` or throws (the
# exception is logged, not re-raised).
function tests(funs...)
  shout(s, c) = printstyled(s; bold=true, color=c)
  global rseed,help
  global the,_ = settings(help;update=true)
  saved = deepcopy(the)
  nfails = 0
  for fun in funs
    name = string(fun)
    (name == the.go || the.go == "all") || continue
    shout(">> $name ",:blue)
    ok, rseed = true, the.seed
    try
      ok = fun()
    catch err
      @error "E> " exception=(err, catch_backtrace())
      ok = false
    end
    the = deepcopy(saved)
    if ok == false
      shout("FAIL\n",:light_red)
      nfails += 1
    else
      shout("PASS\n",:light_green)
    end
  end
  return nfails
end
-include ../config/do.mk

DO_what= JUJU: semi-supervised multi-objective explanations
DO_copyright= Copyright (c) 2023 Tim Menzies, BSD-2.
DO_repos= . ../config ../data

# These targets name actions, not files; declaring them phony keeps them
# runnable even if a file called "install", "julia" or "packages" exists.
.PHONY: install julia packages

install: $(DO_repos) packages ## get related repos

../data:
	(cd ..; git clone https://gist.github.com/d47b8699d9953eef14d516d6e54e742e.git data)

../config:
	(cd ..; git clone https://gist.github.com/42f78b8beec9e98434b55438f9983ecc.git config)

julia:
	brew install julia
	brew link julia

packages: ## install julia packages:
	julia -e 'using Pkg; Pkg.add("Parameters")'
	julia -e 'using Pkg; Pkg.add("ResumableFunctions")'
# using StackTraces
#
# function test1()
# x=1
# try
# print(x/a)
# catch
# stacktrace(catch_backtrace())
# end end
#
#o
# Let `d.key` read the entry stored under "key" in a Dict{String,Int64}.
# The method is attached to Base.getproperty because that is the function
# dot-syntax calls; a separate function merely named `getproperty` is never
# reached by `d.a`. Real fields of the Dict are served first so the Dict's
# own field accesses keep working. The debug `print(1)` is dropped: it would
# fire on every such field access as well.
# NOTE(review): this extends a Base function on a Base type (type piracy);
# acceptable for a scratch experiment, not for library code.
function Base.getproperty(x::Dict{String, Int64}, f::Symbol)
  hasfield(typeof(x), f) && return getfield(x, f)
  return x[String(f)]
end
# Scratch check: build a small Dict and print its "a" entry using dot
# syntax. `d.a` is a property access, so it only yields 1 if property
# access on this Dict type has been mapped onto key lookup.
d=Dict("a"=>1, "b"=>2)
print(d.a)
# Run one named check: print its label, call `fun`, print a tick or a cross.
# Returns 0 when `fun()` is true and 1 when it is false.
function tests1(k, fun)
  print("⚠️a $k")
  passed = fun()
  println(passed ? " ✅" : " ❌")
  return passed ? 0 : 1
end
# Scratch output, then end the process with exit status 5; nothing after
# this point in the file is executed.
print(1)
exit(5)
#
# function tests(a)
# global the,help,rseed
# fails = 0
# b4 = deepcopy(the)
# if the.help
# print(help)
# else
# for (k,fun) in pairs(a)
# if the.help==k || then.help=="all"
# rseed = the.seed
# the = deepcopy(b4)
# print("⚠️a $k")
# if fun() println(" ✅")
# else println(" ❌"); fails += 1 end end end
#
# catch e
# @error "Something went wrong" exception=(e, catch_backtrace())
# end end
#
# NOTE(review): no definition of `test2` is visible in this file, and this
# call comes after `exit(5)` — confirm whether it is still wanted.
test2()
# vim: set et sts=2 sw=2 ts=2 :
using Random
using Test
include("juju.jl")
# Check that the string "true" is coerced to a Bool.
function aa()
  return coerce("true") isa Bool
end
# Print the current settings. Always returns false, so `tests` reports this
# one as a failure.
function bb()
  oo(the)
  return false
end
# Check that reading ../data/auto93.csv yields 3192 cells in total, counted
# over every row that `csv` produces.
function cc()
  cells = 0
  for r in csv("../data/auto93.csv")
    cells += length(r)
  end
  return cells == 3192
end
# Run the three checks; the process exit status is the number of failures
# that `tests` returns.
exit(tests(aa, bb, cc))
# tests((
# libs = -> begin
# ))
# #
# @testset "jujus" begin
# @testset "libs" begin
# n=0; for r in csv("../data/auto93.csv"); n=n+length(r) end
# @test n==3192
# @test typeof(coerce("true")) == typeof(true)
# lst= sort(many([1,2,3,4],100))
# @test 1 in lst && 4 in lst
# println(the)
# oo(Dict("a"=>1,"b"=>2))
# end
# @testset "nums" begin
# ok()
# num = NUM()
# [inc!(num,ranf()) for x in 1:10^4]
# @test .49 < mid(num) < .51
# @test .28 < div(num) < .32
# end
# @testset "syms" begin
# sym = SYM()
# [inc!(sym,x) for x in "aaaabbc"]
# @test mid(sym) == 'a'
# @test 1.37 < div(sym) < 1.38
# end
# @testset "data" begin
# d=holds(the.file)
# oo(d.cols.y[3])
# oo(stats(d))
# d1=holds(d,d.rows)
# @test d1.cols.y[1].m2 == d.cols.y[1].m2
# end
# # @testset "sort" begin
# # d = holds(the.file)
# # println("b4 ",o(stats(d)))
# # a=sort(d.rows, lt= (x,y) -> better(d,x,y))
# # println("best ", o(stats(holds(d,a[1:20]))))
# # println("rest ", o(stats(holds(d,a[21:end]))))
# # end
# end
#
# NOTE(review): bare trailing value; it follows the `exit(...)` call above,
# so it does not appear to be reached — confirm whether it is still needed.
true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment