This Gist was automatically created by Carbide, a free online programming environment.
Last active
September 13, 2016 18:56
-
-
Save bijection/e9ed452d261ff3741e61013a381555e6 to your computer and use it in GitHub Desktop.
flash fill
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// function matchBefore(str, k, token){
// var match
// while(match = token.exec(str)){
// if(match.index + match[0].length === k) return match[0];
// }
// }
// function matchAfter(str, k, token){
// var match
// while(match = token.exec(str)){
// if(match.index === k) return match[0];
// }
// }
// show(matchingRegexes('as24zx43', 0, false))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scratch cell: a Map stores a reference to the array, so the push made
// after `set` is visible through `get`.
const k = new Map()
const m = []
k.set(1, m)
m.push(2)
k.get(1)
// Strict equality on two identical string literals.
'a'==='a'
// String#match with a global regex returns every matched substring.
JSON.stringify('as3as'.match(/a/g))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scratch cell: an array used as a property key is coerced to its string
// form, so the property below is stored under '1,2,a'.
const k = [1,'2','a']
const m = {}
m[k] = 1
m
// Scratch cell: exec on a global regex starts searching at lastIndex and
// advances lastIndex past the match.
const re = /^./g
re.lastIndex = 0
re.exec('34as')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Build a constant-position node.
 *
 * @param {*} k - value stored on the node (GeneratePosition passes integers)
 * @returns {{type: 'CPos', value: *}} plain object tagged 'CPos'
 */
export function CPos(k){
  return {type: 'CPos', value: k}
}
/**
 * Build a position node described by what lies on either side of it.
 *
 * @param {*} left - stored as `left` (GeneratePosition passes a TokenSeq node)
 * @param {*} right - stored as `right` (GeneratePosition passes a TokenSeq node)
 * @param {*} k - stored as `k` (GeneratePosition passes a Set of two integers)
 * @returns {{type: 'Pos', left: *, right: *, k: *}} plain object tagged 'Pos'
 */
export function Pos(left, right, k){
  return {type: 'Pos', left, right, k}
}
/**
 * Build a token-sequence node.
 *
 * @param {*} tokenSets - stored as `tokenSets` (generateRegex passes an array
 *   of token Sets)
 * @returns {{type: 'TokenSeq', tokenSets: *}} plain object tagged 'TokenSeq'
 */
export function TokenSeq(tokenSets){
  return {type: 'TokenSeq', tokenSets}
}
/**
 * Build a constant-string node.
 *
 * The node is tagged 'ConstStr' so it can be told apart from TokenSeq nodes,
 * which carry `tokenSets` rather than `value`.
 *
 * @param {*} str - stored as `value` (presumably a string; confirm with callers)
 * @returns {{type: 'ConstStr', value: *}} plain object tagged 'ConstStr'
 */
export function ConstStr(str){
  return {type: 'ConstStr', value: str}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Tokens from './tokens.js' | |
import IParts, {Reps} from './iparts.js' | |
import {Pos, CPos, TokenSeq} from './defs.js' | |
// Bound used both as the character window width in matchTable and as the
// maximum number of tokens in a generated sequence.
const MAX_MATCH_LEN = 3
/**
 * Turn a sequence of token sources into a TokenSeq node.
 *
 * Each token is replaced by the entry IParts(str) holds for it.
 *
 * @param {string[]} tokens - token regex sources, used as keys into IParts(str)
 * @param {string} str - string the IParts lookup is computed for
 * @returns {object} TokenSeq node whose tokenSets are the looked-up entries
 */
function generateRegex(tokens, str){
  const parts = IParts(str)
  return TokenSeq(tokens.map(token => parts[token]))
}
/**
 * For each character in a window next to index k, list the representative
 * tokens whose global matches cover that character.
 *
 * The window is [k - MAX_MATCH_LEN, k) when `before` is true and
 * [k, k + MAX_MATCH_LEN) otherwise, clamped to the string bounds.
 *
 * @param {string} str - string to scan
 * @param {number} k - index the window is anchored at
 * @param {boolean} before - true to look left of k, false to look right
 * @returns {string[][]} one array of token sources per window character
 */
function matchTable(str, k, before){
  const tokens = Reps(IParts(str))
  const start = before ? Math.max(k - MAX_MATCH_LEN, 0) : k
  const end = before ? k : Math.min(k + MAX_MATCH_LEN, str.length)
  const table = Array.from({length: Math.max(end - start, 0)}, () => [])
  tokens.forEach(token => {
    const re = new RegExp(token, 'g')
    // Scanning starts at the window's left edge, so match.index >= start.
    re.lastIndex = start
    let match
    while((match = re.exec(str)) && match.index < end){
      // A zero-length match would never advance lastIndex; step past it.
      if(match[0].length === 0){
        re.lastIndex++
        continue
      }
      const first = match.index - start
      // Matches may run past the window; only columns inside it are recorded.
      const last = Math.min(first + match[0].length, table.length)
      for(let col = first; col < last; col++) table[col].push(token)
    }
  })
  return table
}
// Scratch evaluation: token table for the window to the right of index 2.
matchTable("1 1 1asdf", 2, false)
/**
 * Enumerate token sequences by walking the matchTable window column by column.
 *
 * Starting at the column adjacent to k, every token listed in a column may be
 * added to the sequence. A token equal to the previous one may also continue
 * without being added again. Sequences longer than MAX_MATCH_LEN are dropped.
 *
 * @param {string} str - string to scan
 * @param {number} k - index the window is anchored at
 * @param {boolean} before - true to walk leftwards from k, false rightwards
 * @returns {string[][]} distinct token sequences, each an array of token sources
 */
function matchingRegexes_crazy(str, k, before){
  const table = matchTable(str, k, before)
  const step = before ? -1 : 1
  // Sequences are deduplicated by their JSON form; insertion order is kept.
  const seen = new Set()
  const generate = (curList, col, lastTok) => {
    const cell = table[col]
    if(!cell) return
    cell.forEach(t => {
      const nextList = before ? [t, ...curList] : [...curList, t]
      if(nextList.length > MAX_MATCH_LEN) return // Otherwise this is prohibitively slow
      // Same token as the previous column: also try extending the current run.
      if(t === lastTok) generate(curList, col + step, t)
      generate(nextList, col + step, t)
      seen.add(JSON.stringify(nextList))
    })
  }
  generate([], before ? table.length - 1 : 0, null)
  return Array.from(seen, key => JSON.parse(key))
}
// Reps(IParts("aSa12"))
// function show(r){
// return r.map(k=>k.toString()).join('\n')
// }
// var m = show(matchingRegexes("aSa12sadsd", 6, false))
// m.length
/**
 * List token sequences that match the text touching index k on one side.
 *
 * NOTE(review): the previous body referenced an undefined `Regex`
 * constructor, had an empty loop body and returned nothing, so callers
 * crashed on `.forEach`. This is a reviewer's completion of that stub;
 * confirm it matches the intended search.
 *
 * A token is accepted at a position when one of its global matches in `str`
 * ends exactly there (`before`) or starts exactly there (otherwise). The
 * search then continues from the far edge of that match, up to
 * MAX_MATCH_LEN tokens per sequence. The empty sequence is never returned.
 *
 * @param {string} str - string to scan
 * @param {number} k - index the sequences must touch
 * @param {boolean} before - true for sequences ending at k, false for
 *   sequences starting at k
 * @returns {string[][]} distinct token sequences, each an array of token sources
 */
function matchingRegexes(str, k, before){
  const tokens = Reps(IParts(str))
  // Keyed by the JSON form so each sequence is reported once.
  const found = new Map()
  const extend = (seq, pos) => {
    if(seq.length >= MAX_MATCH_LEN) return
    tokens.forEach(token => {
      const re = new RegExp(token, 'g')
      let match
      while((match = re.exec(str))){
        // A zero-length match would never advance lastIndex; step past it.
        if(match[0].length === 0){
          re.lastIndex++
          continue
        }
        const matchEnd = match.index + match[0].length
        const touches = before ? matchEnd === pos : match.index === pos
        if(!touches) continue
        const next = before ? [token, ...seq] : [...seq, token]
        found.set(JSON.stringify(next), next)
        extend(next, before ? match.index : matchEnd)
        // Global matches do not overlap, so at most one can touch `pos`.
        break
      }
    })
  }
  extend([], k)
  return Array.from(found.values())
}
/**
 * Generate candidate position expressions that identify index k in str.
 *
 * The result starts with two constant positions: k counted from the left and
 * -(str.length - k) counted from the right. It then holds one Pos node for
 * every pairing of a token sequence before k with a token sequence after k.
 * For each pairing the combined regex is run over the whole string; `c` is
 * the 1-based number of the match that spans k and `total` the number of
 * matches, so the node records the occurrence from the left (c) and from the
 * right (-(total - c + 1)).
 *
 * @param {string} str - string the position lives in
 * @param {number} k - index to describe
 * @returns {object[]} CPos and Pos nodes
 */
export default function GeneratePosition(str, k){
  // Right-relative constant: distance from the end, negated.
  const res = [CPos(k), CPos(-(str.length - k))]
  // Does not depend on seqBefore, so it is computed once.
  const seqsAfter = matchingRegexes(str, k, false)
  matchingRegexes(str, k, true).forEach(seqBefore => {
    seqsAfter.forEach(seqAfter => {
      const pattern = new RegExp([...seqBefore, ...seqAfter].join(''), 'g')
      let total = 0
      let c
      let match
      while((match = pattern.exec(str))){
        // A zero-length match would never advance lastIndex; step past it.
        if(match[0].length === 0){
          pattern.lastIndex++
          continue
        }
        total++
        if(match.index <= k && match.index + match[0].length >= k) c = total
      }
      // No match spans k: there is no occurrence index to record.
      if(c === undefined) return
      const r1 = generateRegex(seqBefore, str)
      const r2 = generateRegex(seqAfter, str)
      res.push(Pos(r1, r2, new Set([c, -(total - c + 1)])))
    })
  })
  return res
}
// Scratch evaluation: inspect the entry at index 5 of the candidates for index 3.
GeneratePosition("aaa156", 3)[5]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Tokens from './tokens.js' | |
// Regex sources of every known token.
const tokens = Object.values(Tokens)
/**
 * Group tokens that cannot be told apart on `str`.
 *
 * Two tokens fall in the same class when their global matches on `str`
 * serialise to the same JSON, which includes the case where neither matches.
 *
 * @param {string} str - string the tokens are matched against
 * @returns {Object<string, Set<string>>} map from each token source to the
 *   Set of token sources in its class; tokens in one class share the same Set
 */
export default function IParts(str) {
  const res = {}
  const classesByMatches = new Map()
  tokens.forEach(token => {
    const key = JSON.stringify(str.match(new RegExp(token, 'g')))
    let entry = classesByMatches.get(key)
    if(!entry){
      entry = new Set()
      classesByMatches.set(key, entry)
    }
    entry.add(token)
    res[token] = entry
  })
  return res
}
/**
 * Pick one representative per distinct Set found among the values of `iparts`.
 *
 * Sets are compared by identity, and the representative is the first value
 * in the Set's iteration order.
 *
 * @param {Object<string, Set<string>>} iparts - map whose values are Sets
 * @returns {string[]} first member of each distinct Set, in first-seen order
 */
export function Reps(iparts){
  const classes = new Set(Object.values(iparts))
  return Array.from(classes, cls => cls.values().next().value)
}
// Scratch evaluation: token classes for a lowercase-only string.
IParts("asdf")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Token name -> regex source string. Sources are compiled with `new RegExp`
// by the modules that consume them, hence the doubled backslashes.
const Tokens = {
  // Character-class runs and their complements
  NumTok: '\\d+',
  NonNumTok: '[^\\d]+',
  AlphTok: '[a-zA-Z]+',
  NonAlphTok: '[^a-zA-Z]+',
  LowerTok: '[a-z]+',
  NonLowerTok: '[^a-z]+',
  UpperTok: '[A-Z]+',
  NonUpperTok: '[^A-Z]+',
  AlphNumTok: '[a-zA-Z0-9]+',
  NonAlphNumTok: '[^a-zA-Z0-9]+',
  AlphNumWsTok: '[a-zA-Z0-9 ]+',
  NonAlphNumWsTok: '[^a-zA-Z0-9 ]+',
  // Single space
  WsTok: ' ',
  // First and last character of the string
  StartTok: '^.',
  EndTok: '.$',
  // Literal punctuation
  DotTok: '\\.',
  FwdSlashTok: '\\/',
  BckSlashTok: '\\\\',
  DashTok: '-',
  LoDashTok: '_',
}
export default Tokens |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment