Created
September 17, 2019 08:44
-
-
Save bitifet/6c8269c6c1fe697bcb8690873d20d7f3 to your computer and use it in GitHub Desktop.
Regular expression translation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Translate well known filesystem-like wilcards ('?' for single character and '*' for 0 or more) to: | |
a) Suitable SQL pattern (with '_' and '%' instead) for 'like' and 'ilike' comparsion. | |
b) Suitable regular expression. | |
In both cases whithout breaking anything. That is, exitent meaningful ('%', and '_' for DB | |
and '.', '+', '[', etc... for regular expressions) should be properly escaped. | |
Also: | |
* Incoming '*' and '?' must be able to be escaped with backslash. | |
* In case of regular expression output, other backspaces should be escaped. | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const regexp = { | |
allWilcards: /(.?[*?%_])/g, | |
recognizedWilcards: /[*?]/, | |
}; | |
module.exports = { | |
sqlWilcards: (function(){//{{{ | |
function transpile (w){//{{{ | |
var s = ""; | |
if (w.length >= 2) { // Wilcard not at string begining. | |
if (w[0] == "\\") { // Escaped. | |
if (w[1] == "%" || w[1] == "_") { // Would mean "\%" or "\_" literal. | |
return "\\\\\\"+w[1]; // Escape both. | |
} else { // Escaping '*' or '?'. | |
return w[1]; // Drop escaping charecters because they aren't actual wilcards. | |
}; | |
// Never gets here. | |
} else { // Any other preceding character. | |
if (w[0] == "%" || w[0] == "_") s+="\\"; // Escape if needed. | |
s+=w[0]; // Output as is. | |
}; | |
w=w[1]; // Drop processed preffix. | |
}; | |
// Wilcard is first character or any possible escapping have been handled. | |
switch(w){ | |
case "%": | |
case "_": | |
s+="\\"+w; // Escape it. | |
break; | |
case "*": | |
s+="%"; | |
break; | |
case "?": | |
s+="_"; | |
break; | |
}; | |
return s; | |
};//}}} | |
return function(str){ | |
var out = str | |
.replace(/\*+/g, '*') // Sanityze multiple consecutive '*'. | |
.replace(regexp.allWilcards, transpile) | |
; | |
return out; | |
}; | |
})(),//}}} | |
hasSqlWilcards: function(str) {//{{{ | |
return !! str.match(regexp.recognizedWilcards); | |
},//}}} | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const wSpecs = { | |
sql: { | |
'\\*': '%', | |
'\\?': '_', | |
}, | |
regex: { | |
'\\*': '\\.\\*', | |
'\\?': '\\.', | |
}, | |
}; | |
function transpiler(spc) { | |
const allBits = [ | |
...Object.keys(spc).filter(x=>x), | |
...Object.values(spc), | |
// Other characters to be escaped: | |
...["\\+", "\\?", "\\+", "\\|", "\\\\"], | |
]; | |
const re = new RegExp( | |
"((?:\\\\)*)"+"("+allBits.join("|")+")" | |
, "g" | |
) | |
console.log(re); | |
return function(txt) { | |
console.log(txt); | |
txt.replace(re, function (foo, slashes, patt) { | |
let output = ""; | |
const sl = Math.floor(slashes.length / 2); | |
const esc = slashes.length % 2; | |
output += Array(sl+1).join("\\\\\\\\"); // Escape leading slashes. | |
console.log(slashes.length, sl, esc, patt, output) | |
}); | |
}; | |
}; | |
transpiler(wSpecs.sql)("Hello * world. % percentage and \\escaped \\\\\\\\\\\\_ underscore\\."); | |
console.log("----------------"); | |
transpiler(wSpecs.regex)("Hello * world. % percentage and \\escaped \\\\_ underscore\\."); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment