Last active
January 4, 2024 01:53
-
-
Save Electroid/cdd5d673106342234ff5653e728f1c09 to your computer and use it in GitHub Desktop.
The RegExp challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark #1 - `isbot` tests
//
// Relevant issues:
// - https://github.com/oven-sh/bun/issues/5197
// Relevant credit:
// - https://github.com/strager for writing this benchmark
// - https://www.npmjs.com/package/isbot for `isbot`
import { run, bench } from "mitata"; | |
import { gunzipSync } from "node:zlib"; | |
// Download the gzipped corpus that serves as input for the benchmark.
const response = await fetch(
  "https://github.com/quick-lint/bun-vs-node/raw/17934db/bun-regexp-bug/uas.txt.gz",
);
// Fail fast with the HTTP status: otherwise an error page would be handed to
// gunzipSync and surface as an unrelated decompression error.
if (!response.ok) {
  throw new Error(
    `Failed to download user agents: ${response.status} ${response.statusText}`,
  );
}
const compressedBody = await response.arrayBuffer();
const body = gunzipSync(Buffer.from(compressedBody)).toString("utf8");
// Each line of the decompressed text is treated as one user agent.
const userAgents = body.split("\n");
// Bot-detection pattern exercised by the "isbot" benchmark below (the header
// credits the `isbot` npm package). It is one large alternation of user-agent
// fragments, several of them guarded by lookbehind/lookahead assertions, and
// it carries no flags, so matching is case-sensitive as written here.
// NOTE(review): the literal is broken across two lines inside the
// `(?<! ` ... `ya(?:yandex)?)` lookbehind; a regex literal cannot contain a
// raw line break, so this looks like a copy/extraction artifact - confirm
// against the upstream pattern.
const regExp = | |
/ daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(\)]+(\/v?\d+(\.\d+)?(\.\d{1,10})?)?$|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^anonymous|^avsdevicesdk\/|^axios\/|^bidtellect\/|^biglotron|^btwebclient\/|^castro|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^lwp[-: ]|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^test certificate info|^thumbor\/|^tumblr\/|^user-agent:mozilla|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|(?<! cu)bot|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|ghost|gomezagent|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|headlesschrome\/|(?<!(?:lib))http|httrack|hubspot marketing grader|hydra|ibisbrowser|images|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|morningscore\/|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|rexx;|rigor|rss|scan|scrape|(?<! 
ya(?:yandex)?)search|serp ?reputation ?management|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|taginspector\/|torrent|tracemyfile|transcoder|trendsmapresolver|twingly recon|url|virtuoso|wappalyzer|webglance|webkit2png|websitemetadataretriever|whatcms\/|wordpress|zgrab/; | |
// Benchmark: test the bot-detection pattern against every user agent.
bench("isbot", () => {
  // Running tally of matches; it is not read after the loop.
  let matches = 0;
  for (const ua of userAgents) {
    // Convert the boolean result to 1/0 explicitly before adding it.
    matches += Number(regExp.test(ua));
  }
});

// Start mitata's runner.
await run();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark #2 - Markdown parser
//
// Relevant issues:
// - https://github.com/markedjs/marked/issues/2863
// - https://github.com/oven-sh/bun/issues/3464
import { run, bench } from "mitata"; | |
import { marked } from "marked"; | |
// Download the markdown document that the benchmarks render.
const response = await fetch(
  "https://github.com/markedjs/marked/files/11911404/test.md",
);
// Fail fast with the HTTP status: otherwise an error page would be silently
// benchmarked as if it were the markdown fixture.
if (!response.ok) {
  throw new Error(
    `Failed to download markdown fixture: ${response.status} ${response.statusText}`,
  );
}
const data = await response.text();
// Benchmark: render the markdown document through marked's callback form.
bench("marked (sync)", () => {
  marked(data, (err, html) => {
    if (err) {
      throw err;
    }
    // NOTE(review): this log runs each time the callback fires, so console
    // output is part of what gets timed - confirm that is intended.
    console.log(html.length);
  });
});
// Benchmark: render the same document with marked's `async: true` option and
// wait for the returned value to settle.
bench("marked (async)", async () => {
  const rendering = marked(data, { async: true });
  await rendering;
});

// Start mitata's runner.
await run();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark #3 - Capture groups
//
// Relevant issues:
// - https://github.com/oven-sh/bun/issues/3464#issuecomment-1774043531
// Relevant credit:
// - https://github.com/yschroe for writing this benchmark
import { run, bench } from "mitata"; | |
// Input text for the capture-group benchmark. It contains no uppercase "A" or
// "B", so none of the case-sensitive patterns in regExps can match it.
const loremIpsum = | |
"Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; | |
// Variants of an "A followed by B" pattern with differing capture-group
// placement (the third also drops the {1,3} quantifier on A). All of them use
// the global flag.
const regExps = [ | |
/A{1,3}B{1,3}/g, | |
/A{1,3}(B){1,3}/g, | |
/(A)(B){1,3}/g, | |
/(A){1,3}(B){1,3}/g, | |
]; | |
// Benchmark: run every pattern in regExps against loremIpsum and tally the
// number of matches found.
bench("capture groups", () => {
  let count = 0;
  for (const regExp of regExps) {
    // String.prototype.match returns null when nothing matches, which makes
    // `?.length` undefined; fall back to 0 so the tally never becomes NaN.
    count += loremIpsum.match(regExp)?.length ?? 0;
  }
});

// Start mitata's runner.
await run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment