When splitting a large string, it is best to use a worker thread, a microservice, or a lambda. However, if you are looking for an event-loop-based solution, you have to leverage JavaScript's task-scheduling API.
Code:
/**
 * Builds a minimal thenable whose `then` schedules the continuation with
 * `setImmediate`. Awaiting the returned object therefore resumes the caller
 * from a `setImmediate` callback instead of from the microtask queue.
 */
const createAwaitTick = () => {
  const thenable = {
    then: (resume) => {
      setImmediate(resume);
    },
  };
  return thenable;
};
/**
 * Async generator that yields `str` one code point at a time (string
 * `for...of` iteration), pausing via `createAwaitTick()` between batches.
 *
 * @param {string} str - The string to iterate.
 * @param {number} batchSize - Number of characters to yield between pauses.
 *   A value of zero or less pauses before every character.
 * @yields {string} The next code point of `str`.
 */
async function *_charGenerator(str, batchSize) {
  let remaining = batchSize;
  for (const c of str) {
    // `<=` rather than `===` so zero, negative, or fractional batch sizes
    // still reach the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield c;
    // Count the character just yielded; without this the pause above is
    // never reached for a non-zero batch size.
    remaining--;
  }
}
/**
 * Async generator string splitter for large strings.
 *
 * Yields the pieces of `str` separated by `delim`, in order, including empty
 * pieces and the trailing remainder after the last delimiter. After every
 * `batchSize` yielded pieces it awaits `createAwaitTick()` before continuing.
 *
 * @param {string} str - The string to split.
 * @param {string} delim - The separator. An empty string delegates to
 *   `_charGenerator`, which yields `str` character by character.
 * @param {number} [batchSize=50_000] - Pieces to yield between pauses. A
 *   value of zero or less pauses before every piece.
 * @yields {string} The next piece of `str`.
 */
export async function *splitString(str, delim, batchSize = 50_000) {
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf#return_value
  // indexOf with an empty search string matches at the start position itself,
  // so the scan below would never advance; handle that case separately.
  if (delim.length === 0) {
    return yield* _charGenerator(str, batchSize);
  }

  const delimLength = delim.length;
  let start = 0;
  let remaining = batchSize;
  for (
    let end = str.indexOf(delim, start);
    end !== -1;
    end = str.indexOf(delim, start)
  ) {
    // `<=` rather than `===` so zero, negative, or fractional batch sizes
    // still reach the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield str.substring(start, end);
    start = end + delimLength;
    remaining--;
  }

  // Whatever follows the last delimiter (possibly the empty string).
  yield str.substring(start);
}
https://gist.github.com/nopeless/fbd79e81e5b09b1e27db042bf3b24b14 (leave a star if you find it interesting)
The `await` keyword doesn't actually care whether its operand is a Promise object or not, as long as it has a `.then` method. This is why Bluebird Promises work (and are actually faster). `setImmediate` queues the function execution as a macrotask. The API is a little too complex to explain here, but ask in the comments if you are interested. Relevant article here: https://nodejs.dev/en/learn/understanding-setimmediate/ . In a nutshell, `setImmediate` will allow I/O-bound tasks to run first once all synchronous code has run (unlike `process.nextTick`). For those who are curious about `await Promise.resolve()`, it is put into the Promise microtask queue and has similar issues to `process.nextTick()`. `setImmediate` is the only answer.
This code in itself isn't that useful but I hope it helps you understand how to convert synchronous functions into asynchronous ones when needed :)
/**
 * Returns a promise that resolves (with no value) once `setTimeout` fires
 * for the given delay.
 *
 * @param {number} ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Builds a minimal thenable whose `then` schedules the continuation with
 * `setImmediate`. Awaiting the returned object therefore resumes the caller
 * from a `setImmediate` callback instead of from the microtask queue.
 */
const createAwaitTick = () => {
  const thenable = {
    then: (resume) => {
      setImmediate(resume);
    },
  };
  return thenable;
};
/**
 * Async generator that yields `str` one code point at a time (string
 * `for...of` iteration), pausing via `createAwaitTick()` between batches.
 *
 * @param {string} str - The string to iterate.
 * @param {number} batchSize - Number of characters to yield between pauses.
 *   A value of zero or less pauses before every character.
 * @yields {string} The next code point of `str`.
 */
async function *_charGenerator(str, batchSize) {
  let remaining = batchSize;
  for (const c of str) {
    // `<=` rather than `===` so zero, negative, or fractional batch sizes
    // still reach the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield c;
    // Count the character just yielded; without this the pause above is
    // never reached for a non-zero batch size.
    remaining--;
  }
}
/**
 * Async generator string splitter for large strings.
 *
 * Yields the pieces of `str` separated by `delim`, in order, including empty
 * pieces and the trailing remainder after the last delimiter. After every
 * `batchSize` yielded pieces it awaits `createAwaitTick()` before continuing.
 *
 * @param {string} str - The string to split.
 * @param {string} delim - The separator. An empty string delegates to
 *   `_charGenerator`, which yields `str` character by character.
 * @param {number} [batchSize=50_000] - Pieces to yield between pauses. A
 *   value of zero or less pauses before every piece.
 * @yields {string} The next piece of `str`.
 */
export async function *splitString(str, delim, batchSize = 50_000) {
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf#return_value
  // indexOf with an empty search string matches at the start position itself,
  // so the scan below would never advance; handle that case separately.
  if (delim.length === 0) {
    return yield* _charGenerator(str, batchSize);
  }

  const delimLength = delim.length;
  let start = 0;
  let remaining = batchSize;
  for (
    let end = str.indexOf(delim, start);
    end !== -1;
    end = str.indexOf(delim, start)
  ) {
    // `<=` rather than `===` so zero, negative, or fractional batch sizes
    // still reach the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield str.substring(start, end);
    start = end + delimLength;
    remaining--;
  }

  // Whatever follows the last delimiter (possibly the empty string).
  yield str.substring(start);
}
// Demo input: "foobar;" repeated 50 million times.
const longString = "foobar;".repeat(50_000_000);

/**
 * Logs the elapsed time 100 times, sleeping 10 ms between logs. Stands in
 * for other work that needs the event loop while the split is running.
 */
async function reportElapsed() {
  const startedAt = Date.now();
  let remaining = 100;
  while (remaining > 0) {
    // Represents a synchronous operation needed by other calls
    const elapsed = Date.now() - startedAt;
    console.log(elapsed + "ms");
    await sleep(10);
    remaining -= 1;
  }
}

/**
 * Consumes `splitString(longString, ";")` and logs how many pieces it
 * produced.
 */
async function countPieces() {
  let count = 0;
  for await (const piece of splitString(longString, ";")) {
    count += 1;
  }
  // Should print the number of times repeated + 1
  console.log("counted total" + count);
}

// Start both tasks without awaiting either, so they interleave.
reportElapsed();
countPieces();