When splitting a large string, it is best to use a worker thread, a microservice, or a lambda. However, if you are looking into event-loop-based solutions, you have to leverage JavaScript's API for scheduling tasks.
Code:
/**
 * Builds a minimal thenable. Awaiting it hands the continuation to
 * setImmediate, so the awaiting function resumes on a later turn of the
 * event loop and the awaited value is `undefined`.
 */
const createAwaitTick = () => {
  const thenable = {
    then(resume) {
      setImmediate(resume);
    },
  };
  return thenable;
};
/**
 * Yields `str` one character (code point) at a time, awaiting
 * `createAwaitTick()` between batches so other event-loop work can run
 * while a very long string is being walked.
 *
 * @param {string} str - String to iterate over.
 * @param {number} batchSize - Number of characters emitted between pauses.
 *   A value of 0 or less pauses before every character.
 * @returns {AsyncGenerator<string, void, void>} The characters of `str`, in order.
 */
async function* _charGenerator(str, batchSize) {
  let remaining = batchSize;
  for (const c of str) {
    // `<=` rather than `===` so a zero, negative, or fractional batch size
    // still reaches the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    // Without this decrement the budget never runs out and the generator
    // never gives the event loop a chance to run anything else.
    remaining--;
    yield c;
  }
}
/**
 * Async generator string splitter for large strings.
 *
 * Yields the pieces of `str` separated by `delim`, in order, followed by the
 * remainder after the last delimiter (so at least one value is always
 * yielded when `delim` is non-empty). Between batches it awaits
 * `createAwaitTick()` so other event-loop work can run during a long split.
 *
 * @param {string} str - String to split.
 * @param {string} delim - Delimiter. An empty delimiter yields `str` character by character.
 * @param {number} [batchSize=50_000] - Number of pieces emitted between pauses.
 *   A value of 0 or less pauses before every piece.
 * @returns {AsyncGenerator<string, void, void>} The pieces of `str`.
 */
export async function* splitString(str, delim, batchSize = 50_000) {
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf#return_value
  // indexOf("") matches at the search position itself, so the loop below
  // would never advance; hand the empty-delimiter case to the character generator.
  if (delim.length === 0) return yield* _charGenerator(str, batchSize);

  const delimLength = delim.length;
  let start = 0;
  let remaining = batchSize;
  let idx = str.indexOf(delim, start);
  while (idx !== -1) {
    // `<=` rather than `===` so a zero, negative, or fractional batch size
    // still reaches the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield str.substring(start, idx);
    start = idx + delimLength;
    remaining--;
    idx = str.indexOf(delim, start);
  }
  // Whatever follows the last delimiter (possibly the empty string).
  yield str.substring(start);
}
// Gist: https://gist.github.com/nopeless/fbd79e81e5b09b1e27db042bf3b24b14 (leave a star if you find it interesting)
The await keyword doesn't actually care whether its operand is a Promise object or not, as long as it has a .then method. This is why Bluebird promises work (and are actually faster). setImmediate queues the function execution as a macrotask. The API is a little too complex to explain here, but ask in the comments if you are interested. Relevant article here: https://nodejs.dev/en/learn/understanding-setimmediate/ . In a nutshell, setImmediate allows I/O-bound tasks to run first once all synchronous code has run (unlike process.nextTick). For those who are curious about await Promise.resolve(): it is put into the Promise microtask queue and has similar issues to process.nextTick(). setImmediate is the only answer.
This code in itself isn't that useful but I hope it helps you understand how to convert synchronous functions into asynchronous ones when needed :)
/**
 * Returns a promise that fulfills (with `undefined`) after `ms` milliseconds,
 * using setTimeout.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Builds a minimal thenable. Awaiting it hands the continuation to
 * setImmediate, so the awaiting function resumes on a later turn of the
 * event loop and the awaited value is `undefined`.
 */
const createAwaitTick = () => {
  const thenable = {
    then(resume) {
      setImmediate(resume);
    },
  };
  return thenable;
};
/**
 * Yields `str` one character (code point) at a time, awaiting
 * `createAwaitTick()` between batches so other event-loop work can run
 * while a very long string is being walked.
 *
 * @param {string} str - String to iterate over.
 * @param {number} batchSize - Number of characters emitted between pauses.
 *   A value of 0 or less pauses before every character.
 * @returns {AsyncGenerator<string, void, void>} The characters of `str`, in order.
 */
async function* _charGenerator(str, batchSize) {
  let remaining = batchSize;
  for (const c of str) {
    // `<=` rather than `===` so a zero, negative, or fractional batch size
    // still reaches the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    // Without this decrement the budget never runs out and the generator
    // never gives the event loop a chance to run anything else.
    remaining--;
    yield c;
  }
}
/**
 * Async generator string splitter for large strings.
 *
 * Yields the pieces of `str` separated by `delim`, in order, followed by the
 * remainder after the last delimiter (so at least one value is always
 * yielded when `delim` is non-empty). Between batches it awaits
 * `createAwaitTick()` so other event-loop work can run during a long split.
 *
 * @param {string} str - String to split.
 * @param {string} delim - Delimiter. An empty delimiter yields `str` character by character.
 * @param {number} [batchSize=50_000] - Number of pieces emitted between pauses.
 *   A value of 0 or less pauses before every piece.
 * @returns {AsyncGenerator<string, void, void>} The pieces of `str`.
 */
export async function* splitString(str, delim, batchSize = 50_000) {
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf#return_value
  // indexOf("") matches at the search position itself, so the loop below
  // would never advance; hand the empty-delimiter case to the character generator.
  if (delim.length === 0) return yield* _charGenerator(str, batchSize);

  const delimLength = delim.length;
  let start = 0;
  let remaining = batchSize;
  let idx = str.indexOf(delim, start);
  while (idx !== -1) {
    // `<=` rather than `===` so a zero, negative, or fractional batch size
    // still reaches the pause instead of counting past it.
    if (remaining <= 0) {
      remaining = batchSize;
      await createAwaitTick();
    }
    yield str.substring(start, idx);
    start = idx + delimLength;
    remaining--;
    idx = str.indexOf(delim, start);
  }
  // Whatever follows the last delimiter (possibly the empty string).
  yield str.substring(start);
}
// Demo input: the 7-character chunk "foobar;" repeated 50,000,000 times.
const longString = "foobar;".repeat(50_000_000);

// Prints the elapsed time 100 times with a 10 ms sleep between prints.
// Stands in for other work that needs the event loop while the split runs.
(async function reportElapsed() {
  const startedAt = Date.now();
  for (let round = 0; round !== 100; round += 1) {
    // Represents a synchronous operation needed by other calls
    console.log(`${Date.now() - startedAt}ms`);
    await sleep(10);
  }
})();

// Consumes the async splitter and counts how many pieces it produced.
(async function countPieces() {
  let total = 0;
  for await (const _piece of splitString(longString, ";")) {
    total += 1;
  }
  // Every repeat ends with ";", so the final piece is an empty string:
  // this should print the number of times repeated + 1.
  console.log(`counted total${total}`);
})();