App Search CLI
#!/usr/bin/env node
/**
 * Install:
 *
 * Download this file and give it executable permissions
 *
 * Usage:
 *
 * Uploading documents
 * ---------
 *
 * app-search-cli upload ~/Documents/pokemon.json http://localhost:3002 test-engine private-kayik65qbd516q1brk724aaa
 *
 * Concurrency and batch size can be adjusted manually in the code below
 */
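/*
 * The input file is expected to be a single JSON array of document objects,
 * for example (hypothetical contents of pokemon.json):
 *
 *   [{ "id": "1", "name": "Bulbasaur" }, { "id": "2", "name": "Ivysaur" }]
 */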
const fs = require("fs");
const [command, pathToJson, apiPath, engineName, apiKey] = process.argv.slice(
  2
);
console.log("command: ", command);
console.log("pathToJson: ", pathToJson);
console.log("apiPath: ", apiPath);
console.log("engineName: ", engineName);
console.log("apiKey: ", apiKey);
if (command !== "upload") {
  console.error(`Unknown command: ${command}`);
  process.exit(1);
}
const apiBase = `${apiPath}/api/as/v1`;
const documentsUrl = `${apiBase}/engines/${engineName}/documents`;
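// e.g. http://localhost:3002/api/as/v1/engines/test-engine/documents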
// Adapted from: https://www.tomas-dvorak.cz/posts/nodejs-request-without-dependencies/
const request = function(url, options = {}, requestBody = "") {
  // return a new pending promise
  return new Promise((resolve, reject) => {
    // select the http or https module, depending on the requested url
    const lib = url.startsWith("https") ? require("https") : require("http");
    const request = lib.request(url, options, response => {
      // temporary data holder
      const body = [];
      // on every content chunk, push it to the data array
      response.on("data", chunk => body.push(chunk));
      // we are done, resolve the promise with the joined chunks
      response.on("end", () => {
        // handle http errors, and return so we don't also resolve
        if (response.statusCode < 200 || response.statusCode > 299) {
          return reject(
            new Error(`API Error: ${response.statusCode} ${body.join("")}`)
          );
        }
        resolve(body.join(""));
      });
    });
    // handle connection errors of the request
    request.on("error", err => reject(err));
    request.write(requestBody);
    request.end();
  });
};
/*
  Documents can only be indexed 100 at a time, so we index
  our data set in batches. The following is a very simple batching function
  with a configurable `concurrency` variable, which allows for faster
  indexing.
*/
function indexDocumentsInBatches(documents) {
  const concurrency = 20;
  const size = 100;
  let start = 0;
  let end = start + size;
  let recordsIndexed = 0;
  // Pull the next `size` documents off the list, advancing the cursor
  function nextBatch() {
    if (start > documents.length) return [];
    console.log(`processing batch ${start} to ${end}`);
    const batch = documents.slice(start, end);
    start += size;
    end += size;
    return batch;
  }
  // Each chain posts one batch, then recurses until the list is exhausted.
  // Running `concurrency` chains in parallel keeps that many requests in flight.
  async function batchChain() {
    const batch = nextBatch();
    if (batch.length === 0) return;
    const batchString = JSON.stringify(batch);
    try {
      await request(
        documentsUrl,
        {
          method: "POST",
          headers: {
            Authorization: `Bearer ${apiKey}`,
            "Content-Type": "application/json",
            "Content-Length": Buffer.byteLength(batchString)
          }
        },
        batchString
      );
      recordsIndexed += batch.length;
      if (start < documents.length) {
        return batchChain();
      }
    } catch (e) {
      console.log(e);
      process.exit();
    }
  }
  for (let i = 0; i < concurrency; i++) {
    batchChain();
  }
  // Poll until every record has been counted, then exit on the following tick
  let exitOnNextTick = false;
  setInterval(() => {
    if (exitOnNextTick === true) {
      console.log(`\nFinished indexing ${recordsIndexed} records.`);
      process.exit();
    }
    if (recordsIndexed >= documents.length) exitOnNextTick = true;
  }, 500);
}
let documents;
try {
  documents = JSON.parse(fs.readFileSync(pathToJson, "utf8"));
} catch (e) {
  console.error(e);
  process.exit(1);
}
console.log(`About to process ${documents.length} documents`);
indexDocumentsInBatches(documents);
Note that this breaks when the file is too large, since `readFileSync` loads the whole file into memory before parsing, so I've created this adaptation for multi-gigabyte ndjson.
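For reference, here is a minimal sketch of what a streaming approach could look like, assuming one JSON document per line (ndjson) and reusing the `request` helper, `documentsUrl`, and `apiKey` from the script above; the `pathToNdjson` parameter and `postBatch` helper are illustrative, not part of the original gist:

const fs = require("fs");
const readline = require("readline");

// Read the ndjson file line by line instead of loading it all into memory,
// flushing a batch of 100 parsed documents at a time
async function indexNdjson(pathToNdjson) {
  const rl = readline.createInterface({
    input: fs.createReadStream(pathToNdjson),
    crlfDelay: Infinity
  });
  let batch = [];
  for await (const line of rl) {
    if (!line.trim()) continue;
    batch.push(JSON.parse(line));
    if (batch.length === 100) {
      await postBatch(batch);
      batch = [];
    }
  }
  // flush any remaining documents
  if (batch.length > 0) await postBatch(batch);
}

// POST one batch using the same request helper and URL as above
function postBatch(batch) {
  const batchString = JSON.stringify(batch);
  return request(
    documentsUrl,
    {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(batchString)
      }
    },
    batchString
  );
}

This processes batches sequentially rather than with the concurrent chains used above; the same chaining trick could be layered on top, at the cost of buffering a few batches at once.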