Skip to content

Instantly share code, notes, and snippets.

@ntlk
Last active November 3, 2016 10:44
Show Gist options
  • Save ntlk/7c8187f1c077953caea82e70b35fabe5 to your computer and use it in GitHub Desktop.
Save ntlk/7c8187f1c077953caea82e70b35fabe5 to your computer and use it in GitHub Desktop.
Upload MOH reports to Elasticsearch
// Base URL that reports are POSTed under; requests go to `<elasticUrl>/report/<id>`.
// NOTE(review): 'xxx' looks like a redacted placeholder — set the real endpoint before running.
const elasticUrl = 'xxx';
const exec = require('child_process').exec;
const execFile = require('child_process').execFile;
const sleep = require('sleep');
const fs = require('fs');
const path = require('path');
// Number of report files handled per batch.
const batchSize = 2;
// Start index (inclusive) into the directory listing for the next batch.
let sliceBeg = 0;
// End index for the next batch; passed as the (exclusive) end of `Array.prototype.slice`.
let sliceEnd = sliceBeg + batchSize;
/**
 * List the entries of a directory, ignoring the macOS `.DS_Store` metadata file.
 *
 * @param {string} directory - Path of the directory to read.
 * @returns {Promise<string[]>} Resolves with the entry names reported by
 *   `fs.readdir`, minus any `.DS_Store` entry; rejects with the `fs.readdir`
 *   error when the directory cannot be read.
 */
const listFiles = (directory) => {
  return new Promise((resolve, reject) => {
    fs.readdir(directory, (err, files) => {
      if (err) {
        reject(err);
        return;
      }
      // Filter into a new array rather than splicing the array handed to us.
      resolve(files.filter((name) => name !== '.DS_Store'));
    });
  });
};
/**
 * Promise wrapper around `fs.readFile` that decodes the file as UTF-8.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {Promise<string>} Resolves with the file's text; rejects with the
 *   error reported by `fs.readFile`.
 */
let readFile = (filename) => new Promise((resolve, reject) => {
  const onRead = (err, data) => {
    if (err) {
      reject(err);
      return;
    }
    resolve(data);
  };
  fs.readFile(filename, 'utf8', onRead);
});
/**
 * Serialise a report to JSON and write it to `./tmp/<report.id>.txt`.
 *
 * The `./tmp` directory is created on demand, so the first call no longer
 * fails with ENOENT on a fresh checkout.
 *
 * @param {{id: string}} report - Report object; its `id` names the file and
 *   the whole object is written via `JSON.stringify`.
 * @returns {Promise<string>} Resolves with the path that was written
 *   (`./tmp/<id>.txt`); rejects with the underlying `fs` error.
 */
const generateTempFile = (report) => {
  const tmpDir = './tmp';
  // Plain concatenation on purpose: the resolved path keeps its literal
  // './tmp/' prefix (path.join would normalise it away), which existing
  // callers may rely on when deriving the id back from the path.
  const tmpPath = tmpDir + '/' + report.id + '.txt';
  return new Promise((resolve, reject) => {
    fs.mkdir(tmpDir, (mkdirErr) => {
      // An already-existing directory is the normal case after the first call.
      if (mkdirErr && mkdirErr.code !== 'EEXIST') {
        reject(mkdirErr);
        return;
      }
      fs.writeFile(tmpPath, JSON.stringify(report), (writeErr) => {
        if (writeErr) {
          reject(writeErr);
          return;
        }
        resolve(tmpPath);
      });
    });
  });
};
/**
 * POST the contents of a temp file to `<elasticUrl>/report/<id>` via `curl`.
 *
 * `curl` is started with `execFile` and an argument array, so neither the id
 * nor the file path is ever interpreted by a shell (spaces or shell
 * metacharacters in them cannot split or inject into the command).
 *
 * NOTE(review): no Content-Type header is sent, matching the original command;
 * newer Elasticsearch versions may require `application/json` — confirm
 * against the target cluster.
 *
 * @param {string} tempfile - Path of the file whose contents form the request body.
 * @param {string} id - Document id appended to the `/report/` URL.
 * @returns {Promise<string>} Resolves with curl's stdout; rejects with the
 *   error reported by `execFile` (e.g. curl missing or a non-zero exit code).
 */
const postRequest = (tempfile, id) => {
  return new Promise((resolve, reject) => {
    const url = elasticUrl + '/report/' + id;
    const args = ['-XPOST', url, '--data', '@' + tempfile];
    execFile('curl', args, (error, stdout) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(stdout);
    });
  });
};
/**
 * Parse a report filename of the form `<borough>.<date>.<id>.<ext>` and append
 * the extracted metadata to `reportsMeta`.
 *
 * @param {string} filename - File name whose first three dot-separated pieces
 *   are read as borough, date and id (in that order).
 * @param {Array<{id: string, date: string, borough: string}>} reportsMeta -
 *   Array that receives the new metadata object (mutated in place).
 * @returns {undefined}
 */
const getReportDataFromFilename = (filename, reportsMeta) => {
  const [borough, date, id] = filename.split('.');
  reportsMeta.push({ id, date, borough });
};
// ---------------------------------------------------------------------------
// Upload script: list the report directory once, then walk it in batches of
// `batchSize`. For each batch the files are read, written to ./tmp as JSON and
// POSTed one at a time, with a short non-blocking pause between requests.
// Batches run strictly one after another; a failing batch is logged and the
// run continues with the next one.
// ---------------------------------------------------------------------------
const dir = './Data sources/Full text';

// Pause between consecutive POSTs so the server is not flooded with requests.
const postDelayMs = 2000;

/**
 * Resolve after `ms` milliseconds without blocking the event loop.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Print an error's message and stack trace.
 *
 * @param {Error} err - Error to report.
 */
const logError = (err) => {
  console.log(err.message);
  console.log(err.stack);
};

/**
 * Read, serialise and POST one batch of report files.
 *
 * @param {string[]} batch - File names (relative to `dir`) in this batch.
 * @returns {Promise<string[]>} Resolves with the response body of each POST,
 *   in batch order.
 */
const uploadBatch = (batch) => {
  const reportsMeta = [];
  batch.forEach((file) => {
    getReportDataFromFilename(file, reportsMeta);
  });

  return Promise.all(batch.map((file) => readFile(dir + '/' + file)))
    .then((fileContents) => {
      // reportsMeta and fileContents are both in batch order, so indexes line up.
      const reports = fileContents.map((text, index) => {
        const report = reportsMeta[index];
        report.text = text;
        return report;
      });
      return Promise.all(reports.map((report) => generateTempFile(report)));
    })
    .then((tempFiles) => {
      // POST sequentially: each request starts only after the previous one
      // finished and the pause elapsed.
      return tempFiles.reduce((chain, tempFile) => {
        return chain.then((responses) => {
          const id = tempFile.replace('.txt', '').replace('./tmp/', '');
          return postRequest(tempFile, id).then((response) => {
            return pause(postDelayMs).then(() => responses.concat(response));
          });
        });
      }, Promise.resolve([]));
    });
};

listFiles(dir)
  .then((files) => {
    // Derive the number of batches from the listing instead of a fixed count.
    const remaining = Math.max(files.length - sliceBeg, 0);
    const batchCount = Math.ceil(remaining / batchSize);

    let chain = Promise.resolve();
    for (let i = 0; i < batchCount; i += 1) {
      chain = chain.then(() => {
        const batch = files.slice(sliceBeg, sliceEnd);
        // slice()'s end index is exclusive, so the next batch starts exactly
        // at the old end (starting at end + 1 would skip a file per batch).
        sliceBeg = sliceEnd;
        sliceEnd = sliceBeg + batchSize;

        console.log('running a new batch');
        return uploadBatch(batch)
          .then((response) => {
            console.log(response);
          })
          .catch(logError);
      });
    }
    return chain;
  })
  .catch(logError);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment