Last active
November 3, 2016 10:44
-
-
Save ntlk/7c8187f1c077953caea82e70b35fabe5 to your computer and use it in GitHub Desktop.
Upload MOH reports to Elasticsearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Base URL of the elasticsearch index that reports are posted to.
// 'xxx' is a placeholder and has to be replaced before the script is run.
const elasticUrl = 'xxx';
const exec = require('child_process').exec;
const execFile = require('child_process').execFile;
const fs = require('fs');
const path = require('path');
const sleep = require('sleep');
// Number of report files handled per batch.
const batchSize = 2;
// Half-open window [sliceBeg, sliceEnd) into the directory listing; it marks
// the next batch to upload and is moved forward as batches are consumed.
let sliceBeg = 0;
let sliceEnd = sliceBeg + batchSize;
/**
 * List the entries of a directory, leaving out the macOS `.DS_Store` file.
 *
 * @param {string} directory - Path of the directory to read.
 * @returns {Promise<string[]>} Resolves with the entry names; rejects with
 *   the error reported by `fs.readdir`.
 */
const listFiles = (directory) => {
  return new Promise((resolve, reject) => {
    fs.readdir(directory, (err, files) => {
      if (err) {
        reject(err);
        return;
      }
      // Build a new array instead of splicing the one handed over by fs.
      resolve(files.filter((name) => name !== '.DS_Store'));
    });
  });
};
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {Promise<string>} Resolves with the file contents; rejects with
 *   the error reported by `fs.readFile`.
 */
const readFile = (filename) => {
  return new Promise((resolve, reject) => {
    fs.readFile(filename, 'utf8', (err, data) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(data);
    });
  });
};
/**
 * Serialise a report as JSON into `./tmp/<report.id>.txt`.
 *
 * The `./tmp/` directory is created on demand, so the write no longer fails
 * with ENOENT on a fresh checkout.
 *
 * @param {Object} report - Report to serialise; `report.id` names the file.
 * @returns {Promise<string>} Resolves with the path of the written file;
 *   rejects with the error from `fs.mkdir` or `fs.writeFile`.
 */
const generateTempFile = (report) => {
  const tmpDir = './tmp/';
  const tmpPath = tmpDir + report.id + '.txt';
  return new Promise((resolve, reject) => {
    fs.mkdir(tmpDir, (mkdirErr) => {
      // An already existing directory is the normal case after the first call.
      if (mkdirErr && mkdirErr.code !== 'EEXIST') {
        reject(mkdirErr);
        return;
      }
      fs.writeFile(tmpPath, JSON.stringify(report), (err) => {
        if (err) {
          reject(err);
          return;
        }
        resolve(tmpPath);
      });
    });
  });
};
/**
 * POST the contents of a temp file to `<elasticUrl>/report/<id>` using curl.
 *
 * curl is started with `execFile` and an argument array, so no shell is
 * involved: spaces or shell metacharacters in the file path or the id can
 * neither break the command nor inject another one.
 *
 * @param {string} tempfile - Path of the file whose contents form the body.
 * @param {string} id - Document id; URL-encoded before it is put in the URL.
 * @returns {Promise<string>} Resolves with curl's stdout; rejects with the
 *   error reported by `execFile` (curl missing or non-zero exit status).
 */
const postRequest = (tempfile, id) => {
  const url = elasticUrl + '/report/' + encodeURIComponent(id);
  return new Promise((resolve, reject) => {
    execFile('curl', ['-XPOST', url, '--data', '@' + tempfile], (error, stdout) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(stdout);
    });
  });
};
/**
 * Split a filename on '.' and record its first three pieces as report
 * metadata: borough, date and id, in that order.
 *
 * @param {string} filename - Name of the report file.
 * @param {Object[]} reportsMeta - Array the new record is appended to.
 * @returns {{id: string, date: string, borough: string}} The record that was
 *   appended. Pieces the filename does not have come out as `undefined`.
 */
const getReportDataFromFilename = (filename, reportsMeta) => {
  const [borough, date, id] = filename.split('.');
  const meta = { id, date, borough };
  reportsMeta.push(meta);
  return meta;
};
// Directory holding the full-text report files.
const dir = './Data sources/Full text';
// Wait between two batches so the server is not flooded. A timer is used so
// the event loop stays free while waiting.
// NOTE(review): 2000 ms is a judgment call — confirm it suits the target server.
const pauseBetweenBatchesMs = 2000;

/** Resolve after `ms` milliseconds. */
const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/** Log the message and stack of an error. */
const logError = (err) => {
  console.log(err.message);
  console.log(err.stack);
};

/**
 * Upload one report: derive its metadata from the filename, read its text,
 * write the JSON temp file and post that file.
 */
const uploadReport = (file) => {
  const meta = [];
  getReportDataFromFilename(file, meta);
  const report = meta[0];
  return readFile(dir + '/' + file)
    .then((text) => {
      report.text = text;
      return generateTempFile(report);
    })
    .then((tempFile) => postRequest(tempFile, report.id));
};

/**
 * Upload the files in [sliceBeg, sliceEnd), advance the window and repeat
 * until the listing is exhausted. Batches run one after another; a failing
 * batch is logged and does not stop the ones after it.
 */
const uploadRemainingBatches = (files) => {
  const batch = files.slice(sliceBeg, sliceEnd);
  if (batch.length === 0) {
    return Promise.resolve();
  }
  // slice() excludes sliceEnd, so the next batch starts exactly there;
  // starting at sliceEnd + 1 would skip one file after every batch.
  sliceBeg = sliceEnd;
  sliceEnd = sliceBeg + batchSize;
  console.log('running a new batch');
  return Promise.all(batch.map(uploadReport))
    .then((responses) => {
      console.log(responses);
    })
    .catch(logError)
    .then(() => (sliceBeg < files.length ? pause(pauseBetweenBatchesMs) : undefined))
    .then(() => uploadRemainingBatches(files));
};

// The directory is listed once; the number of batches follows from the
// number of files found.
listFiles(dir)
  .then(uploadRemainingBatches)
  .catch(logError);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment