Skip to content

Instantly share code, notes, and snippets.

@wegorich
Created July 6, 2018 17:41
Show Gist options
  • Save wegorich/47941bd3942ae483a953c7b94aac84ee to your computer and use it in GitHub Desktop.
Save wegorich/47941bd3942ae483a953c7b94aac84ee to your computer and use it in GitHub Desktop.
How to crawl Google Docs using Node.js
/**
* Simple Node.js script to turn a specific page of a Google Sheet
* into a JSON-serializable array of rows (each row an array of cell
* strings), mainly for the purpose of HTML templating.
*
* @author jonobr1 / http://jonobr1.com
*
*/
/* eslint func-names: ["error", "never"] */
const https = require('https');
const { StringDecoder } = require('string_decoder');
// Value interpolated into the `format` query parameter of the export URL.
// NOTE(review): downloadSheet always splits cells on tab characters, so only
// `tsv` output is actually parsed into separate cells; `csv` rows would come
// back as single-cell rows unless the parser is extended.
const format = 'tsv'; // Format you'd like to parse. `tsv` or `csv`
// Value interpolated into the `gid` query parameter of the export URL.
const sheetId = 0; // The Page ID of the Sheet you'd like to export. Found as `gid` in the URL.
/**
 * Fetch the text body of `url` over HTTPS, following redirects.
 *
 * @param {string} url - Absolute https URL to request.
 * @param {number} redirectsLeft - How many more 3xx hops may be followed.
 * @returns {Promise<string>} Resolves with the UTF-8 decoded response body.
 *   Rejects on request/response errors, on a non-2xx final status, or when
 *   the redirect budget is exhausted.
 */
function fetchSheetText(url, redirectsLeft) {
  return new Promise((resolve, reject) => {
    https
      .get(url, resp => {
        const { statusCode, headers } = resp;

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Drain the redirect body so the socket can be released.
          resp.resume();
          if (redirectsLeft <= 0) {
            reject(new Error(`Too many redirects while fetching ${url}`));
            return;
          }
          // `Location` may be relative; resolve it against the current URL.
          const nextUrl = new URL(headers.location, url).toString();
          resolve(fetchSheetText(nextUrl, redirectsLeft - 1));
          return;
        }

        if (statusCode < 200 || statusCode >= 300) {
          resp.resume();
          reject(new Error(`Request for ${url} failed with status ${statusCode}`));
          return;
        }

        // StringDecoder keeps multi-byte UTF-8 sequences intact when they are
        // split across two network chunks.
        const decoder = new StringDecoder('utf8');
        // Must start as an empty string: `undefined += 'x'` yields 'undefinedx'.
        let text = '';
        resp
          .on('data', chunk => {
            text += decoder.write(chunk);
          })
          // `decoder.end()` flushes any bytes still buffered in the decoder.
          .on('end', () => resolve(text + decoder.end()))
          .on('error', reject);
      })
      .on('error', reject);
  });
}

/**
 * Download one page of a Google Sheet through the spreadsheet export URL and
 * split it into rows of cells.
 *
 * The page (`gid`) and export format come from the module-level `sheetId`
 * and `format` constants. Rows are separated on line breaks and cells on tab
 * characters, so the result is only meaningful for tab-separated output.
 *
 * @param {string} [id] - Spreadsheet document ID (the part after `/d/` in the
 *   sheet URL). Defaults to the ID hard-coded in this script.
 * @returns {Promise<string[][]>} Resolves with an array of rows, each row an
 *   array of cell strings. Rejects if the download fails.
 */
function downloadSheet(id = '1bBeofyj-HVGEqetcjwOnSHN_m8bNDYgV48afBTpMrPw') {
  const maxRedirects = 5;
  const url = `https://docs.google.com/spreadsheets/d/${id}/export?format=${format}&id=${id}&gid=${sheetId}`;

  return fetchSheetText(url, maxRedirects).then(text => {
    // Accept both CRLF and bare LF line endings.
    const json = text.split(/\r?\n/).map(row => row.split('\t'));
    console.log('Generate translations.json');
    return json;
  });
}
// Public API: the downloader is exposed to requiring modules under the name `get`.
exports.get = downloadSheet;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment