Skip to content

Instantly share code, notes, and snippets.

@C-Rodg
Last active March 21, 2017 03:43
Show Gist options
  • Save C-Rodg/4c6e739b27bcaca0c1c82779334be302 to your computer and use it in GitHub Desktop.
Save C-Rodg/4c6e739b27bcaca0c1c82779334be302 to your computer and use it in GitHub Desktop.
A quick script that, after you navigate to a Google search results page, downloads the scraped content to a CSV file.
/**
 * Minimal CSV serializer that can also trigger a browser download of the result.
 *
 * @param {string} [del=','] - Column delimiter placed between cells.
 * @param {string} [enc='"'] - Enclosure (quote) character wrapped around text cells.
 */
function CsvWriter(del, enc) {
  this.del = del || ',';
  this.enc = enc || '"';

  // Wraps text in the enclosure character, doubling any embedded enclosure.
  const quote = (text) =>
    this.enc + text.split(this.enc).join(this.enc + this.enc) + this.enc;

  // True when writing the text without an enclosure would corrupt the row.
  const breaksRow = (text) =>
    text.includes(this.del) || text.includes(this.enc) || /[\r\n]/.test(text);

  /**
   * Escapes a single cell.
   *
   * @param {*} col - Cell value.
   * @returns {*} Quoted text for non-numeric values, '' for undefined/NaN,
   *   and the value itself when it is numeric-looking and safe to emit as is.
   */
  this.escapeCol = (col) => {
    // The coercing global isNaN is intentional: numeric strings such as "42"
    // are meant to be written without an enclosure.
    if (isNaN(col)) {
      if (!col) {
        return '';
      }
      const text = String(col);
      return text.length > 0 ? quote(text) : text;
    }
    // Number() ignores surrounding whitespace, so a string like "12\n" counts as
    // numeric; it still has to be quoted or the line break would split the row.
    if (typeof col === 'string' && breaksRow(col)) {
      return quote(col);
    }
    return col;
  };

  /**
   * Serializes one row.
   *
   * @param {Array} arr - Cell values; the array is not modified.
   * @returns {string} Escaped cells joined with the delimiter.
   */
  this.arrayToRow = (arr) =>
    arr.map((cell) => this.escapeCol(cell)).join(this.del);

  /**
   * Serializes all rows, preceded by the fixed header row.
   *
   * @param {Array<Array>} arr - Data rows; the array is not modified.
   * @returns {string} CSV text with CRLF line endings.
   */
  this.arrayToCSVString = (arr) => {
    const header = ["TITLE", "DESCRIPTION", "LINK", "DOMAIN"]; // COLUMN TITLES
    return [header, ...arr].map((row) => this.arrayToRow(row)).join("\r\n");
  };

  /**
   * Builds the CSV and starts a download through a temporary <a> element.
   *
   * @param {Array<Array>} arr - Data rows.
   * @param {string} [filename] - Download file name. Defaults to the
   *   script-level `searchTerm` followed by ".csv".
   */
  this.downloadCSV = (arr, filename = searchTerm + '.csv') => {
    const csvContent = this.arrayToCSVString(arr);
    // encodeURIComponent, not encodeURI: encodeURI leaves "#" untouched, which
    // would start a URL fragment and cut the file off at the first "#".
    const href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(csvContent);
    const link = document.createElement('a');
    link.setAttribute('href', href);
    link.setAttribute('download', filename);
    document.body.appendChild(link);
    link.click();
    // Do not leave the helper element behind in the page.
    document.body.removeChild(link);
  };
}
// Scrape the Google results page currently loaded in the browser and download
// one CSV row per result: title, description, displayed link, domain.

// Extracts the host from the displayed URL: an optional scheme, an optional
// "user@" part and an optional "www." prefix are skipped; group 1 is the host.
// Defined once here rather than once per result.
var domainEx = /^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/\n]+)/i;

// `var` is kept for the script-level names: CsvWriter.downloadCSV reads
// `searchTerm` as a free variable to build the download file name.
var inp = document.querySelector('input[title="Search"]');
// Fall back to a generic name instead of throwing when the search box is not found.
var searchTerm = inp ? inp.value : 'search-results';
var boxes = document.querySelectorAll('div.g');

// Build one row per result box; any missing element contributes an empty string.
var sheetArray = Array.prototype.map.call(boxes, (box) => {
  const title = box.querySelector('h3.r>a');
  const site = box.querySelector('cite');
  const para = box.querySelector('span.st');

  const titleText = title ? title.textContent : "";
  const siteText = site ? site.textContent : "";
  const paraText = para ? para.textContent : "";

  const domainMatches = siteText.match(domainEx);
  const domainShort = (domainMatches && domainMatches.length > 0) ? domainMatches[1] : "";

  // NOTE(review): the "LINK" column receives the <cite> text, not the anchor's
  // href — confirm that the displayed URL is what is wanted here.
  return [titleText, paraText, siteText, domainShort];
});

let csv = new CsvWriter();
csv.downloadCSV(sheetArray);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment