Last active
March 30, 2017 22:58
-
-
Save crewstyle/ab9856f53f38f96cd24049e5263f07e4 to your computer and use it in GitHub Desktop.
Google Spreadsheet - How to get data from URL's headers, using CacheService and UrlFetchApp class services
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* This script has been especially made for a particular context. | |
* So use it with caution, and do not forget to customize it before any use. | |
*/ | |
//globals | |
var sheet = SpreadsheetApp.getActiveSpreadsheet(), | |
sheets = sheet.getSheets(); | |
/** | |
* Retrieve header Response Code and Location from cache. | |
*/ | |
function getDataFromCache(url) { | |
var encodedUrl = encodeURIComponent(url), | |
cache = CacheService.getPublicCache(); | |
//get data from cache | |
var details = cache.get(encodedUrl); | |
//check cache | |
if (details != null && details != []) { | |
return details; | |
} | |
//get details from domainname.ext and www.domainname.ext | |
var data = getUrlDetails(url, false), | |
datawww = getUrlDetails(url, true); | |
//redefine details | |
details = []; | |
//store details | |
if (data != null && datawww != null) { | |
details.push(data.getResponseCode()); | |
details.push(data.getHeaders()['Location']); | |
details.push(datawww.getResponseCode()); | |
details.push(datawww.getHeaders()['Location']); | |
} | |
//cache will be good for around 3600 seconds (1 hour) | |
cache.put(encodedUrl, details, 3600); | |
return details; | |
} | |
/** | |
* Retrieve header Response Code and Location from URL depending on the subdomain "www". | |
* All URLs are in normal HTTP protocol. No Secure protocol allowed. | |
*/ | |
function getUrlDetails(url, www) { | |
var fetch; | |
try { | |
fetch = UrlFetchApp.fetch("http://" + (www ? "www." : "") + url, { | |
followRedirects: false, | |
muteHttpExceptions: true | |
}); | |
} catch(e) {} | |
return fetch; | |
} | |
/** | |
* Update URLS | |
*/ | |
function main() | |
{ | |
//configurations | |
var lineNumber = 100, | |
loadingMsg = "...", | |
currentSheet = sheets[2]; | |
//cells | |
var cellmin = currentSheet.getRange("M2"), | |
cellmax = currentSheet.getRange("N2"); | |
//check min | |
var min = parseInt(cellmin.getValue(), 10), | |
max = parseInt(cellmax.getValue(), 10); | |
//works on min | |
min = 4 > min ? 4 : min; | |
cellmin.setValue(min); | |
//works on max | |
max = (min > max || max > (min + lineNumber) || max > 978) ? (min + lineNumber) : max; | |
cellmax.setValue(max); | |
//vars | |
var val, check, cellh; | |
//check URLS | |
for (var i = min, len = max; i <= len; i++) { | |
cellmax.setValue(i); | |
cellh = currentSheet.getRange("H"+i); | |
//get the value | |
val = currentSheet.getRange("B"+i).getValue(); | |
check = cellh.getValue(); | |
//works on val | |
val = loadingMsg == val ? "" : val; | |
//check value | |
if ("" == val || "" != check) { | |
continue; | |
} | |
//loading | |
currentSheet.getRange("H"+i).setValue(loadingMsg); | |
//get complete url with schema | |
var completeurl = "http://www." + val + "/"; | |
//get url headers for domain name w/out subdomain | |
//details = [responseCode, location, wwwResponseCode, wwwLocation]; | |
var details = getDataFromCache(val); | |
Logger.log(details); | |
//loading | |
cellh.setValue(""); | |
//check values | |
if (4 == details.length) { | |
//update data | |
cellh.setValue(details[0]); | |
currentSheet.getRange("I"+i).setValue(details[1]); | |
currentSheet.getRange("J"+i).setValue(details[1] == completeurl ? "OK" : "KO"); | |
//update www.data | |
currentSheet.getRange("K"+i).setValue(details[2]); | |
currentSheet.getRange("L"+i).setValue(details[3]); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment