Skip to content

Instantly share code, notes, and snippets.

@peterherrmann
Last active February 28, 2023 12:38
Show Gist options
  • Save peterherrmann/5691884 to your computer and use it in GitHub Desktop.
Save peterherrmann/5691884 to your computer and use it in GitHub Desktop.
outerLoop for Google Apps Script triggered functions is some boilerplate that encapsulates best practice ways to deal with running workloads that may take longer to process that the time available in an Apps Script triggered run.
//load configuration details and start logging - creates and sets up sheets the first time they are run
var CONFIG_SPREADSHEET_KEY = '<ssid_goes_here>';
var Config = SettingsManager.load(CONFIG_SPREADSHEET_KEY); //Add Mafviu9bMfg9xVu21LGfpWnHAGDwXQ1CH in Resources > Libraries
Logger = BetterLog.useSpreadsheet(Config['logSpreadsheetId'].value);//Add MYB7yzedMbnJaMKECt6Sm7FLDhaBgl_dE in Resources > Libraries
// trigger this function
function outerLoop() {
try {
// to calc elapsed time
var isOverMaxRuntime = false,
startTime = new Date();
// Deletes all occurrences of the Repeating trigger we don't end up with undeleted time based triggers all over the place
ScriptApp.getProjectTriggers().forEach(function(i) {
if (i.getHandlerFunction()==='outerLoopRepeating') {ScriptApp.deleteTrigger(i);}
});
//Logger.finer('Entering the "%s" function', arguments.callee.name);
// Handle max execution times in our outer loop
// Get start index if we hit max execution time last run
var start = parseInt(PropertiesService.getScriptProperties().getProperty(arguments.callee.name + "-start")) || 0;
var thingies = ['stuff to process', 'in an Array',,,,]; //
for (var i = start ; i < thingies.length; i++) {
if (Math.round((new Date() - startTime)/1000) > 300) { //360 seconds is Google Apps Script max run time
//We've hit max runtime.
isOverMaxRuntime = true;
break;
}
//do our work here
Logger.finest('Inside the for loop that does the xyz work. i is currently: %d', i);
var processingMessage = Utilities.formatString('%d of %d thingies: %s <%s>', i+1, thingies.length, thingyName, thingyId);
//do our work above here
}
if (isOverMaxRuntime) {
//save state in user/project prop if required
PropertiesService.getScriptProperties().setProperty(arguments.callee.name + '-start', i);
//create another trigger
ScriptApp.newTrigger('outerLoopRepeating').timeBased().everyMinutes(10).create();
Logger.info('Hit max run time - last iteration completed was i=%d', i-1);
} else {
Logger.fine('Done all the work and all iterations');
PropertiesService.getScriptProperties().deleteProperty(arguments.callee.name + '-start');
Logger.info('Completed processing all %d things with the "%s" function', thingies.length, arguments.callee.name);
}
} catch (e) {
Logger.severe('%s. While processing %s', JSON.stringify(e, null, 2), processingMessage);
throw e;
}
}
//automatically invoked from outerLoop()'s creation of a new trigger if required to get work done
function outerLoopRepeating() {
outerLoop();
}
@JJKW1984
Copy link

Job - Did you ever hear back on that question?

@Yadwinder9001
Copy link

Hello Peter,
How we add random waits between requests.
If you can set it to run async so every line will get a randomized time when it should be executed will be even better

//Add menu buttons
function onOpen(){
var ui = SpreadsheetApp.getUi();
ui.createMenu("Update list")
.addItem("Update", "updateList")
.addItem("Sort responses", "sortResponses")
.addToUi();
}

//Sort rows in response sheets by ASIN order
function sortResponses(){
//Get sheets and values
var ss = SpreadsheetApp.getActiveSpreadsheet();

var requestSheet = ss.getSheetByName("Request");
var responseSheet = ss.getSheetByName("Response");

var asinValues = requestSheet.getRange("A2:A").getDisplayValues();
var asinOrder = [];

//Consolidate asin values and order
for(var i=0;i<asinValues.length;i++){
if(asinValues[i][0]!=""){
asinOrder.push(asinValues[i][0]);
}
}

var responseRange = responseSheet.getRange("A2:F");
var responseValues = responseRange.getValues();

//Sort response values by asin order
responseValues.sort(function(a, b){
if(a[0]===""){
return 1;
}else if(b[0]===""){
return -1;
}else if(a[0]===""&&b[0]===""){
return 0;
}else if(a[0]===b[0]){
return 0;
}

if(asinOrder.indexOf(a[0])<asinOrder.indexOf(b[0])){
  return -1;
}else{
  return 1;
}

});

//Set new response values in correct order
responseRange.setValues(responseValues);
}

//Scrap and update request sheets
function updateList(){
//Get spreadsheets and values
var ss = SpreadsheetApp.getActiveSpreadsheet();

var sheet = ss.getSheetByName("Request");
var responseSheet = ss.getSheetByName("Response");

var sheetRange = sheet.getRange("A2:D");
var sheetValues = sheetRange.getValues();

var responseRange = responseSheet.getRange("A2:F");
var responseValues = responseRange.getValues();

var reviewers = [];
var newResponses = [];

//Get a list of reviewers and existing data
for(var i=0;i<responseValues.length;i++){
if(responseValues[i][2]!=""){
newResponses.push(responseValues[i]);
reviewers.push(responseValues[i][2]);
}
}

//Check if rerun is necessary
var failed = false;

//Loop through URLs
for(var i=0;i<sheetValues.length;i++){
//Check if URL is valid and whether row already has data
if((sheetValues[i][1]+"").indexOf("http")>-1&&sheetValues[i][2]===""){
try{
//Random wait between 0 and 10 seconds
// var min = 0;
// var max = 10000;
// var rand = Math.random() * (+max - +min) + +min;
// Utilities.sleep(Math.round(rand));

    //Utilities.sleep((Math.pow(2,n)*1000) + (Math.round(Math.random() * 1000)));
    //Utilities.sleep(Math.random() * 1000);
    //Fetch URL page data        
    var response = UrlFetchApp.fetch(sheetValues[i][1]).getContentText();
    
    //Get stars and number of reviews info        
    var asin = sheetValues[i][0];
    var stars = response.split('<span id="acrPopover" class="reviewCountTextLinkedHistogram noUnderline" title="')[1].split(" ")[0];
    var numReviews = response.split('<span id="acrCustomerReviewText" class="a-size-base">')[1].split(" ")[0];
    
    //Set rating and reviews for sheet        
    sheetValues[i][2] = stars;
    sheetValues[i][3] = numReviews;
    
    //Get list of reviews on first page        
    var reviews = response.split('<span class="a-profile-name">');
    
    //Loop through reviews        
    for(var i=1;i<reviews.length;i++){
      //Get reviewer name
      var reviewer = reviews[i].split("</span>")[0];

      //Check if row has been scraped before          
      if(reviewers.indexOf(reviewer)==-1){
        reviewers.push(reviewer);
      }else{
        continue;
      }
      
      //Get current review info          
      var reviewRating = reviews[i].split('<span class="a-icon-alt">')[1].split('</span>')[0];
      var reviewTitle = reviews[i].split('<span class="">')[1].split('</span>')[0];
      var reviewDate = reviews[i].split('class="a-size-base a-color-secondary review-date">')[1].split('</span>')[0];
      var reviewBody = reviews[i].split('class="a-expander-content reviewText review-text-content a-expander-partial-collapse-content"><span class="">')[1].split('</span>')[0];
      
      //Clean up review text html          
      reviewBody = reviewBody.replace(/<br\s*\/?>/mg,"\n");
      
      if(reviewBody.indexOf("&nbsp;")>-1){
        reviewBody = reviewBody.split("&nbsp;")[1];
      }
      
      //Push newly scrapped review row data          
      newResponses.push([asin, reviewRating, reviewer, reviewDate, reviewTitle, reviewBody]);
    }
    
    //Update spreadsheet with new row data        
    sheetRange.setValues(sheetValues);
    if(newResponses.length>0){
      responseSheet.getRange(2, 1, newResponses.length, newResponses[0].length).setValues(newResponses);
    }
  }catch(e){
    //Check if failed to reach server for current URL       
    var test = e;
    failed = true;
    //Utilities.sleep(1000);
    var n = 0;
    if (n == 5) {
      throw e;
    } 
    Utilities.sleep((Math.pow(2,n)*1000) + (Math.round(Math.random() * 1000)));
    continue;
  }
}

}

//Set last review data in google sheet
sheetRange.setValues(sheetValues);
responseRange.clearContent();
if(newResponses.length>0){
responseSheet.getRange(2, 1, newResponses.length, newResponses[0].length).setValues(newResponses);
}

//Pause for 10s before re-running script if there are unsuccessful rows
if(failed){
SpreadsheetApp.flush();
Utilities.sleep(10000);
updateList();
}
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment