Skip to content

Instantly share code, notes, and snippets.

@prasanthmj
Last active January 29, 2024 17:50
Show Gist options
  • Save prasanthmj/ec804de3b6355c3ca26984a892ad550d to your computer and use it in GitHub Desktop.
Save prasanthmj/ec804de3b6355c3ca26984a892ad550d to your computer and use it in GitHub Desktop.
Parse and extract data from Gmail to Google Sheet. Read full article here: http://blog.gsmart.in/parse-and-extract-data-from-gmail-to-google-sheets/
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<title>Message Display Test</title>
<link rel="stylesheet" href="https://ssl.gstatic.com/docs/script/css/add-ons1.css">
</head>
<body style="padding:3em;">
<h1>Messages</h1>
<ul>
<? for(var m=0;m<messages.length;m++){ ?>
<li><?= messages[m].getSubject() ?></li>
<p><?= messages[m].getPlainBody() ?></p>
<? } ?>
</ul>
</body>
</html>
function getRelevantMessages()
{
var threads = GmailApp.search("newer_than:1d AND from:citicorp.com AND subject:Transaction confirmation AND -label:payment_processing_done",0,100);
var messages=[];
threads.forEach(function(thread)
{
messages.push(thread.getMessages()[0]);
});
return messages;
}
function parseMessageData(messages)
{
var records=[];
if(!messages)
{
//messages is undefined or null or just empty
return records;
}
for(var m=0;m<messages.length;m++)
{
var text = messages[m].getPlainBody();
var matches = text.match(/Rs\.\s+([\d\,\.]+)\s+(?:spent on card)\s+(\d+)\s+(?:on)\s+([\d\-A-Z]+)\s+(?:at)\s+([\w\s]+)\./);
if(!matches || matches.length < 5)
{
//No matches; couldn't parse continue with the next message
continue;
}
var rec = {};
rec.amount = matches[1];
rec.card = matches[2];
rec.date= matches[3];
rec.merchant = matches[4];
//cleanup data
rec.amount = parseFloat(rec.amount.replace(/,/g, ''));
records.push(rec);
}
return records;
}
function getMessagesDisplay()
{
var templ = HtmlService.createTemplateFromFile('messages');
templ.messages = getRelevantMessages();
return templ.evaluate();
}
function getParsedDataDisplay()
{
var templ = HtmlService.createTemplateFromFile('parsed');
templ.records = parseMessageData(getRelevantMessages());
return templ.evaluate();
}
function saveDataToSheet(records)
{
var spreadsheet = SpreadsheetApp.openByUrl("https://docs.google.com/spreadsheets/d/1ql-S5TjH5KoekRvRqiwuyequyxhgas/edit#gid=0");
var sheet = spreadsheet.getSheetByName("Sheet1");
for(var r=0;r<records.length;r++)
{
sheet.appendRow([records[r].date,records[r].card, records[r].merchant, records[r].amount ] );
}
}
function processTransactionEmails()
{
var messages = getRelevantMessages();
var records = parseMessageData(messages);
saveDataToSheet(records);
labelMessagesAsDone(messages);
return true;
}
function labelMessagesAsDone(messages)
{
var label = 'payment_processing_done';
var label_obj = GmailApp.getUserLabelByName(label);
if(!label_obj)
{
label_obj = GmailApp.createLabel(label);
}
for(var m =0; m < messages.length; m++ )
{
label_obj.addToThread(messages[m].getThread() );
}
}
function doGet()
{
return getParsedDataDisplay();
//return getMessagesDisplay();
}
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<title>Data parsed from emails</title>
<link rel="stylesheet" href="https://ssl.gstatic.com/docs/script/css/add-ons1.css">
</head>
<body style="padding:3em;">
<h2>Parsed data from Messages</h2>
<table style="width:420px">
<thead>
<tr>
<th>Date</th><th>Merchant</th><th>Card</th><th>Amount</th>
</tr>
</thead>
<tbody>
<? for(var m=0;m<records.length;m++){ ?>
<tr>
<td><?= records[m].date ?></td>
<td><?= records[m].merchant ?></td>
<td><?= records[m].card ?></td>
<td><?= records[m].amount ?></td>
</tr>
<? }?>
</tbody>
</table>
<div class="block">
<button id="save_to_sheet" class="blue">Save Data to Sheet</button>
<span id="saving_label" style="visibility:hidden">Saving ...</span>
</div>
<script>
document.querySelector("#save_to_sheet").addEventListener("click",
function(e)
{
var self=this;
self.style.visibility = 'hidden';
var saving_label = document.querySelector("#saving_label");
saving_label.style.visibility = 'visible';
google.script.run.withSuccessHandler(function()
{
alert("Saved expense data");
self.style.visibility = 'visible';
saving_label.style.visibility = 'hidden';
}).withFailureHandler(function()
{
alert("an error occured while saving");
self.style.visibility = 'visible';
saving_label.style.visibility = 'hidden';
}).processTransactionEmails();
});
</script>
</body>
</html>
@aleckwalker
Copy link

aleckwalker commented Apr 15, 2022

Here's one function that bypasses the HTML bit and does the following:

  • Grabs relevant email threads
  • Grabs emails from those threads
  • Parses the relevant info from the emails
  • Puts the info into a Google Sheet
  • Avoids duplicates based on a unique identifier per email
function logSubmissions() 
{
  //Grab emails from powr.io pertaining to new submissions
  var threads = GmailApp.search("newer_than:1d AND in:inbox AND from:powr.io AND subject:A new pitch has been submitted to SA&E TC",0,100);
  //Collect messages from across threads
  var arrToConvert=[];
  for(var i = threads.length - 1; i >=0; i--) {
    arrToConvert.push(threads[i].getMessages());   
  }
  var messages = [];
  for(var i = 0; i < arrToConvert.length; i++) {
    messages = messages.concat(arrToConvert[i]);
  }
  Logger.log(threads.length);
  Logger.log(messages.length);
  //Open the spreadsheet
  var spreadsheet = SpreadsheetApp.openByUrl("https://docs.google.com/spreadsheets/d/1dAgAWIRLhbFnX2_2K6092uvtSuySxnX3VJaVK3DuzDQ/edit#gid=0");
  //For each email in the threads, parse the important bits
  for(var m=0;m<messages.length;m++)
  {
    var text = messages[m].getPlainBody();
    var first_name = text.match(/(?<=First Name )(.*)(?= )/)[0];
    var last_name = text.match(/(?<=Last Name )(.*)(?= )/)[0];
    var email_address = text.match(/(?<=Email Address )(.*)(?= )/)[0];
    var pitch_link = text.match(/(?<=Link to your pitch \(Dropbox, Google Drive, DocSend, etc\) )(.*)(?= )/)[0];
    var summary = text.match(/(?<=Two-sentence summary of your venture )(.*)(?= )/)[0];
    var phone = "'" + text.match(/(?<=Phone )(.*)(?= )/)[0];
    var how_heard = text.match(/(?<=How did you hear about us\? )(.*)(?= )/)[0];
    var us_venture = text.match(/(?<=Is your venture based in the US\? )(.*)(?= )/)[0];
    var stanford_venture = text.match(/(?<=Is your venture founded by a Stanford alumus\? )(.*)(?= )/)[0];
    var source = text.match(/(?<=Submission Source: )(.*)(?= )/)[0];
    //Check to see if the pitch link is already in there
    var sheet = spreadsheet.getSheetByName("Applications");
    var values = sheet.getDataRange().getValues();
    var in_sheet = false
    for (var i=0; i<values.length; i++) {
      for (var j=0; j<values[0].length; j++) {
        //Logger.log(values[i][j]);
        if(values[i][j] == pitch_link) {
          in_sheet = true;
        }
      }
    }
    //Add application to spreadsheet so long as pitch link not already in there
    if(in_sheet == false) {
      sheet.appendRow([first_name, last_name, email_address, pitch_link, summary, phone, how_heard, us_venture, stanford_venture, source]);
    }
  }
}

@FunkedelicBob
Copy link

Here's one function that bypasses the HTML bit and does the following:

Great work bringing it all into one function aleckwalker! Worked perfectly :)

@chaney99
Copy link

chaney99 commented Jan 13, 2023

When using GmailApp.search(filter,0, 100), I only get about 40 emails even though there are over 70.

It seems to be skipping emails that came on the same day even though they are different. The multiple emails all arrive at the same time every day but have different content and different ids.

Why is it skipping emails?

@chaney99
Copy link

chaney99 commented Jan 13, 2023

When using GmailApp.search(filter,0, 100), I only get about 40 emails even though there are over 70.

It seems to be skipping emails that came on the same day even though they are different. The multiple emails all arrive at the same time every day but have different content and different ids.

Why is it skipping emails?

I have determined it is skipping emails that are received at the same time. Because they are considered part of the same thread ( though not displayed that way in the gmail browser UI). So getRelevantMessages() should be updated with something like this :

var messages=[];
threads.forEach(function(thread)
{

                for ( i = 0; i < thread.getMessageCount() ; i++)
                {
                  // I am not getting all of the emails when they are received at the same time. 
                  // But this is also returning forwarded emails, etc. 
                  // So additional filter checks are necessary 

                  m = thread.getMessages()[i]; 
                  if ( m.isInInbox() && m.getFrom() == "foo.com" )
                    messages.push(thread.getMessages()[i]);
                }
                // messages.push(thread.getMessages()[0]);
              });

I am new to this but it feels like a google apps script bug.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment