Last active
December 5, 2019 07:32
-
-
Save mark-mishyn/10d6bf6731321fcaebf254a98864c3ca to your computer and use it in GitHub Desktop.
Extracts body of gmail mail
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Example usage:
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file
# Create the Google client first, for example:
storage = file.Storage('path_to_google_oauth_credentials')
creds = storage.get()
http = creds.authorize(Http())
gmail_client = build('gmail', 'v1', http=http)
# Then retrieve the mail data with an API call.
gmail_message_id = 12345
gmail_mail = gmail_client.users().messages().get(userId='me', id=gmail_message_id).execute()
# Finally, extract the mail body.
mail_body = get_mail_body(gmail_mail['payload'])
'''
import base64 | |
from typing import List | |
def get_mail_body(payload: dict) -> str:
    """Return the decoded text body found in a Gmail message payload.

    The payload itself is checked first; if it is not a text part with data,
    its ``parts`` are searched recursively in the order produced by
    ``sort_parts_by_mime_type`` (HTML, then plain text, then multipart
    containers). Every candidate part is tried until one yields a body,
    rather than giving up after the first candidate.

    Args:
        payload: A dict with a ``mimeType`` key and a ``body`` dict, and
            optionally ``parts`` (a list of dicts of the same shape).

    Returns:
        The decoded body, or ``payload['snippet']`` when no body is found,
        or the string ``'Can not read gmail body'`` when there is no snippet.

    Raises:
        KeyError: If a visited payload lacks ``mimeType``, or a text payload
            lacks ``body``.
    """
    # NOTE(review): the fallback reads 'snippet' from the dict passed in;
    # confirm callers pass a dict that actually carries that key.
    return _find_mail_body(payload) or payload.get('snippet', 'Can not read gmail body')


def _find_mail_body(payload: dict) -> str:
    """Return the first non-empty decoded text body under *payload*, or ''."""
    if payload['mimeType'] in ('text/html', 'text/plain'):
        data = payload['body'].get('data')
        if data:
            return encode_decode_body(data)
    for part in sort_parts_by_mime_type(payload.get('parts', [])):
        body = _find_mail_body(part)
        # An empty result means this part had nothing usable; keep looking
        # instead of returning on the first candidate.
        if body:
            return body
    return ''
def encode_decode_body(body: str) -> str:
    """Decode a URL-safe base64 string into text.

    Missing trailing ``=`` padding is restored before decoding, so input
    whose length is not a multiple of four is still accepted.

    Args:
        body: URL-safe base64 text made of ASCII characters.

    Returns:
        The decoded bytes interpreted with the default codec (UTF-8).

    Raises:
        UnicodeEncodeError: If *body* contains non-ASCII characters.
        binascii.Error: If *body* is not valid base64 even after padding.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    # -len % 4 gives the number of '=' needed to reach a multiple of four
    # (0 when the input is already aligned).
    padded = body + '=' * (-len(body) % 4)
    return base64.urlsafe_b64decode(padded.encode('ASCII')).decode()
def sort_parts_by_mime_type(parts: List) -> List:
    """Return the text and multipart entries of *parts* in extraction order.

    HTML parts come first, then plain-text parts, then any part whose MIME
    type starts with ``multipart``; within each group the input order is
    kept. Parts of any other MIME type are left out of the result.

    Args:
        parts: A list of dicts, each with a string ``mimeType`` key.

    Returns:
        A new list holding the selected parts in priority order.
    """
    def priority(part):
        mime_type = part['mimeType']
        if mime_type == 'text/html':
            return 0
        if mime_type == 'text/plain':
            return 1
        if mime_type.startswith('multipart'):
            return 2
        return None

    ranked = [(priority(part), part) for part in parts]
    wanted = [pair for pair in ranked if pair[0] is not None]
    # list.sort() is stable, so parts sharing a priority keep their input order.
    wanted.sort(key=lambda pair: pair[0])
    return [part for _, part in wanted]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment