Last active
August 29, 2015 14:06
-
-
Save ikks/0075a7bcf8fe526ab4c3 to your computer and use it in GitHub Desktop.
Download headers from a label and filter them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""With the Gmail API is easy to download all your messages or filter them, | |
it is a good practice to have filters that label your messages, so later you
can get some statistics.
You need to go through the whole auth dance in order to connect to the API; for development
purposes you'll be OK with the instructions provided by Gmail:
https://developers.google.com/gmail/api/v1/reference/users/messages | |
""" | |
# Path of the output file; it is opened for writing, so existing content is
# overwritten.
OUT_FILENAME = '/tmp/flisol_emails.txt'
# Id of the Gmail label whose messages are fetched (passed as ``labelIds``
# to the messages.list call).
LABEL_ID = 'Label_17'
# Substring looked for in the To/Cc header values. The header values are
# upper-cased before the comparison, so keep this value upper-case.
MAIL_PIECE = 'FLISOL'
# Path to the client_secret.json file downloaded from the Developer Console.
# NOTE(review): not referenced by the code visible in this file; presumably
# consumed by the auth code that builds ``gmail_service`` -- confirm.
CLIENT_SECRET_FILE = 'client_secret.json'
def get_mail_headers_by_label(gmail_service, label_id):
    """Return every message entry that carries the label ``label_id``.

    Walks all result pages of the Gmail ``users.messages.list`` call for the
    authenticated user ('me'). Follow the instructions in
    https://developers.google.com/gmail/api/quickstart/quickstart-python
    to get your ``gmail_service``.

    Args:
        gmail_service: Gmail API service object; only
            ``users().messages().list(...).execute()`` is used.
        label_id: label id forwarded as the ``labelIds`` query parameter.

    Returns:
        A new list with the entries of the ``messages`` field of every page,
        in page order (each entry is a dict that holds at least the message
        ``id``). The list is empty when the label has no messages.
    """
    messages = []
    page_token = None
    while True:
        params = {'userId': 'me', 'labelIds': label_id}
        if page_token:
            params['pageToken'] = page_token
        resp = gmail_service.users().messages().list(**params).execute()
        # A page without results may come back without a 'messages' key,
        # so never index it directly.
        messages.extend(resp.get('messages', []))
        page_token = resp.get('nextPageToken')
        if not page_token:
            return messages
def fetch_and_save_message_headers(gmail_service, out_filename, messages, mail_piece):
    """Fetch message headers and save those addressed to ``mail_piece``.

    For every entry in ``messages`` the message payload is requested from the
    Gmail API. A message is kept when at least one of its To/Cc headers
    contains ``mail_piece``; the comparison ignores case on both sides.
    For a kept message the matching To/Cc headers, followed by its From
    headers, are collected as a list of ``{'name': ..., 'value': ...}`` dicts.

    Args:
        gmail_service: Gmail API service object; only
            ``users().messages().get(...).execute()`` is used.
        out_filename: path of the output file; it is overwritten.
        messages: iterable of dicts that have an ``'id'`` key.
        mail_piece: substring to look for in the To/Cc header values.

    Returns:
        The list of collected header lists, one per kept message, in the
        order the messages were processed.

    Side effects:
        Writes each collected header list to ``out_filename`` as JSON and
        prints a progress counter after every 100 processed messages.
    """
    # Function-scope import: the module has no import section of its own.
    import json

    wanted = mail_piece.upper()
    to_list = []
    # The context manager closes the file even when an API call raises.
    with open(out_filename, mode='w') as save_to:
        for done, id_msg in enumerate(messages, 1):
            msg = gmail_service.users().messages().get(
                userId='me', id=id_msg['id'], fields='payload').execute()
            headers = msg.get('payload', {}).get('headers', [])
            # Header field names are case-insensitive, so normalise them
            # before comparing.
            resp = [obj for obj in headers
                    if obj['name'].lower() in ('to', 'cc')
                    and wanted in obj['value'].upper()]
            if resp:
                resp.extend(obj for obj in headers
                            if obj['name'].lower() == 'from')
                to_list.append(resp)
                # NOTE: the JSON documents are written back to back, without
                # a separator, to keep the existing output format.
                save_to.write(json.dumps(resp))
            if done % 100 == 0:
                # Single-argument call form works on Python 2 and 3.
                print(done)
    return to_list
# Script entry: list every message under LABEL_ID, then fetch the headers of
# each one and save the matching ones to OUT_FILENAME.
# NOTE(review): ``gmail_service`` is not defined in the code visible here; it
# must be built beforehand (see the Gmail API quickstart) or these lines
# raise NameError.
messages = get_mail_headers_by_label(gmail_service, LABEL_ID)
fetch_and_save_message_headers(gmail_service, OUT_FILENAME, messages, MAIL_PIECE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment