-
-
Save pecigonzalo/c147e3f174fca90bec66efbd9eb24ad3 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
# Import a mbox file to a Google Group using
# https://developers.google.com/admin-sdk/groups-migration/index
# You'll need to install https://developers.google.com/api-client-library/python/
# and enable the Groups Migration API; read the prerequisites of the API SDK.
#
# NOTE: this script targets Python 2 (it relies on raw_input and the
# StringIO module).
from __future__ import print_function

import mailbox
import StringIO
import time

import apiclient
import httplib2
from apiclient import discovery
from oauth2client import client, tools
from oauth2client.file import Storage

# Parse the standard oauth2client command-line flags when argparse is
# available; otherwise fall back to the legacy auth flow further down.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# The email address of the group to import to
groupId = raw_input('Enter groupId: ')

# https://console.developers.google.com/project/mysociety-groups-import/apiui/credential
# Generate a Client ID for Native Application.
# You'll be prompted to complete an auth flow on the first run.
# The user will need to be an admin.
scope = 'https://www.googleapis.com/auth/apps.groups.migration'

# Credentials are cached in credentials.dat so the auth flow only has to be
# completed when no valid credentials are stored.
storage = Storage('credentials.dat')
credentials = storage.get()
if not credentials or credentials.invalid:
    client_id = raw_input('Enter client_id: ')
    client_secret = raw_input('Enter client_secret: ')
    flow = client.OAuth2WebServerFlow(client_id, client_secret, scope)
    if flags:
        credentials = tools.run_flow(flow, storage, flags)
    else:
        # Needed only for compatibility with Python 2.6
        credentials = tools.run(flow, storage)

http = credentials.authorize(httplib2.Http())
service = discovery.build('groupsmigration', 'v1', http=http)

# The path of the mbox file to import
mbox_path = raw_input('Enter mbox_path: ')
# create=False: a mistyped path must raise instead of silently creating an
# empty mbox file and "importing" zero messages.
mb = mailbox.mbox(mbox_path, create=False)
try:
    total_messages = len(mb)
    # Number messages from 1 for the progress output.
    for i, msg in enumerate(mb, 1):
        stream = StringIO.StringIO()
        stream.write(msg.as_string())
        media = apiclient.http.MediaIoBaseUpload(
            stream, mimetype='message/rfc822')
        response = service.archive().insert(
            groupId=groupId, media_body=media).execute()
        print('Message {} of {}: {}'.format(
            i,
            total_messages,
            response['responseCode'])
        )
        # Pause between uploads; presumably to stay within the API's rate
        # limits -- confirm against the Groups Migration API limits page.
        time.sleep(1)
finally:
    # Release the file handle held by the mailbox even if an upload fails.
    mb.close()
print('Done.')
@pecigonzalo thank you very much, much appreciated.
is it possible to log (for instance) the message subject of the message that will be ignored, so i can identify them later.
something like
if message_size >= 26214400:
print(message.subject)
continue
@pecigonzalo thank you very much for your help. it works now, like i wanted.
i made this changes:
...
stream.write(msg.as_string())
message_size = msg.as_string().__sizeof__()
if message_size >= 26214400:
print('Message {} - Size {} - subject : {}'.format(i, message_size, msg['subject']))
continue
media = apiclient.http.MediaIoBaseUpload ...
One last question: is the sleep call necessary?
time.sleep(1)
or is it OK to comment it, to speed up the upload.
you're right, there is a limit: https://developers.google.com/admin-sdk/groups-migration/v1/limits
Many thanks @pecigonzalo! Inspired by this I created https://github.com/XaviTorello/mail2gg (with mbox and IMAP fetching support) :)
@Yeikop you probably do not have the API client library installed, as indicated at the start of the file.
I made some improvements to the original code. First, I converted the messages to UTF-8 and replaced characters that couldn't be converted. Some of my messages were not being accepted because they lacked a Message-ID, so I added Message-ID to them. With this version, I successfully transferred thousands of messages without any issues
from email.utils import make_msgid

# Upload every message in the mailbox, logging failures and carrying on so a
# single bad message does not abort the whole import.
for msg in mb:
    try:
        raw_message = msg.as_string()
        # Messages lacking a Message-ID were not being accepted, so add a
        # unique generated one -- but only when the header is missing, to
        # avoid duplicating it on messages that already carry one.
        if msg['Message-ID'] is None:
            raw_message = f"Message-Id: {make_msgid()}\n{raw_message}"
        # Round-trip through UTF-8, replacing characters that cannot be
        # encoded, so the upload stream only contains encodable text.
        msg_encoded = raw_message.encode('utf-8', errors='replace')
        msg_string = msg_encoded.decode('utf-8')  # Convert bytes back to a string
        stream = StringIO()
        stream.write(msg_string)
        media = apiclient.http.MediaIoBaseUpload(
            stream, mimetype='message/rfc822')
        response = service.archive().insert(
            groupId=groupId, media_body=media).execute()
        print('Message {} of {}: {}'.format(
            i,
            total_messages,
            response['responseCode'])
        )
    except Exception as e:
        # Per-message boundary: report the failure and move on.
        print(f"Error in processing message {i}: {str(e)}")
    # Advance the counter and throttle whether or not the upload succeeded.
    i = i + 1
    time.sleep(0.2)
Unfortunately, this gist no longer works as Google disabled out-of-band OAuth access.
I think (not tested) you can do the following:
You have to insert that after the
stream.write(msg.as_string())
and before the media = apiclient.http.MediaIoBaseUpload(
Or
stream.__sizeof__()
instead of message.as_string().__sizeof__()