|
import json, os, io, requests |
|
from zipfile import ZipFile, BadZipFile |
|
|
|
""" |
|
Get Mbox Email Archives |
|
For All Groups.io Subgroups |
|
""" |
|
|
|
# Value sent as the 'limit' parameter of the getsubgroups API request.
MAX_SUBGROUPS = 100
|
|
|
def main():
    """
    Walk through all subgroups and store each subgroup's archive
    in its own mbox file (one mbox file per subgroup).
    """
    subgroups = get_all_subgroups()
    # save_mbox_file() itself calls get_archive_zip() for the
    # subgroup it is given, so no separate download step is needed here.
    for subgroup_id, subgroup_name in subgroups.items():
        save_mbox_file(subgroup_name, subgroup_id)
|
|
|
|
|
def save_mbox_file(group_name, group_id):
    """
    Fetch the archive zip for one subgroup and write the
    'messages.mbox' member it contains to mboxes/<group_name>.mbox.

    Args:
        group_name: subgroup name; used as the base name of the output file.
        group_id: subgroup id; passed through to get_archive_zip().

    Returns:
        None. Nothing is written when get_archive_zip() returns None.

    Raises:
        KeyError: if the archive has no 'messages.mbox' member.
    """
    archive = get_archive_zip(group_name, group_id)
    if archive is None:
        return

    # Read only the member we need (not every file in the archive),
    # and close the archive once its contents are in memory.
    with archive:
        mbox_bytes = archive.read('messages.mbox')

    # Now save the contents of the mbox file to our mboxes folder.
    # exist_ok avoids failing if the folder appears between a check
    # and its creation.
    os.makedirs('mboxes', exist_ok=True)
    fname = os.path.join('mboxes', "%s.mbox" % (group_name))
    with open(fname, 'wb') as f:
        f.write(mbox_bytes)
|
|
|
|
|
def get_all_subgroups():
    """
    Ask the Groups.io API for the subgroups of the 'dcppc' group.

    A single request is made, with MAX_SUBGROUPS as its 'limit' parameter.
    NOTE(review): only that one response is read; if the API paginates
    results beyond the limit, further pages are ignored - confirm.

    Returns:
        A dictionary where keys are subgroup ids and values are
        subgroup names.

    Raises:
        Exception: if the GROUPSIO_SECRET_TOKEN environment variable
            is not set.
        requests.HTTPError: if the API responds with an HTTP error status.
    """
    url = 'https://api.groups.io/v1/getsubgroups'

    try:
        key = os.environ['GROUPSIO_SECRET_TOKEN']
    except KeyError:
        err = "ERROR: You must set the GROUPSIO_SECRET_TOKEN environment variable. See README.md"
        raise Exception(err)

    data = [('group_name', 'dcppc'),
            ('limit', MAX_SUBGROUPS)]

    # The token is sent as the auth user name, with an empty password.
    response = requests.post(url, data=data, auth=(key, ''))
    # Surface HTTP errors directly instead of failing later with an
    # unexplained KeyError on the missing 'data' field.
    response.raise_for_status()
    payload = response.json()

    return {group['id']: group['name'] for group in payload['data']}
|
|
|
|
|
def get_archive_zip(group_name, group_id):
    """
    Use the API to download the zipped .mbox email archive
    for one subgroup.

    Side effects: prints progress messages, and on success extracts
    the archive's members into the current working directory.

    Args:
        group_name: subgroup name; used only in the printed messages.
        group_id: subgroup id; sent as the 'group_id' request parameter.

    Returns:
        The opened ZipFile on success, or None when the response body
        is not a valid zip file.

    Raises:
        Exception: if the GROUPSIO_SECRET_TOKEN environment variable
            is not set.
    """
    url = "https://api.groups.io/v1/downloadarchives"

    try:
        key = os.environ['GROUPSIO_SECRET_TOKEN']
    except KeyError:
        err = "ERROR: You must set the GROUPSIO_SECRET_TOKEN environment variable. See README.md"
        raise Exception(err)

    data = [('group_id', group_id)]

    print("get_archive_zip(): getting .mbox archive for subgroup %s (%s)" % (group_name, group_id))
    r = requests.post(url, data=data, auth=(key, ''))
    payload = r.content

    try:
        z = ZipFile(io.BytesIO(payload))
        z.extractall()
    except BadZipFile:
        print("ABORTING: subgroup %s failed" % (group_name))
        # Show whatever the server sent instead of a zip. The body may not
        # be valid UTF-8, so decode leniently rather than crash while
        # reporting the failure.
        print(payload.decode('utf-8', errors='replace'))
        print("")
        return None

    print("SUCCESS: subgroup %s worked" % (group_name))
    print("")
    return z
|
|
|
|
|
# Run the download only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|