-
-
Save gauravssnl/9c4feb880d28ab44792a99ba4ea11f6f to your computer and use it in GitHub Desktop.
Downloading Slides from Indico
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Tool to download conference information from Indico | |
""" | |
import json, argparse, sys, re, logging, os, importlib | |
# Module-wide logger; records are rendered as "LEVEL: message".
logger = logging.getLogger(__name__)
logging.basicConfig(format='%(levelname)s: %(message)s')

# The third-party packages are imported by name at runtime so that a missing
# one produces a short explanation and exit status 1 instead of a traceback.
for module in ('unidecode', 'requests'):
    try:
        # Bind the imported package under its own name in the module
        # namespace, so later code can refer to e.g. ``requests.get``.
        globals()[module] = importlib.import_module(module)
    except ImportError:
        logger.critical(
            'You need to download the {} Python package to use this tool.'.format(module)
        )
        sys.exit(1)
# Command-line interface. The module docstring doubles as the --help text.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    'resource',
    metavar='URL_OR_FILE',
    help='HTTP URL or local json file',
)
parser.add_argument(
    '--output-folder', '-o',
    default='./',
    help='Folder to store the output in. Default: current working directory',
)
# ``store_true`` makes this a real on/off switch. Without it argparse expects
# a value after the flag, so a bare ``-i`` was rejected with
# "expected one argument" even though the help text describes a toggle.
parser.add_argument(
    '--interactive', '-i',
    action='store_true',
    help='Enable an interactive mode before downloading files.',
)
args = parser.parse_args()

if args.interactive:
    # IPython is only needed for the interactive mode, so it is imported on
    # demand; parser.error() prints the usage message and exits.
    try:
        import IPython
    except ImportError:
        parser.error('Need to install IPython to use --interactive mode')
def _contribution_sort_key(contribution):
    """Return a sort key placing numeric IDs first (in numeric order).

    IDs that cannot be converted to ``int`` sort after all numeric ones,
    ordered by their text, instead of aborting the sort with ValueError.
    """
    raw_id = contribution['id']
    try:
        return (0, int(raw_id), '')
    except (TypeError, ValueError):
        return (1, 0, str(raw_id))


# Obtain the raw JSON text, either over HTTP or from a local file.
if re.match(r'^https?://', args.resource):
    # Query parameters sent along with the export request.
    params = {
        'occ': 'yes',
        'pretty': 'yes',
        'detail': 'contributions',
    }
    try:
        r = requests.get(args.resource, params=params)
    except (requests.exceptions.InvalidURL, requests.exceptions.ConnectionError) as e:
        msg = 'Could not fetch the URL'
        try:
            # Best effort: append the innermost error detail when the
            # exception carries one in the expected nested position.
            msg += ': ' + str(e.args[0].args[0])
        except (AttributeError, IndexError, TypeError):
            pass
        logger.critical(msg)
        sys.exit(2)
    if not r.ok:
        # An error page would otherwise surface as a confusing JSON traceback.
        logger.critical('Could not fetch the URL: HTTP status {}'.format(r.status_code))
        sys.exit(2)
    raw_json = r.text
else:
    try:
        # JSON interchange text is UTF-8, independent of the local locale.
        with open(args.resource, 'r', encoding='utf-8') as d:
            raw_json = d.read()
    except (OSError, UnicodeDecodeError) as e:
        logger.critical('Could not read the file: {}'.format(e))
        sys.exit(2)

try:
    #: jr = json response
    jr = json.loads(raw_json)
except ValueError as e:
    logger.critical('The resource does not contain valid JSON: {}'.format(e))
    sys.exit(3)

# Explicit checks instead of ``assert``: assertions are removed under
# ``python -O`` and give the user no explanation when they fail.
if not isinstance(jr, dict) or jr.get('count') != 1:
    logger.critical('Expected an export describing exactly one event (count == 1)')
    sys.exit(3)
try:
    cs = jr['results'][0]['contributions']  # contributions
except (KeyError, IndexError, TypeError):
    logger.critical('The export does not contain a list of contributions')
    sys.exit(3)
cs.sort(key=_contribution_sort_key)
def _download_attachment(url, destination):
    """Stream ``url`` into ``destination``; return True if the file is complete.

    The data is first written to ``destination + '.part'`` and only moved
    into place once the transfer has finished, so an existing file is never
    truncated by a failed attempt. The partial file is removed on every
    failure path, including KeyboardInterrupt, which still propagates.
    HTTP error statuses and ``requests`` exceptions are logged as warnings
    and reported through the return value rather than raised.
    """
    partial = destination + '.part'
    try:
        response = requests.get(url, stream=True)
        try:
            if not response.ok:
                logger.warning("Couldn't download the file {}".format(url))
                return False
            with open(partial, 'wb') as handle:
                for block in response.iter_content(1024):
                    handle.write(block)
        finally:
            response.close()
        os.replace(partial, destination)
        return True
    except requests.exceptions.RequestException as e:
        logger.warning("Couldn't download the file {}: {}".format(url, e))
        return False
    finally:
        # We don't want unfinished downloads. After a successful rename the
        # partial file no longer exists and nothing happens here.
        if os.path.exists(partial):
            os.unlink(partial)


if args.interactive:
    # Drop into an IPython shell before any output is written.
    IPython.embed()

os.makedirs(args.output_folder, exist_ok=True)

# Text files are written as UTF-8 so that non-ASCII titles or descriptions
# do not depend on the locale's default encoding.
with open(os.path.join(args.output_folder, '000_conference.txt'), 'w', encoding='utf-8') as fp:
    def fprint(line, additional=None):
        """Write ``line`` to the summary file and stdout, and to ``additional`` if given."""
        fp.write(line + '\n')
        if additional:
            additional.write(line + '\n')
        print(line)

    for c in cs:
        fprint('----')
        # ASCII-only, lower-case, dash-separated title, capped at 22 characters.
        slug = re.sub(r'\W+', '-', unidecode.unidecode(c['title']).lower())
        slug = slug[:22]
        try:
            # if numeric ID, convert it to a 3-digit number with leading zeros
            c['id'] = '{:03d}'.format(int(c['id']))
        except (TypeError, ValueError):
            # Non-numeric IDs are kept unchanged.
            pass

        base_path = os.path.join(args.output_folder, c['id'] + '_' + slug)
        with open(base_path + '.json', 'w', encoding='utf-8') as cfp_json:
            cfp_json.write(json.dumps(c))

        with open(base_path + '.txt', 'w', encoding='utf-8') as cfp:
            for prop in ('ID', 'Title', 'Type', 'Track'):
                fprint('{0}: {1}'.format(prop, c[prop.lower()]), cfp)
            fprint(c['description'], cfp)
            for people_group in ('primaryauthors', 'speakers', 'coauthors'):
                names = (a['fullName'] for a in c[people_group])
                # Turn "Last, First" into "First Last".
                names = (' '.join(name.split(', ')[::-1]) for name in names)
                fprint('{0}: {1}'.format(people_group.capitalize(), ', '.join(names)), cfp)

            # Only files that are actually present locally are listed.
            attachments = []
            for folder in c.get('folders') or []:
                for a in folder.get('attachments') or []:
                    if 'download_url' not in a:
                        logger.warning('Skipping an attachment without a download URL')
                        continue
                    if 'filename' not in a:
                        logger.warning('Cannot download this file: ' + a['download_url'])
                        continue
                    # basename(): the name comes from the server and must not
                    # be able to point outside the output folder.
                    local_filename = c['id'] + '_' + os.path.basename(a['filename'])
                    local_path = os.path.join(args.output_folder, local_filename)
                    # Skip the transfer when a file of the advertised size is
                    # already there; a missing 'size' simply never matches.
                    already_complete = (
                        os.path.isfile(local_path)
                        and os.path.getsize(local_path) == a.get('size')
                    )
                    if already_complete or _download_attachment(a['download_url'], local_path):
                        attachments.append(local_filename)
            fprint("Attachments: {}".format(' '.join(attachments)), cfp)
        fp.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment