Skip to content

Instantly share code, notes, and snippets.

@josephdunn
Last active December 20, 2015 15:18
Show Gist options
  • Save josephdunn/6152744 to your computer and use it in GitHub Desktop.
Save josephdunn/6152744 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Depends on wget, swftools, and ImageMagick.
import os
import re
import subprocess
import sys

import requests
from bs4 import BeautifulSoup
#dims = '-X 841 -Y 1189'
dims = '-X 2480 -Y 3508'
try:
baseURL = sys.argv[1]
except IndexError:
print 'Usage: getatmag.py <URL>'
sys.exit(1)
xmlMeta = requests.get(baseURL + '/megazine/megazine.php')
soup = BeautifulSoup(str(xmlMeta.text), 'xml')
toProcess = []
for page in soup.find_all('page'):
for img in page.find_all('img'):
if re.match('page(s|images)\/\d+\.(swf|jpg)$', img['src']):
toProcess.append(re.sub('\?time=\d+', '', img['src']))
print 'Need to get and process ' + str(len(toProcess)) + ' pages'
for page in toProcess:
os.system('wget -U Mozilla ' + baseURL + '/megazine/' + page)
toBind = []
for page in toProcess:
if re.search('swf$', page):
swfName = page.lstrip('pages/')
pngName = swfName.rstrip('.swf') + '.png'
print 'processing ' + page + ' ...'
os.system('swfrender ' + swfName + ' ' + dims + ' -o ' + pngName)
toBind.append(pngName)
else:
toBind.append(page.lstrip('pageimages/'))
print 'binding...'
os.system('convert -adjoin ' + ' '.join(toBind) + ' -page A4 out.pdf')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment