Skip to content

Instantly share code, notes, and snippets.

@huzhifeng
Created July 14, 2014 10:54
Show Gist options
  • Save huzhifeng/8ccbaf9b5d068b78adba to your computer and use it in GitHub Desktop.
Save huzhifeng/8ccbaf9b5d068b78adba to your computer and use it in GitHub Desktop.
Convert an HTML page to a PDF file online and download it
# -*- coding: utf-8 -*-
import json
import os
import subprocess

import requests
def pdfcrowd(url):
    """Ask pdfcrowd.com to convert the page at *url* and return the PDF's URL.

    Posts *url* to the site's JSON form endpoint and builds the download
    link from the ``uri`` field of the JSON reply.

    Args:
        url: Address of the HTML page to convert.

    Returns:
        The absolute download URL as a string, or ``''`` when the request
        fails, the HTTP status is not 200, the reply is not a JSON object
        holding both ``status`` and ``uri``, or ``status`` is not ``'ok'``.
        Request and parsing errors are printed rather than raised.
    """
    pdfcrowd_post_url = 'http://pdfcrowd.com/form/json/convert/uri/'
    headers = {
        'Host': 'pdfcrowd.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
    }
    payload = {
        # NOTE(review): looks like a millisecond timestamp captured once and
        # reused as a cache buster -- confirm whether the service needs a
        # fresh value per request.
        'noCache': '1405329613577',
        'src': url,
        'conversion_source': 'uri',
    }
    try:
        # Without a timeout requests waits indefinitely on a stalled server,
        # which also leaves the Timeout handler below unreachable in practice.
        r = requests.post(pdfcrowd_post_url, data=payload, headers=headers,
                          timeout=60)
        if r.status_code != 200:
            print('requests.post failed, payload=%s, status_code=%d'
                  % (payload, r.status_code))
            return ''
        obj = json.loads(r.text)
        # Valid JSON can still be a list, string or number; only an object
        # can carry the two fields read below.
        if not isinstance(obj, dict) or not all(
                key in obj for key in ('status', 'uri')):
            print('Invalid response obj=%s' % (obj,))
            return ''
        status = obj['status']
        if status != 'ok':
            # A failed conversion must not yield a download link.
            print('status=%s' % (status,))
            return ''
        return 'http://pdfcrowd.com%s' % (obj['uri'],)
    # Specific handlers stay ahead of RequestException, their common base.
    except requests.exceptions.ConnectionError as e:
        print('ConnectionError: e=%s' % e)
    except requests.exceptions.Timeout as e:
        print('Timeout: e=%s' % e)
    except requests.exceptions.TooManyRedirects as e:
        print('TooManyRedirects: e=%s' % e)
    except requests.exceptions.HTTPError as e:
        print('HTTPError: e=%s' % e)
    except requests.exceptions.RequestException as e:
        print('RequestException: e=%s' % e)
    except Exception as e:
        # Still best-effort (covers e.g. a non-JSON body), but no longer
        # swallows KeyboardInterrupt/SystemExit and now reports the cause.
        print('Unknown exception: url=%s, e=%s' % (url, e))
    return ''
def html2pdf(url, filename):
    """Convert the page at *url* to PDF via pdfcrowd and save it as *filename*.

    Does nothing when *filename* already exists. Otherwise obtains a
    download link from ``pdfcrowd(url)`` and fetches it with the external
    ``wget`` program. Progress and failures are printed.

    Args:
        url: Address of the HTML page to convert.
        filename: Path the PDF is written to.

    Returns:
        None in every case.
    """
    if os.path.exists(filename):
        print('File %s already exist' % (filename,))
        return
    download_url = pdfcrowd(url)
    if not download_url:
        print('Invalid download_url: %s' % (download_url,))
        return
    # Pass an argument list instead of a shell string: the URL comes from a
    # remote server and the filename from the caller, so neither may be
    # interpreted by a shell (metacharacters, spaces).
    try:
        ret = subprocess.call(['wget', download_url, '-O', filename])
    except OSError as e:
        # Raised when wget is not installed or cannot be executed.
        print('Download %s failed, could not run wget: %s' % (download_url, e))
        return
    if ret == 0:
        print('Download %s successful' % (download_url,))
        return
    # ret is wget's own exit code (negative if it was killed by a signal).
    print('Download %s failed, error code=%d' % (download_url, ret))
    # wget -O creates the output file before downloading; drop the leftover
    # so the exists-check above does not skip the next attempt.
    if os.path.exists(filename):
        os.remove(filename)
# Script entry: convert the Requests documentation landing page and save it
# in the current working directory.
html2pdf(
    url='http://docs.python-requests.org/en/latest/',
    filename='python-requests-doc.pdf',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment