huzhifeng · July 14, 2014 10:54
diff --git a/html2pdf.py b/html2pdf.py
 # -*- coding: utf-8 -*-
 import json
 import os
 import requests

 def pdfcrowd(url, timeout=30):
    """Submit ``url`` to pdfcrowd.com's conversion form and return the PDF link.

    The page address is POSTed to the pdfcrowd JSON endpoint; the ``uri``
    field of the JSON reply is appended to ``http://pdfcrowd.com`` to form
    the download URL.

    Args:
        url: Address of the HTML page to convert.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The absolute download URL, or ``''`` when the request fails, the
        reply is malformed, or the reported status is not ``'ok'``.
        Problems are printed rather than raised.
    """
    download_url = ''
    pdfcrowd_post_url = 'http://pdfcrowd.com/form/json/convert/uri/'
    headers = {
        'Host': 'pdfcrowd.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'
    }
    payload = {
        # NOTE(review): looks like a millisecond timestamp used as a cache
        # buster, but it is hard-coded -- confirm whether it must be fresh.
        'noCache': '1405329613577',
        'src': url,
        'conversion_source':'uri'
    }
    try:
        # An explicit timeout keeps the call from blocking forever and makes
        # the Timeout handler below reachable.
        r = requests.post(pdfcrowd_post_url, data=payload, headers=headers,
                          timeout=timeout)
        if r.status_code != 200:
            print('requests.post failed, payload=%s, status_code=%d' % (payload, r.status_code))
            return ''
        obj = json.loads(r.text)
        # Guard against a JSON reply that is not an object before key lookup.
        if not isinstance(obj, dict) or not all(key in obj for key in ('status', 'uri')):
            print('Invalid response obj=%s' % (obj,))
            return ''
        status = obj['status']
        if status != 'ok':
            # A failed conversion must not be reported as a usable link.
            print('status=%s' % (status))
            return ''
        download_url = 'http://pdfcrowd.com%s' % (obj['uri'])
    except requests.exceptions.ConnectionError as e:
        print('ConnectionError: e=%s' % e)
    except requests.exceptions.Timeout as e:
        print('Timeout: e=%s' % e)
    except requests.exceptions.TooManyRedirects as e:
        print('TooManyRedirects: e=%s' % e)
    except requests.exceptions.HTTPError as e:
        print('HTTPError: e=%s' % e)
    except requests.exceptions.RequestException as e:
        print('RequestException: e=%s' % e)
    except ValueError as e:
        # json.loads signals an unparsable body with ValueError.
        print('Invalid JSON response: url=%s, e=%s' % (url, e))
    except Exception as e:
        # Still best-effort, but KeyboardInterrupt/SystemExit now propagate
        # and the actual error is shown.
        print('Unknown exception: url=%s, e=%s' % (url, e))

    return download_url

 def html2pdf(url, filename):
    """Convert the page at ``url`` to PDF via pdfcrowd and save it as ``filename``.

    Nothing is downloaded when ``filename`` already exists. The PDF is
    fetched with the external ``wget`` program. Progress and failures are
    printed; the function always returns ``None``.

    Args:
        url: Address of the HTML page to convert.
        filename: Path the PDF is written to.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    if os.path.exists(filename):
        print('File %s already exist' % (filename))
        return
    download_url = pdfcrowd(url)
    if not download_url:
        print('Invalid download_url: %s' % (download_url))
        return
    # An argument list (no shell) keeps '&', '?', spaces or quotes in the URL
    # or file name from being interpreted as shell syntax.
    download_cmd = ['wget', download_url, '-O', filename]
    try:
        ret = subprocess.call(download_cmd)
    except OSError as e:
        # Raised when wget is missing or cannot be executed.
        print('Download %s failed, cannot run wget: %s' % (download_url, e))
        return
    if ret == 0:
        print('Download %s successful' % (download_url))
    else:
        print('Download %s failed, error code=%d' % (download_url, ret))
        # The file did not exist before this call, so anything present now is
        # a partial download; remove it so a later retry is not skipped by
        # the "already exist" check above.
        if os.path.exists(filename):
            os.remove(filename)

 # Module-level call: converts the Requests documentation page and saves it
 # as a local PDF each time this file is executed.
 html2pdf(
    url='http://docs.python-requests.org/en/latest/',
    filename='python-requests-doc.pdf',
 )
	# -*- coding: utf-8 -*-
	import json
	import os
	import requests

	def pdfcrowd(url, timeout=30):
	    """Submit ``url`` to pdfcrowd.com's conversion form and return the PDF link.

	    The page address is POSTed to the pdfcrowd JSON endpoint; the ``uri``
	    field of the JSON reply is appended to ``http://pdfcrowd.com`` to form
	    the download URL.

	    Args:
	        url: Address of the HTML page to convert.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The absolute download URL, or ``''`` when the request fails, the
	        reply is malformed, or the reported status is not ``'ok'``.
	        Problems are printed rather than raised.
	    """
	    download_url = ''
	    pdfcrowd_post_url = 'http://pdfcrowd.com/form/json/convert/uri/'
	    headers = {
	        'Host': 'pdfcrowd.com',
	        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'
	    }
	    payload = {
	        # NOTE(review): looks like a millisecond timestamp used as a cache
	        # buster, but it is hard-coded -- confirm whether it must be fresh.
	        'noCache': '1405329613577',
	        'src': url,
	        'conversion_source':'uri'
	    }
	    try:
	        # An explicit timeout keeps the call from blocking forever and makes
	        # the Timeout handler below reachable.
	        r = requests.post(pdfcrowd_post_url, data=payload, headers=headers,
	                          timeout=timeout)
	        if r.status_code != 200:
	            print('requests.post failed, payload=%s, status_code=%d' % (payload, r.status_code))
	            return ''
	        obj = json.loads(r.text)
	        # Guard against a JSON reply that is not an object before key lookup.
	        if not isinstance(obj, dict) or not all(key in obj for key in ('status', 'uri')):
	            print('Invalid response obj=%s' % (obj,))
	            return ''
	        status = obj['status']
	        if status != 'ok':
	            # A failed conversion must not be reported as a usable link.
	            print('status=%s' % (status))
	            return ''
	        download_url = 'http://pdfcrowd.com%s' % (obj['uri'])
	    except requests.exceptions.ConnectionError as e:
	        print('ConnectionError: e=%s' % e)
	    except requests.exceptions.Timeout as e:
	        print('Timeout: e=%s' % e)
	    except requests.exceptions.TooManyRedirects as e:
	        print('TooManyRedirects: e=%s' % e)
	    except requests.exceptions.HTTPError as e:
	        print('HTTPError: e=%s' % e)
	    except requests.exceptions.RequestException as e:
	        print('RequestException: e=%s' % e)
	    except ValueError as e:
	        # json.loads signals an unparsable body with ValueError.
	        print('Invalid JSON response: url=%s, e=%s' % (url, e))
	    except Exception as e:
	        # Still best-effort, but KeyboardInterrupt/SystemExit now propagate
	        # and the actual error is shown.
	        print('Unknown exception: url=%s, e=%s' % (url, e))

	    return download_url

	def html2pdf(url, filename):
	    """Convert the page at ``url`` to PDF via pdfcrowd and save it as ``filename``.

	    Nothing is downloaded when ``filename`` already exists. The PDF is
	    fetched with the external ``wget`` program. Progress and failures are
	    printed; the function always returns ``None``.

	    Args:
	        url: Address of the HTML page to convert.
	        filename: Path the PDF is written to.
	    """
	    # Function-scope import keeps this block self-contained.
	    import subprocess

	    if os.path.exists(filename):
	        print('File %s already exist' % (filename))
	        return
	    download_url = pdfcrowd(url)
	    if not download_url:
	        print('Invalid download_url: %s' % (download_url))
	        return
	    # An argument list (no shell) keeps '&', '?', spaces or quotes in the URL
	    # or file name from being interpreted as shell syntax.
	    download_cmd = ['wget', download_url, '-O', filename]
	    try:
	        ret = subprocess.call(download_cmd)
	    except OSError as e:
	        # Raised when wget is missing or cannot be executed.
	        print('Download %s failed, cannot run wget: %s' % (download_url, e))
	        return
	    if ret == 0:
	        print('Download %s successful' % (download_url))
	    else:
	        print('Download %s failed, error code=%d' % (download_url, ret))
	        # The file did not exist before this call, so anything present now is
	        # a partial download; remove it so a later retry is not skipped by
	        # the "already exist" check above.
	        if os.path.exists(filename):
	            os.remove(filename)

	# Module-level call: converts the Requests documentation page and saves it
	# as a local PDF each time this file is executed.
	html2pdf(
	    url='http://docs.python-requests.org/en/latest/',
	    filename='python-requests-doc.pdf',
	)