Skip to content

Instantly share code, notes, and snippets.

Last active August 31, 2015 05:31
Show Gist options
  • Save shuxiang/637748ea392143f00fb9 to your computer and use it in GitHub Desktop.
Save shuxiang/637748ea392143f00fb9 to your computer and use it in GitHub Desktop.
get words from
"""pip install gevent pyquery requests weasyprint"""
import gevent
from gevent import monkey
from pyquery import PyQuery as pq
import requests
import re, sys
from weasyprint import HTML
# Module-level settings read by get_html() and render_pdf().
# `domain` is prefixed to each word-list link's href to build its URL.
# NOTE(review): empty here — presumably must be set to the site root before running.
domain = ""
# URL of the word-book index page that get_html() downloads first.
# NOTE(review): empty here — presumably must be set before running.
word_book = ""
# Stylesheet handed to WeasyPrint when the PDF is rendered.
css_file = "typography.css"
# Base name (no extension) of the generated ./<name>.html and ./<name>.pdf files.
output_name = "1368"
# NOTE(review): the two numbers below look like alternative output names — confirm.
# 1156
# 1368
def _count_pages(listing_html):
    """Return the page count announced by a word-list page.

    The page embeds a ``Math.ceil(<total> / <per_page>)`` expression; the two
    integers are extracted and the ceiling of their quotient is returned.

    Raises:
        ValueError: if the expression is not present in ``listing_html``.
    """
    match = re.search(r'Math\.ceil\((\d+)\s{0,}\/\s{0,}(\d+)\)', listing_html)
    if match is None:
        raise ValueError('no Math.ceil(total / per_page) expression found')
    total, per_page = int(match.group(1)), int(match.group(2))
    # Integer ceiling; `//` keeps the result an int on Python 2 and 3 alike.
    return (total + per_page - 1) // per_page


def get_html():
    """Download every word list of the word book into ./<output_name>.html.

    Fetches ``word_book``, follows each ``.wordbook-wordlist-name a`` link
    (prefixed with ``domain``), walks the pages of each list and appends the
    contents of its ``.span8 table`` to the output file, wrapped between a
    small HTML header and footer.

    NOTE(review): the visible source opened the output file but never showed
    the header/footer being written, the ``</table>`` being closed, or
    ``each_list`` being called; this version does all of these — confirm
    against the intended output.
    """
    # get word =====================================================
    word_lists = pq(requests.get(word_book).content)('.wordbook-wordlist-name a')
    # NOTE(review): the single %s sits inside an HTML comment; filling it with
    # css_file is assumed from the placeholder — confirm.
    head = """<!DOCTYPE html>
<html>
<head lang="en">
<meta charset="UTF-8">
<title>{{ title }} </title>
<!--<link rel="stylesheet" href="./%s" />-->
</head>
<body>
<div class="container">
""" % css_file
    tail = """</div></body></html>"""

    def each_list(i, v):
        """Append every page of the word list behind link element ``v``."""
        href = pq(v).attr('href')
        if not href:
            # An anchor without href cannot be followed.
            return
        url = domain + href
        pages = _count_pages(requests.get(url).text)
        print('%s %s ...............' % (url, pages))
        # NOTE(review): page numbers start at 0 as in the source — confirm
        # whether the site numbers its pages from 0 or from 1.
        for page in range(pages):
            content = requests.get(url + "?page=%s" % page).content
            rows = pq(content)('.span8 table').html()
            if rows is None:
                # Page has no word table; nothing to copy.
                continue
            word_file.write(b'<table class="table table-bordered table-striped">')
            word_file.write(rows.encode('utf8'))
            word_file.write(b'</table>')

    # Binary mode + explicit UTF-8 keeps the bytes identical on Python 2 and 3;
    # the context manager guarantees the file is flushed and closed.
    with open('./%s.html' % output_name, 'wb') as word_file:
        word_file.write(head.encode('utf8'))
        for index, link in enumerate(word_lists):
            each_list(index, link)
        word_file.write(tail.encode('utf8'))
def render_pdf():
    """Render ./<output_name>.html to ./<output_name>.pdf with WeasyPrint.

    The HTML file is read in full and passed to ``HTML(string=...)``;
    ``css_file`` is applied as the only stylesheet.
    """
    # to pdf ===================================================
    print('rendering pdf ...............')
    # Read bytes and decode explicitly: the page declares charset UTF-8, and
    # this behaves the same on Python 2 and 3. `with` closes the handle.
    with open('./%s.html' % output_name, 'rb') as word_file:
        html_out = word_file.read().decode('utf8')
    HTML(string=html_out).write_pdf('./%s.pdf' % output_name, stylesheets=[css_file])
if __name__ == '__main__':
    # Dispatch on the first command-line argument:
    #   html -> scrape the word lists into ./<output_name>.html
    #   pdf  -> render that HTML file to ./<output_name>.pdf
    cmd = sys.argv[1] if len(sys.argv) > 1 else ''
    if cmd == 'html':
        get_html()
    elif cmd == 'pdf':
        render_pdf()
    else:
        # Tell the user what is accepted instead of silently doing nothing.
        print('usage: %s html|pdf' % sys.argv[0])
/* --------------------------------------------------------------
* Sets up some sensible default typography.
-------------------------------------------------------------- */
/* Default font settings.
The font-size percentage is of 16px. (0.75 * 16px = 12px) */
html { font-size:100.01%; }
body {
font-size: 75%;
color: #222;
background: #fff;
font-family: "Helvetica Neue", Arial, Helvetica, sans-serif;
}
/* Headings
-------------------------------------------------------------- */
h1,h2,h3,h4,h5,h6 { font-weight: normal; color: #111; }
h1 { font-size: 3em; line-height: 1; margin-bottom: 0.5em; }
h2 { font-size: 2em; margin-bottom: 0.75em; }
h3 { font-size: 1.5em; line-height: 1; margin-bottom: 1em; }
h4 { font-size: 1.2em; line-height: 1.25; margin-bottom: 1.25em; }
h5 { font-size: 1em; font-weight: bold; margin-bottom: 1.5em; }
h6 { font-size: 1em; font-weight: bold; }
/* Images placed inside a heading carry no extra margin. */
h1 img, h2 img, h3 img,
h4 img, h5 img, h6 img {
margin: 0;
}
/* Text elements
-------------------------------------------------------------- */
p { margin: 0 0 1.5em; }
/*
These can be used to pull an image at the start of a paragraph, so
that the text flows around it (usage: <p><img class="left">Text</p>)
*/
.left { float: left !important; }
p .left { margin: 1.5em 1.5em 1.5em 0; padding: 0; }
.right { float: right !important; }
p .right { margin: 1.5em 0 1.5em 1.5em; padding: 0; }
a:hover { color: #09f; }
a { color: #06c; text-decoration: underline; }
blockquote { margin: 1.5em; color: #666; font-style: italic; }
strong,dfn { font-weight: bold; }
em,dfn { font-style: italic; }
sup, sub { line-height: 0; }
acronym { border-bottom: 1px dotted #666; }
address { margin: 0 0 1.5em; font-style: italic; }
del { color:#666; }
pre { margin: 1.5em 0; white-space: pre; }
pre,code,tt { font: 1em 'andale mono', 'lucida console', monospace; line-height: 1.5; }
/* Lists
-------------------------------------------------------------- */

/* Nested lists sit flush inside their parent item. */
li ul,
li ol {
  margin: 0;
}

ul,
ol {
  margin: 0 1.5em 1.5em 0;
  padding-left: 1.5em;
}

ul {
  list-style-type: disc;
}

ol {
  list-style-type: decimal;
}

/* Definition lists: bold terms, indented descriptions. */
dl {
  margin: 0 0 1.5em 0;
}

dl dt {
  font-weight: bold;
}

dd {
  margin-left: 1.5em;
}
/* Tables
-------------------------------------------------------------- */
/*
Because of the need for padding on TH and TD, the vertical rhythm
on table cells has to be 27px, instead of the standard 18px or 36px
of other elements.
*/
table { margin-bottom: 1.4em; width:100%; }
th { font-weight: bold; }
thead th { background: #c3d9ff; }
th,td,caption { padding: 4px 10px 4px 5px; }
/*
You can zebra-stripe your tables in outdated browsers by adding
the class "even" to every other table row.
*/
tbody tr:nth-child(even) td,
tbody tr.even td {
background: #e5ecf9;
}
tfoot { font-style: italic; }
caption { background: #eee; }
/* Misc classes
-------------------------------------------------------------- */

/* Size helpers */
.small {
  font-size: .8em;
  margin-bottom: 1.875em;
  line-height: 1.875em;
}

.large {
  font-size: 1.2em;
  line-height: 2.5em;
  margin-bottom: 1.25em;
}

/* Visibility and emphasis helpers */
.hide {
  display: none;
}

.quiet {
  color: #666;
}

.loud {
  color: #000;
}

.highlight {
  background: #ff0;
}

.added {
  background: #060;
  color: #fff;
}

.removed {
  background: #900;
  color: #fff;
}

/* Edge helpers: drop the outer margin and padding on one side. */
.first {
  margin-left: 0;
  padding-left: 0;
}

.last {
  margin-right: 0;
  padding-right: 0;
}

.top {
  margin-top: 0;
  padding-top: 0;
}

.bottom {
  margin-bottom: 0;
  padding-bottom: 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment