Skip to content

Instantly share code, notes, and snippets.

@gslin
Created October 25, 2017 15:55
Show Gist options
  • Save gslin/da4f8756799690cdfa8b28ac4320da9c to your computer and use it in GitHub Desktop.
Save gslin/da4f8756799690cdfa8b28ac4320da9c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import lxml.html
import re
import requests
import sqlite3
def procedure():
    """Fetch the hard-coded PChome 24h store page and print its books.

    The response body is decoded as Big5 and parsed with lxml.  For every
    ``table[width="360"]`` under ``#StoreBodyContainer`` one line of the form
    ``<name> - <publisher> - <date>`` is printed, where the name is the text
    of the first ``.text13`` element and publisher/date are the captured
    groups of the two label regexes.  A field whose label is not found is
    printed as an empty string; a table without a ``.text13`` element is
    skipped.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    url = 'https://24h.pchome.com.tw/?m=store&f=book_show&RG_NO=DJAZ&pageType=0'
    # Bound the wait so a stalled connection cannot hang the script forever.
    r = requests.get(url, timeout=30)
    # Fail loudly instead of scraping an error page.
    r.raise_for_status()
    # Force the decoding before r.text is read (presumably the page is served
    # as Big5 without a reliable charset header -- confirm against the site).
    r.encoding = 'big5'
    html = lxml.html.fromstring(r.text)

    # Raw strings keep \s / \S as regex escapes rather than (invalid) string
    # escapes; the compiled patterns are unchanged.
    book_date_re = re.compile(r'出版日:\s*(\S+)/', re.M)
    book_publisher_re = re.compile(r'出版社:\s*(\S+)/', re.M)

    for table in html.cssselect('#StoreBodyContainer table[width="360"]'):
        name_cells = table.cssselect('.text13')
        if not name_cells:
            # No title element in this table, so there is nothing to report.
            continue
        book_name = name_cells[0].text_content()
        txt = table.text_content()
        book_date = _first_group(book_date_re, txt)
        book_publisher = _first_group(book_publisher_re, txt)
        print('%s - %s - %s' % (book_name, book_publisher, book_date))


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return match.group(1) if match else ''
if __name__ == '__main__':
    # Run the scraper only when executed as a script, not when imported.
    procedure()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment