Created
December 10, 2013 11:13
-
-
Save lucemia/7889063 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""Collect the label texts housefun.com.tw returns for each listed city.

For every city/county name found in the embedded ``<option>`` snippet, the
script POSTs ``sel_C=<city>`` to the ``GetDist.ashx`` endpoint, extracts the
text of every ``<label>`` element in the response (presumably that city's
districts -- confirm against the live endpoint), and prints the resulting
``{city: [label, ...]}`` mapping as JSON.

Runs on both Python 2 and Python 3.
"""
import json
import re
from contextlib import closing

try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen

DISTRICT_URL = 'http://www.housefun.com.tw/Ashx/TopSearch/GetDist.ashx'

# Unicode literal so the city names are text (not UTF-8 bytes) on Python 2 too.
html = u"""
... <option value="台北市">台北市</option>
... <option value="新北市">新北市</option>
... <option value="基隆市">基隆市</option>
... <option value="宜蘭縣">宜蘭縣</option>
... <option value="新竹市">新竹市</option>
... <option value="新竹縣">新竹縣</option>
... <option value="桃園縣">桃園縣</option>
... <option value="苗栗縣">苗栗縣</option>
... <option value="台中市">台中市</option>
... <option value="彰化縣">彰化縣</option>
... <option value="南投縣">南投縣</option>
... <option value="嘉義市">嘉義市</option>
... <option value="嘉義縣">嘉義縣</option>
... <option value="雲林縣">雲林縣</option>
... <option value="台南市">台南市</option>
... <option value="高雄市">高雄市</option>
... <option value="澎湖縣">澎湖縣</option>
... <option value="金門縣">金門縣</option>
... <option value="屏東縣">屏東縣</option>
... <option value="台東縣">台東縣</option>
... <option value="花蓮縣">花蓮縣</option>
... </select>"""

# Both patterns capture the text between a tag's closing '>' and the end tag.
re_html = re.compile(r'>([^><]*)</option>')
re_label = re.compile(r'>([^><]*)</label>')


def extract_cities(markup):
    """Return the text of every ``<option>`` element in *markup*, in order."""
    return re_html.findall(markup)


def extract_districts(markup):
    """Return the text of every ``<label>`` element in *markup*, in order."""
    return re_label.findall(markup)


def encode_city_query(city):
    """Return the form-encoded POST body ``sel_C=<city>`` as ASCII bytes.

    The city name is UTF-8 encoded and then percent-escaped, so the body is
    valid ``application/x-www-form-urlencoded`` data instead of raw
    non-ASCII bytes.
    """
    # Encode to UTF-8 first: Python 2's urlencode cannot take non-ASCII text.
    query = urlencode({'sel_C': city.encode('utf-8')})
    # urlopen() on Python 3 requires the request body to be bytes.
    return query.encode('ascii')


def fetch_districts(city):
    """POST *city* to the endpoint and return the label texts of the reply.

    Raises whatever ``urlopen`` raises on network/HTTP failure, and
    ``UnicodeDecodeError`` if the response body is not valid UTF-8.
    """
    # closing() guarantees the connection is released; Python 2 responses
    # are not context managers on their own.
    with closing(urlopen(DISTRICT_URL, data=encode_city_query(city))) as response:
        body = response.read().decode('utf8')
    return extract_districts(body)


def main():
    """Query every embedded city and print the combined mapping as JSON."""
    results = {}
    for city in extract_cities(html):
        results[city] = fetch_districts(city)
    print(json.dumps(results))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment