# encoding: utf-8
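# Scrape gold-coin prices for the game 桃园将星录 from 5173.com (completed
# deals and, optionally, current sale listings), then append a per-server
# summary (trade volume, lowest price, highest price) to result.txt.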
import re
import urllib2
import httplib
from datetime import datetime, date, timedelta
from bs4 import BeautifulSoup
prices_of_deals = {}  # server : price_list
today = date.today()
yesterday = today - timedelta(days=1)
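
# Aggregate functions applied to each server's price list, mapped to the
# Chinese column titles used in the report.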
titles_of_funcs = {
    len: u'游戏币交易量',  # gold trade volume (number of deals)
    min: u'游戏币低价位',  # lowest gold price
    max: u'游戏币高价位',  # highest gold price
}
region_of_servers = {}
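
# Tag prepended to each server name in the report; servers sharing a tag
# (e.g. 【桃】, 【三足】) belong to the same group.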
prefix_of_servers = {
    u'黄巾起义': u'【桃】',
    u'桃园结义': u'【桃】',
    u'天下无双': u'【桃】',
    u'五关六将': u'【桃】',
    u'锦囊妙计': u'【桃】',
    u'三顾茅庐': u'【三足】',
    u'千里单骑': u'【桃】',
    u'赤壁之战': u'【三足】',
    u'赤兔宝马': u'【三足】',
    u'卧龙凤雏': u'【三足】',
    u'奇门遁甲': u'【三足】',
    u'对酒当歌': u'【三足】',
    u'草船借箭': u'【三足】',
    u'舌战群儒': u'【三足】',
    u'方天画戟': u'【倚】',
    u'倚天青釭': u'【倚】',
    u'羽扇纶巾': u'【倚】',
    u'丈八蛇矛': u'【倚】',
    u'青龙偃月': u'【倚】',
    u'三足鼎立': u'【三足】',
    u'群雄逐鹿': u'【三足】',
    u'月明星稀': u'【月】',
    u'戎马一生': u'【月】',
    u'江山如画': u'【江山】',
    u'洛神赋': u'【江山】',
    u'白马义从': u'【义从】',
}
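
# Servers in the order they should appear in the report; servers missing
# from the scraped prices are reported as not found.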
orders = (
    u'黄巾起义',
    u'桃园结义',
    u'天下无双',
    u'五关六将',
    u'三顾茅庐',
    u'赤壁之战',
    u'锦囊妙计',
    u'千里单骑',
    u'赤兔宝马',
    u'卧龙凤雏',
    u'奇门遁甲',
    u'对酒当歌',
    u'草船借箭',
    u'舌战群儒',
    u'方天画戟',
    u'倚天青釭',
    u'羽扇纶巾',
    u'丈八蛇矛',
    u'青龙偃月',
    u'三足鼎立',
    u'群雄逐鹿',
    u'月明星稀',
    u'江山如画',
    u'洛神赋',
    u'戎马一生',
    u'白马义从',
    u'白马跃溪',
    u'龙胆银枪',
    u'乱世之战',
    u'青春飞扬',
    u'斗转星移',
    u'武魂降临',
    u'梦回三国',
)
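
# Print each report line and append it to `filename`, encoded as GBK to
# match the site's encoding.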
def print_and_save_to_file(prices_dict, filename='result.txt'):
    with open(filename, 'a') as f:
        def p(l):
            print l
            f.write(l.encode('GBK'))
            f.write('\n')

        # Decorative divider line ("I am a gorgeous divider") with a timestamp.
        l = u'------我是华丽丽的分割线---%s---\n' % datetime.now()
        p(l)
        for server in orders:
            if server not in prices_dict:
                l = u'服务器 %s 找不到\n\n' % server  # "server %s not found"
                p(l)
                continue
            prices = prices_dict[server]
            region = region_of_servers[server]
            prefix = prefix_of_servers.get(server, '')
            for func in (len, min, max):
                time = today.strftime('%Y/%m/%d 18:00')
                l = u'%s %s%s "%s" %s %s' % \
                    (region, prefix, server, time, titles_of_funcs[func], func(prices))
                p(l)
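
# Download a page and hand it to BeautifulSoup; 5173.com serves GBK and
# occasionally truncates the response body.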
def get_soup(url):
    t1 = datetime.now()
    print 'Loading:', url
    try:
        html = urllib2.urlopen(url).read()
    except httplib.IncompleteRead as e:
        # Parse whatever was received before the connection dropped.
        print 'IncompleteRead; parsing the partial response instead.'
        html = e.partial
    print '%s to load.' % (datetime.now() - t1,)
    return BeautifulSoup(html.decode('GBK', errors='replace'))
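
# A listing may cover several servers separated by '/'; record the price
# once per server and remember each server's region for the report.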
def append_price_for_servers(region, servers_str, price_node, prices_dict):
    for s in servers_str.split('/'):
        region_of_servers[s] = region
        price = price_node.string.split(u'元')[0]  # strip the '元/金' unit suffix
        prices_dict.setdefault(s, []).append(float(price))
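
# Parse one page of the completed-deals list. Rows appear newest first
# (the URL sorts by LastRefresh_DESC), so the first date before today
# means everything for today has been read.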
def extract_prices_of_deals(url):
    """Return False once a record older than today is reached."""
    soup = get_soup(url)
    date_nodes = soup.find_all(class_='li5')
    todays_record_found = False
    for node in date_nodes:
        if node.li:
            date_str = node.li.string.split(' ')[0]
            item_date = datetime.strptime(date_str, '%Y-%m-%d').date()
            if item_date != today:
                print '%s record found.' % item_date
                return todays_record_found
            todays_record_found = True
            # The deal price reads like '1.23元/金' (yuan per unit of gold).
            price_node = node.parent.find(text=re.compile(u'^[0-9]+\\.[0-9]+元/金'))
            # The '游戏...' cell reads '游戏/桃园将星录/<region>/<server(s)>'.
            region_servers_str = node.parent.find(
                'li', text=re.compile(u'^游戏')).string.split(u'桃园将星录/', 1)[1]
            if price_node:
                region, servers_str = region_servers_str.split('/', 1)
                append_price_for_servers(region, servers_str, price_node,
                                         prices_of_deals)
    return True
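
# Fetch deal pages until extract_prices_of_deals signals that records
# older than today have been reached.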
def extract_deals():
    template_url = 'http://trading.5173.com/list/viewlastestdeallist.aspx?ts=&gm=7e46ecc864a745a69294d597f71a55ec&sort=LastRefresh_DESC&pg=%d'
    page = 1
    while extract_prices_of_deals(template_url % page):
        page += 1
    print 'Total %d page(s).' % page
    return prices_of_deals
prices_of_sales = {} # server : price_list
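
# Parse one page of the current sale listings; an empty page means we have
# paged past the last result.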
def extract_prices_of_items(url):
    soup = get_soup(url)
    # Remove the "hot recommend" ad block so its prices are not counted.
    hot_recommend_node = soup.find(class_='hot_recommend')
    if hot_recommend_node:
        hot_recommend_node.clear()
    item_nodes = soup.find_all(class_='sin_pdlbox')
    print '%d item(s) on this page.' % len(item_nodes)
    if not item_nodes:
        return False
    for item_node in item_nodes:
        # The '游戏/区/服' (game/region/server) row holds links to the
        # game, the region, and the server(s), in that order.
        region_and_server_links = item_node.find(
            text=re.compile(u'游戏/区/服')).parent.find_all('a')
        region = region_and_server_links[1].string
        servers_str = region_and_server_links[2].string
        price_node = item_node.find(text=re.compile(u'元/金'))
        append_price_for_servers(region, servers_str, price_node,
                                 prices_of_sales)
    return True
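
# Fetch sale-listing pages (sorted by ascending price) until an empty
# page is returned.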
def extract_prices_for_sales():
    template_url = 'http://s.5173.com/search/7e46ecc864a745a69294d597f71a55ec-%d.shtml?=&cate=af21c660f670427181b5487c3ab850d0&sort=itemprice_asc'
    page = 1
    while extract_prices_of_items(template_url % page):
        page += 1
    print 'Total %d page(s).' % page
    return prices_of_sales
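
# Scrape today's completed deals and write the report; the sale-listing
# report stays disabled below.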
print_and_save_to_file(extract_deals())
# print_and_save_to_file(extract_prices_for_sales())
raw_input('Press Enter to exit.')