Skip to content

Instantly share code, notes, and snippets.

@holys
Created December 25, 2012 05:45
Show Gist options
  • Save holys/4371794 to your computer and use it in GitHub Desktop.
Save holys/4371794 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf8 -*-
import re
import requests
import urlparse
import MySQLdb
# URL fetched once per session at the start of parse_info(), before any
# listing page is requested. Presumably the visit makes the server issue the
# session cookie the listing pages require -- TODO confirm.
cookie = 'http://strs.gdufs.edu.cn/web'
# Base URL that each extracted media path is joined onto (via
# urlparse.urljoin) to build the stored download link.
base = 'http://202.116.194.117/'
def parse_info(url, page):
    """Scrape VOD listing pages and store every media entry in MySQL.

    For each page number the listing page is downloaded, every
    ``OpenPlayer(...)`` call found in the HTML is parsed, and one row is
    inserted into the ``test`` table. All rows are committed together once
    every page has been processed; if anything raises, nothing is committed
    and the cursor and connection are still closed.

    Args:
        url: Listing URL template containing a single ``%d`` placeholder
            that receives the page number.
        page: Exclusive upper bound of the page range; pages
            ``1 .. page - 1`` are fetched.

    Raises:
        IndexError: If an extracted path has fewer than three
            '/'-separated components after normalisation.
        Whatever ``requests`` or ``MySQLdb`` raise on network or database
        failures is propagated unchanged.
    """
    # NOTE(review): the range below never requests page number `page`
    # itself. If callers pass the total page count, the last page is
    # skipped -- confirm the intended meaning before changing it.
    s = requests.session()
    # Prime the session before requesting listing pages (presumably to
    # obtain the site's cookie -- TODO confirm).
    s.get(cookie)

    # Groups captured from each OpenPlayer(...) call:
    # (numeric id, name, raw path, format).
    # A plain raw literal is used instead of the Python-2-only `ur` prefix;
    # the pattern is pure ASCII, so matching is unaffected.
    find_re = re.compile(r"OpenPlayer\(\'(\d{5,6})\',\'(.+?)\','(.+?)\',\'\',\'(.+?)\',.+?</span>", re.DOTALL)
    # An optional ':' followed by two literal backslashes; each occurrence
    # is turned into '/'. Compiled once rather than once per row.
    sep_re = re.compile(':?\\\\\\\\')
    sql = "insert into test(num, name, channel, types, downlink, format) values(%s, %s, %s, %s, %s, %s)"

    conn = MySQLdb.connect(host='localhost', user='root', passwd='root')
    try:
        conn.select_db('fuck')
        cursor = conn.cursor()
        try:
            for i in xrange(1, page):
                res = s.get(url % (i))
                res.encoding = 'utf8'
                html = res.text
                print(u"第%d页" % i)
                for num, name, raw_path, fmt in find_re.findall(html):
                    new = sep_re.sub('/', raw_path)
                    downlink = urlparse.urljoin(base, new)
                    # Components 1 and 2 of the normalised path are stored
                    # as the channel and the type respectively.
                    parts = new.split('/')
                    channel = parts[1]
                    types = parts[2]
                    cursor.execute(sql, [num, name, channel, types, downlink, fmt])
        finally:
            # Release the cursor even when a request or insert fails.
            cursor.close()
        # Reached only when every page was processed without error.
        conn.commit()
    finally:
        conn.close()
if __name__ == "__main__":
    # (listing URL template, page bound) for every group to scrape.
    # Only group 1 is enabled; the remaining groups are kept here, disabled,
    # together with the page bounds recorded for them.
    jobs = [
        ('http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=%d', 504),
        # ('http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=2&page=%d', 5300),
        # ('http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=3&page=%d', 272),
        # ('http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=4&page=%d', 25),
        # ('http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=5&page=%d', 97),
    ]
    for list_url, page_bound in jobs:
        parse_info(list_url, page_bound)
    print("Done --------------!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment