Skip to content

Instantly share code, notes, and snippets.

@billy3321
Created February 15, 2014 13:19
Show Gist options
  • Save billy3321/9019226 to your computer and use it in GitHub Desktop.
Save billy3321/9019226 to your computer and use it in GitHub Desktop.
Get ivod urls
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import urllib, urllib2
import json
import cookielib
from BeautifulSoup import BeautifulSoup, SoupStrainer
# Site root. Not referenced by any function visible in this file.
base_url = 'http://ivod.ly.gov.tw/'
# Endpoint that get_date_list() posts a committee id to.
committee_url = 'http://ivod.ly.gov.tw/Committee/CommsDate'
# Committee display name -> numeric id. main() passes these ids to
# get_date_list() / get_movie_by_date(), which send them as ``comtid``.
# NOTE(review): two entries below share the id 9, so main() will query that
# id twice and one committee is presumably never fetched -- confirm the
# correct id against the site before relying on this table.
committee = {u'院會':19,  # plenary sitting
u'內政':1,  # internal administration
u'外交及國防':17,  # foreign affairs and national defense
u'經濟':5,  # economics
u'財政':6,  # finance
u'教育及文化':8,  # education and culture
u'交通':9,  # transportation
u'司法及法制':9,  # judiciary and legal affairs -- NOTE(review): same id as the entry above
u'社會福利及衛生環境':12,  # social welfare and environmental hygiene
u'程序': 13,  # procedure
u'紀律':23,  # discipline
}
def init_cookie():
    """Install a process-wide cookie-aware opener, then prime it.

    A fresh ``CookieJar`` is wrapped in an ``HTTPCookieProcessor`` and the
    resulting opener is installed globally, so later ``urllib2.urlopen``
    calls go through it. ``reset_cookie()`` is called afterwards to make one
    request to the site's front page.
    """
    jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(cookie_handler))
    reset_cookie()
def reset_cookie():
    """Fetch the site's front page and discard the body.

    The request goes through whatever opener is currently installed, so
    cookies set by the reply are only retained when a cookie-aware opener
    is in place (``init_cookie`` installs one before calling this).

    Returns:
        None.

    Raises:
        urllib2.URLError: propagated from ``urlopen`` (includes its
            subclass ``HTTPError``).
    """
    http_header = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
    }
    req = urllib2.Request('http://ivod.ly.gov.tw/', None, http_header)
    web = urllib2.urlopen(req)
    try:
        # The body itself is not needed; read it so the reply is fully
        # consumed before the connection is released.
        web.read()
    finally:
        # urllib2 responses are not context managers in Python 2, so close
        # explicitly instead of relying on garbage collection.
        web.close()
def get_date_list(comt):
    """Return the meeting dates the site reports for one committee.

    Args:
        comt: committee id; sent url-encoded as the ``comtid`` form field
            to ``committee_url``.

    Returns:
        A list holding the ``METDAT`` value of every entry under the
        ``mdate`` key of the JSON reply, or ``False`` when the reply status
        is not 200. Callers must check for ``False`` before iterating.

    Raises:
        urllib2.URLError: propagated from ``urlopen`` (includes its
            subclass ``HTTPError``).
        ValueError: if the reply body is not valid JSON.
        KeyError: if the reply lacks ``mdate`` or an entry lacks ``METDAT``.
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Pragma': 'no-cache',
    }
    form_data = urllib.urlencode({'comtid': comt})
    req = urllib2.Request(committee_url, form_data, http_header)
    web = urllib2.urlopen(req)
    try:
        if web.getcode() != 200:
            return False
        result = json.loads(web.read())
    finally:
        # Always release the connection, including on the early return and
        # when the body fails to parse.
        web.close()
    return [entry['METDAT'] for entry in result['mdate']]
def get_movie_by_date(comit, date, page):
    """Fetch the site's JSON listing of videos for a committee and date.

    Args:
        comit: committee id, sent as the ``comtid`` form field.
        date: meeting date, sent unchanged as the ``date`` form field
            (``main`` passes values obtained from ``get_date_list``).
        page: result page, sent as the ``page`` form field.

    Returns:
        The decoded JSON reply, or ``False`` when the reply status is not
        200. ``main`` reads the ``full`` and ``result`` keys of the reply.

    Raises:
        urllib2.URLError: propagated from ``urlopen`` (includes its
            subclass ``HTTPError``).
        ValueError: if the reply body is not valid JSON.
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Pragma': 'no-cache',
    }
    form_data = urllib.urlencode({'comtid': comit, 'date': date, 'page': page})
    req = urllib2.Request('http://ivod.ly.gov.tw/Committee/MovieByDate',
                          form_data, http_header)
    web = urllib2.urlopen(req)
    try:
        if web.getcode() != 200:
            return False
        return json.loads(web.read())
    finally:
        # Runs on every exit path so the connection is never left open.
        web.close()
def get_movie_url(wzs_id):
    """Extract the player argument embedded in a video's playback page.

    Requests ``http://ivod.ly.gov.tw/Play/Full/<wzs_id>/1M``, locates the
    ``<div class="movie_large">`` element and takes the text of the first
    ``<script>`` inside it. The leading ``readyPlayer('<scripts url>','``
    and the trailing ``');`` are stripped, and what is left is printed and
    returned.

    Args:
        wzs_id: identifier interpolated into the playback URL (``main``
            passes both ``MEREID`` and ``WZS_ID`` values here).

    Returns:
        The remaining script text (presumably the stream URL -- confirm
        against a live page), or ``None`` when the reply status is not 200,
        the div is absent, or the div contains no ``<script>``.

    Raises:
        urllib2.URLError: propagated from ``urlopen`` (includes its
            subclass ``HTTPError``).
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
    }
    url = 'http://ivod.ly.gov.tw/Play/Full/%s/1M' % wzs_id
    req = urllib2.Request(url, None, http_header)
    web = urllib2.urlopen(req)
    try:
        if web.getcode() != 200:
            return None
        html_result = web.read()
    finally:
        web.close()

    div_movie_large = BeautifulSoup(html_result).find(
        'div', {'class': 'movie_large'})
    if not div_movie_large:
        return None
    script_tag = div_movie_large.find('script')
    if script_tag is None:
        # Previously this case raised AttributeError on ``.text``.
        return None

    script_text = script_tag.text
    script_text = script_text.replace(
        "readyPlayer('http://ivod.ly.gov.tw/public/scripts/','", '')
    script_text = script_text.replace("');", '')
    # Single-argument form prints identically to the Python 2 statement.
    print(script_text)
    return script_text
def main():
    """Walk every committee and meeting date, printing the resolved videos.

    For each id in ``committee`` the meeting dates are fetched, then page 1
    of the video listing for each date. Two lists are built per date and
    printed: one from the listing's ``full`` entries (``url``,
    ``committee_name``, ``desc``) and one from its ``result`` entries
    (``url``, ``name``, ``desc``).

    A committee or date whose lookup returns ``False`` (non-200 reply) is
    skipped instead of raising ``TypeError``.

    NOTE(review): ``init_cookie`` is never called on this path -- confirm
    whether the site needs the session cookie it would set up.
    NOTE(review): only page 1 of each listing is requested; later pages, if
    the site has any, are not fetched.
    NOTE(review): ``result`` entries are resolved through ``get_movie_url``,
    which always builds a ``Play/Full/`` URL -- confirm that path is valid
    for ``WZS_ID`` values.
    """
    for comt_id in committee.values():
        date_list = get_date_list(comt_id)
        if not date_list:
            continue
        for date in date_list:
            movie_list = get_movie_by_date(comt_id, date, 1)
            if not movie_list:
                continue
            full_list = [
                {
                    'url': get_movie_url(entry['MEREID']),
                    'committee_name': entry['CM_NAM'],
                    'desc': entry['METDEC'],
                }
                for entry in movie_list['full']
            ]
            single_list = [
                {
                    'url': get_movie_url(entry['WZS_ID']),
                    'name': entry['CH_NAM'],
                    'desc': entry['METDEC'],
                }
                for entry in movie_list['result']
            ]
            # Single-argument form prints identically to the Python 2
            # statement.
            print(full_list)
            print(single_list)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment