Created
February 15, 2014 13:19
-
-
Save billy3321/9019226 to your computer and use it in GitHub Desktop.
Get ivod urls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import contextlib
import cookielib
import json
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup, SoupStrainer
# Root URL of the IVOD site.
base_url = 'http://ivod.ly.gov.tw/'
# Endpoint that get_date_list() posts a committee id to.
committee_url = base_url + 'Committee/CommsDate'

# Committee name -> numeric id sent to the site as the `comtid` form field.
committee = {
    u'院會': 19,
    u'內政': 1,
    u'外交及國防': 17,
    u'經濟': 5,
    u'財政': 6,
    u'教育及文化': 8,
    u'交通': 9,
    # NOTE(review): 9 is also the id of u'交通' above, so both names query the
    # same committee. Looks like a typo -- confirm the real id on the site.
    u'司法及法制': 9,
    u'社會福利及衛生環境': 12,
    u'程序': 13,
    u'紀律': 23,
}
def init_cookie():
    """Install a cookie-aware global urllib2 opener, then call reset_cookie().

    A fresh ``CookieJar`` is wrapped in an ``HTTPCookieProcessor`` and the
    resulting opener is installed process-wide, so later ``urllib2.urlopen``
    calls go through it.
    """
    jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(cookie_handler))
    reset_cookie()
def reset_cookie():
    """Request the site's front page and discard the response body.

    The request goes through whatever opener is currently installed in
    urllib2; presumably the point is to let the cookie-aware opener set up by
    init_cookie() collect the site's cookies -- TODO confirm.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen``.
    """
    http_header = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
    }
    req = urllib2.Request(base_url, None, http_header)
    # closing() releases the connection even if read() raises.
    with contextlib.closing(urllib2.urlopen(req)) as web:
        web.read()
def get_date_list(comt):
    """Return the meeting dates the site lists for one committee.

    Sends ``comtid=<comt>`` as form data to ``committee_url`` with
    XMLHttpRequest-style headers and collects the ``METDAT`` field of every
    entry under ``mdate`` in the JSON reply.

    Args:
        comt: committee id, e.g. a value of the ``committee`` mapping.

    Returns:
        A list of ``METDAT`` values, or ``False`` when the response status
        is not 200.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen``.
        ValueError: if the response body is not valid JSON.
        KeyError: if the reply lacks ``mdate`` or an entry lacks ``METDAT``.
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Pragma': 'no-cache',
    }
    form_data = urllib.urlencode({'comtid': comt})
    req = urllib2.Request(committee_url, form_data, http_header)
    # closing() releases the connection on every exit path.
    with contextlib.closing(urllib2.urlopen(req)) as web:
        if web.getcode() != 200:
            return False
        result = json.loads(web.read())
    return [entry['METDAT'] for entry in result['mdate']]
def get_movie_by_date(comit, date, page):
    """Fetch one page of the movie listing for a committee on a given date.

    Sends ``comtid``, ``date`` and ``page`` as form data to the site's
    ``Committee/MovieByDate`` endpoint with XMLHttpRequest-style headers and
    returns the decoded JSON reply unchanged.

    Args:
        comit: committee id, e.g. a value of the ``committee`` mapping.
        date: a date value as returned by get_date_list().
        page: page number to request.

    Returns:
        The decoded JSON object (main() reads its ``full`` and ``result``
        keys), or ``False`` when the response status is not 200.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen``.
        ValueError: if the response body is not valid JSON.
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'Pragma': 'no-cache',
    }
    form_data = urllib.urlencode({'comtid': comit, 'date': date, 'page': page})
    req = urllib2.Request(
        'http://ivod.ly.gov.tw/Committee/MovieByDate', form_data, http_header)
    # closing() releases the connection on every exit path.
    with contextlib.closing(urllib2.urlopen(req)) as web:
        if web.getcode() != 200:
            return False
        return json.loads(web.read())
def get_movie_url(wzs_id):
    """Extract the player argument from a movie's ``Play/Full`` page.

    Downloads ``Play/Full/<wzs_id>/1M``, finds the ``<script>`` inside
    ``<div class="movie_large">`` and strips the surrounding
    ``readyPlayer('http://ivod.ly.gov.tw/public/scripts/','`` ... ``');``
    wrapper from its text. main() stores the remainder as the item's ``url``.

    Args:
        wzs_id: id substituted into the page URL (main() passes both
            ``MEREID`` and ``WZS_ID`` values).

    Returns:
        The stripped script text, or ``None`` when the status is not 200 or
        the expected div/script element is missing.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen``.

    Side effects:
        Prints the extracted text to stdout.
    """
    http_header = {
        'Referer': 'http://ivod.ly.gov.tw/Committee',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'ivod.ly.gov.tw',
        'Connection': 'keep-alive',
    }
    url = 'http://ivod.ly.gov.tw/Play/Full/%s/1M' % wzs_id
    req = urllib2.Request(url, None, http_header)
    # closing() releases the connection on every exit path.
    with contextlib.closing(urllib2.urlopen(req)) as web:
        if web.getcode() != 200:
            return None
        html_result = web.read()

    soup = BeautifulSoup(html_result)
    div_movie_large = soup.find('div', {'class': 'movie_large'})
    if div_movie_large is None:
        return None
    script = div_movie_large.find('script')
    if script is None:
        # Div present but no player script: nothing to extract.
        return None

    prefix = "readyPlayer('http://ivod.ly.gov.tw/public/scripts/','"
    script_text = script.text.replace(prefix, '').replace("');", '')
    print(script_text)
    return script_text
def main():
    """Walk every committee and date, printing the movie URLs found.

    For each id in ``committee`` the available dates are fetched, then page 1
    of the movie listing for each date. Entries under the listing's ``full``
    key are resolved via their ``MEREID`` and entries under ``result`` via
    their ``WZS_ID``. The two resulting lists are printed once per date.

    Committees or dates whose request returns a falsy value (the fetch
    helpers return ``False`` on a non-200 status) are skipped instead of
    raising ``TypeError``.
    """
    # NOTE(review): init_cookie() is defined in this file but never called
    # here -- confirm whether the site needs the cookie-aware opener.
    for comt_id in committee.values():
        date_list = get_date_list(comt_id)
        if not date_list:
            continue
        for date in date_list:
            # Only the first page of each date's listing is requested.
            movie_list = get_movie_by_date(comt_id, date, 1)
            if not movie_list:
                continue
            full_list = [
                {
                    'url': get_movie_url(movie['MEREID']),
                    'committee_name': movie['CM_NAM'],
                    'desc': movie['METDEC'],
                }
                for movie in movie_list['full']
            ]
            single_list = [
                {
                    'url': get_movie_url(movie['WZS_ID']),
                    'name': movie['CH_NAM'],
                    'desc': movie['METDEC'],
                }
                for movie in movie_list['result']
            ]
            print(full_list)
            print(single_list)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment