Skip to content

Instantly share code, notes, and snippets.

@cnbeining
Created June 8, 2014 17:28
Show Gist options
  • Save cnbeining/1fc6a80e3e7982d079db to your computer and use it in GitHub Desktop.
Save cnbeining/1fc6a80e3e7982d079db to your computer and use it in GitHub Desktop.
Biligrab 0.1
'''
Biligrab 0.1
Beining@ACICFG
http://www.cnbeining.com/
A very simple tool to download comments from Bilibili.
Requires: Python 2.7
Usage:
python Biligrab01.py
>>>av12450
>>>p1
It is as simple as that.
'''
import sys
import os
from StringIO import StringIO
import gzip
import urllib2
import sys
#----------------------------------------------------------------------
def find_cid_api(vid, p):
    """Look up the cid, title and part name of a video through the view API.

    The results are published through the module-level globals ``cid``,
    ``partname``, ``title`` and ``videourl``; the function itself returns
    None.  ``cid`` is reset to 0 and the two names to '' before the request,
    so those values remain in place when the lookup fails.

    Args:
        vid: The av number of the video (anything ``str()`` accepts).
        p: The page (part) number of the video.
    """
    global cid
    global partname
    global title
    global videourl
    cid = 0
    title = ''
    partname = ''
    biliurl = ('http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id='
               + str(vid) + '&page=' + str(p))
    videourl = ('http://www.bilibili.tv/video/av' + str(vid)
                + '/index_' + str(p) + '.html')
    print('Fetching webpage...')
    # Only the network round trip is guarded; parsing below cannot raise.
    try:
        request = urllib2.Request(biliurl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
        response = urllib2.urlopen(request)
        try:
            data = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except Exception:
        # Best-effort lookup: any failure is reported and the defaults are
        # kept.  ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        print('ERROR: Cannot connect to API server!')
        return

    # The fixed slice offsets strip the surrounding tags; they are two
    # characters longer than the opening tag, presumably to skip a two
    # character indent in the API's XML output -- TODO confirm.
    # Every matching line is processed, so the last match wins.
    for line in data.split('\n'):
        if 'cid' in line:
            cid = line[7:-6]
            print('cid is ' + str(cid))
        if 'partname' in line:
            partname = line[12:-11]
            print('partname is ' + str(partname))
        if 'title' in line:
            title = line[9:-8]
            print('title is ' + str(title))
def _read_url(request):
    """Open *request* and return ``(body, headers)``, always closing the response."""
    response = urllib2.urlopen(request)
    try:
        return response.read(), response.info()
    finally:
        response.close()


def _first_field(data_list, marker, start, stop):
    """Return ``line[start:stop]`` of the first line containing *marker*, else None."""
    for line in data_list:
        if marker in line:
            return line[start:stop]
    return None


def main(vid, p):
    """Download the comment XML of one part of a Bilibili video.

    The cid is looked up in three steps, each used only if the previous one
    found nothing: the view API, the video web page, and finally a prompt
    asking the user to type it.  The comment file is then fetched with
    ``curl`` into ``<name>.xml`` in the current directory, where the name is
    the part name, else the title, else the cid.

    The function never returns normally: it always ends the process through
    ``sys.exit`` (status 1 when curl cannot be run or reports failure).

    Args:
        vid: The av number of the video.
        p: The page (part) number of the video.

    Raises:
        SystemExit: Always, once the download was attempted or the user
            entered an empty cid.
        urllib2.URLError: If one of the HTTP requests fails.
    """
    # Imported here so the block does not rely on the file's import list.
    import subprocess

    cid = 0
    title = ''
    partname = ''
    biliurl = ('http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id='
               + str(vid) + '&page=' + str(p))
    videourl = ('http://www.bilibili.tv/video/av' + str(vid)
                + '/index_' + str(p) + '.html')

    print('Fetching webpage...')
    data, _ = _read_url(urllib2.Request(biliurl))
    data_list = data.split('\n')

    # The fixed slice offsets strip the surrounding tags; they are two
    # characters longer than the opening tag, presumably to skip a two
    # character indent in the API's XML output -- TODO confirm.
    value = _first_field(data_list, 'cid', 7, -6)
    if value is not None:
        cid = value
        print('cid is ' + str(cid))
    value = _first_field(data_list, 'partname', 12, -11)
    if value is not None:
        partname = value
        print('partname is ' + str(partname))
    value = _first_field(data_list, 'title', 9, -8)
    if value is not None:
        title = value
        print('title is ' + str(title))

    # ``not cid`` covers both the initial 0 and an empty slice result;
    # comparing with ``is`` only worked through CPython's object caching.
    if not cid:
        print('Cannot find cid, trying to do it brutely...')
        print('Fetching webpage...')
        request = urllib2.Request(videourl)
        request.add_header('Accept-encoding', 'gzip')
        data, headers = _read_url(request)
        # The server may ignore the gzip request; the body just read is then
        # used as-is instead of falling back to the stale API response.
        if headers.get('Content-Encoding') == 'gzip':
            data = gzip.GzipFile(fileobj=StringIO(data)).read()
        # TODO: read title
        for line in data.split('\n'):
            if 'cid=' in line:
                # Take the value of the first ``key=value`` pair on the line.
                cid = line.split('&')[0].split('=')[-1]
                print('cid is ' + str(cid))
                break

    # NOTE: a disabled experiment that guessed the cid from the neighbouring
    # av number used to live here as a bare string; it has been removed.

    if not cid:
        cid = raw_input('Input the cid by yourself!').strip()
        if not cid:
            sys.exit()

    filename = partname or title or cid
    # A '/' in a title would be taken as a directory separator by curl.
    xml_name = filename.replace('/', '_') + '.xml'

    print('Fetching XML...')
    # An argument list (no shell) keeps quotes or shell metacharacters in the
    # server-supplied name from being interpreted as part of the command.
    try:
        status = subprocess.call([
            'curl', '-o', xml_name, '--compressed',
            'http://comment.bilibili.tv/' + cid + '.xml',
        ])
    except OSError:
        print('ERROR: Cannot run curl; is it installed?')
        sys.exit(1)
    if status != 0:
        print('ERROR: curl exited with status ' + str(status))
        sys.exit(1)
    print('The XML file, ' + xml_name + ' should be ready...enjoy!')
    sys.exit()
# Ask for the av number and the part number, then download the comments.
# raw_input() is used instead of input(): under Python 2, input() evaluates
# whatever the user types as a Python expression, which mangles values such
# as "012" (octal) and executes arbitrary code.
vid = raw_input('av').strip()
p = raw_input('P').strip()
main(vid, p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment