Skip to content

Instantly share code, notes, and snippets.

@chenx6
Created June 12, 2019 14:01
Show Gist options
  • Save chenx6/0faa7e46eaebec600409c86e3120b3db to your computer and use it in GitHub Desktop.
Save chenx6/0faa7e46eaebec600409c86e3120b3db to your computer and use it in GitHub Desktop.
恢复 Bilibili 收藏夹中失效视频
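'''
Recover information about invalidated (deleted) videos in a Bilibili favorites folder.

Requires Python 3.6 or newer and the `requests` package.
'''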
import requests
from math import ceil
from urllib import parse
import time
class favlist():
    '''
    Reader for a single Bilibili favorites folder.

    Parses the folder id out of a favorites-folder URL, pages through the
    folder via the Bilibili API and picks out the entries whose title marks
    them as invalidated videos.
    '''
    apiUrl = 'https://api.bilibili.com/medialist/gateway/base/spaceDetail?media_id={fid}&pn={pn}&ps=20'
    # Number of entries per API page; must stay equal to the `ps` value in apiUrl.
    pageSize = 20
    # Seconds to wait for the API before giving up, so a stalled connection
    # cannot hang the script forever.
    requestTimeout = 18

    def __init__(self, url: str):
        '''
        Parse the folder URL and ask the API how many entries the folder holds.

        :param url: favorites-folder URL containing a `fid` query parameter
        :raises KeyError: if the URL has no `fid` parameter, or the first API
            page could not be fetched / lacks the expected fields

        Usage::
        favlist('https://space.bilibili.com/3992364/favlist?fid=45027464')
        '''
        params = self.parse_url(url)
        if 'fid' not in params:
            # Same exception type as the plain lookup, but with a usable message.
            raise KeyError(f'no "fid" query parameter in URL: {url}')
        self.fid = params['fid'][0]
        favList = self.get_content(1)
        self.mediaCount = favList['data']['info']['media_count']

    def parse_url(self, inUrl: str):
        '''
        Parse the query string of a URL.

        :param inUrl: URL to parse
        :return: mapping of parameter name to the list of its values
        :rtype: dict
        '''
        query = parse.urlsplit(inUrl).query
        return parse.parse_qs(query)

    def get_content(self, pn: int):
        '''
        Fetch one page of the favorites folder from the API.

        Failures are reported on stdout and never raised.

        :param pn: page number (1-based)
        :return: decoded JSON response, or an empty dict on any failure
        :rtype: dict
        '''
        currUrl = self.apiUrl.format(fid=self.fid, pn=pn)
        try:
            apiResp = requests.get(currUrl, timeout=self.requestTimeout)
            # Without this call requests never raises HTTPError for 4xx/5xx.
            apiResp.raise_for_status()
            return apiResp.json()
        except requests.HTTPError as e:
            print(f'get_content() error, HTTPError {e}')
        except requests.ConnectionError as e:
            print(f'get_content() error, ConnectionError {e}')
        except requests.Timeout as e:
            print(f'get_content() error, Timeout {e}')
        except ValueError as e:
            # The response body was not valid JSON.
            print(f'get_content() error, invalid JSON {e}')
        return {}

    def get_failure_media(self):
        '''
        Collect the invalidated videos of the folder, identified by title.

        Pages that could not be fetched, or that carry no `medias` list,
        are skipped instead of aborting the whole scan.

        :return: list of the API entries of invalidated videos
        :rtype: list
        '''
        failMedia = []
        totalPage = ceil(self.mediaCount / self.pageSize)
        print('Failure media:')
        for currPage in range(1, totalPage + 1):
            favList = self.get_content(currPage)
            print(f'Page {currPage}:')
            # get_content() returns {} on failure and `medias` may be null,
            # so fall back to an empty list in both cases.
            medias = (favList.get('data') or {}).get('medias') or []
            for media in medias:
                # The API replaces the title of an invalidated video with this text.
                if media['title'] == '已失效视频':
                    print(f'av{media["id"]}')
                    failMedia.append(media)
        return failMedia
def get_biliplus_data(aid: int):
    '''
    Look up the archived metadata of a video on BiliPlus.

    Every failure (network error, timeout, non-zero API code, malformed
    response) is reported on stdout and turned into an empty dict, so one
    bad video does not abort the caller's loop.

    :param aid: av number of the video
    :return: the video's metadata (contains at least `title` and `author`),
        or an empty dict on any failure
    :rtype: dict
    '''
    plusUrl = f'https://hd.biliplus.com/api/aidinfo?aid={aid}'
    try:
        plusResp = requests.get(plusUrl, timeout=18)
        plusJson = plusResp.json()
        if plusJson['code'] != 0:
            # BiliPlus does not document its `code` values; their meaning
            # can only be found by experiment, so any non-zero code is an error.
            raise requests.HTTPError(f'status code {plusJson["code"]}')
        mediaData = plusJson['data'][str(aid)]
        # Read both fields here so a response missing them is rejected
        # instead of crashing the caller, which relies on them.
        title, author = mediaData['title'], mediaData['author']
    except requests.HTTPError as e:
        print(f'get_biliplus_data(), HTTPError {e}')
        return {}
    except requests.ConnectionError as e:
        print(f'get_biliplus_data(), ConnectionError {e}')
        return {}
    except requests.Timeout as e:
        # A read timeout is not a ConnectionError and needs its own handler.
        print(f'get_biliplus_data(), Timeout {e}')
        return {}
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or did not have the expected structure.
        print(f'get_biliplus_data(), unexpected response {e!r}')
        return {}
    print(f'title: {title}, author: {author}, av{aid}')
    return mediaData
def save_to_file(inStr: str, fileName: str):
    '''
    Append text to a file, creating the file if it does not exist yet.

    The file is written as UTF-8 so that non-ASCII text is stored the same
    way regardless of the platform's default encoding.

    :param inStr: text to append
    :param fileName: path of the target file
    :raises OSError: if the file cannot be opened or written
    '''
    # Mode 'a' already creates a missing file, and the context manager
    # closes the handle even when the write fails.
    with open(fileName, 'a', encoding='utf-8') as fp:
        fp.write(inStr)
if __name__ == "__main__":
    # Script entry point: ask for a favorites-folder URL, list its invalidated
    # videos, then look each one up on BiliPlus and record what was found.
    import csv
    import io

    hint = '输入收藏夹URL\n例子: https://space.bilibili.com/3992364/favlist?fid=45027464\n> '
    # Strip stray whitespace that commonly comes along with a pasted URL.
    url = input(hint).strip()
    f = favlist(url)
    fail = f.get_failure_media()
    print('Failure videos detail:')
    for i in fail:
        # Always record the av number, even if the BiliPlus lookup fails.
        save_to_file(f'av{i["id"]}\n', 'avNum.txt')
        plus = get_biliplus_data(i['id'])
        if plus:
            # Let the csv module do the quoting so a title or author that
            # contains a comma, quote or newline does not corrupt the row.
            rowBuf = io.StringIO()
            csv.writer(rowBuf, lineterminator='\n').writerow(
                [plus['title'], plus['author'], f'av{i["id"]}'])
            save_to_file(rowBuf.getvalue(), 'result.csv')
        # The BiliPlus server only accepts about 5 requests per minute
        # (and is frequently down), so throttle between lookups.
        time.sleep(20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment