Created
June 12, 2019 14:01
-
-
Save chenx6/0faa7e46eaebec600409c86e3120b3db to your computer and use it in GitHub Desktop.
恢复 Bilibili 收藏夹中失效视频
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Recover invalidated videos from a Bilibili favorites folder.

Requires Python 3.6+ and requests.
'''
import requests | |
from math import ceil | |
from urllib import parse | |
import time | |
class favlist():
    '''
    Client for a single Bilibili favorites folder.

    Extracts the folder id from a favorites-page URL, pages through the
    medialist API, and collects the entries whose title marks them as
    invalidated.

    Usage::
        favlist('https://space.bilibili.com/3992364/favlist?fid=45027464')
    '''
    apiUrl = 'https://api.bilibili.com/medialist/gateway/base/spaceDetail?media_id={fid}&pn={pn}&ps=20'
    # Entries requested per API page; must stay in sync with `ps=20` in apiUrl.
    pageSize = 20
    # Seconds to wait on an API request before giving up instead of hanging.
    requestTimeout = 18

    def __init__(self, url: str):
        '''
        Parse the favorites URL and fetch the folder's media count from the API.

        :param url: favorites folder URL carrying a ``fid`` query parameter
        :raises KeyError: if the URL has no ``fid`` parameter, or the first
            API page does not contain ``data.info.media_count``
        '''
        params = self.parse_url(url)
        self.fid = params['fid'][0]
        firstPage = self.get_content(1)
        # `data` / `info` may be missing or null when the request failed or
        # the folder is not accessible; normalise both cases to a KeyError.
        info = (firstPage.get('data') or {}).get('info') or {}
        if 'media_count' not in info:
            raise KeyError('media_count missing from favorites API response')
        self.mediaCount = info['media_count']

    def parse_url(self, inUrl: str):
        '''
        Parse a URL and return its query parameters.

        :param inUrl: URL to parse
        :return: mapping of parameter name to list of values
        :rtype: dict
        '''
        return parse.parse_qs(parse.urlsplit(inUrl).query)

    def get_content(self, pn: int):
        '''
        Fetch one page of the favorites folder from the API.

        :param pn: page number (the first page is 1)
        :return: decoded JSON object, or ``{}`` if the request failed or the
            body was not a JSON object
        :rtype: dict
        '''
        currUrl = self.apiUrl.format(fid=self.fid, pn=pn)
        try:
            apiResp = requests.get(currUrl, timeout=self.requestTimeout)
            # requests only raises HTTPError for 4xx/5xx when asked to.
            apiResp.raise_for_status()
            favListJson = apiResp.json()
        except requests.HTTPError as e:
            print(f'get_content() error, HTTPError {e}')
            return {}
        except requests.ConnectionError as e:
            print(f'get_content() error, ConnectionError {e}')
            return {}
        except requests.RequestException as e:
            # Timeouts and every other requests-level failure.
            print(f'get_content() error, RequestException {e}')
            return {}
        except ValueError as e:
            # Body was not valid JSON (e.g. an HTML error page).
            print(f'get_content() error, invalid JSON {e}')
            return {}
        if not isinstance(favListJson, dict):
            print('get_content() error, unexpected JSON payload')
            return {}
        return favListJson

    def get_failure_media(self):
        '''
        Collect invalidated videos by matching on their placeholder title.

        Pages that fail to load, or that carry no media list, are skipped.

        :return: list of the API entries for invalidated videos
        :rtype: list
        '''
        failMedia = []
        totalPage = ceil(self.mediaCount / self.pageSize)
        print('Failure media:')
        for currPage in range(1, totalPage + 1):
            favList = self.get_content(currPage)
            print(f'Page {currPage}:')
            # (sic) the API really does name this key `medias`; it can be
            # null, and `data` is absent entirely when the fetch failed.
            medias = (favList.get('data') or {}).get('medias') or []
            for media in medias:
                if media.get('title') == '已失效视频':
                    print(f'av{media["id"]}')
                    failMedia.append(media)
        return failMedia
def get_biliplus_data(aid: int):
    '''
    Look up a video's metadata on BiliPlus by its av number.

    Every failure (HTTP error, timeout, non-JSON body, non-zero ``code``,
    missing fields) is reported on stdout and yields ``{}`` so that one bad
    lookup does not abort the caller's loop.

    :param aid: av number of the video
    :return: the metadata dict for the video (contains at least ``title``
        and ``author``), or ``{}`` on failure
    :rtype: dict
    '''
    plusUrl = f'https://hd.biliplus.com/api/aidinfo?aid={aid}'
    try:
        plusResp = requests.get(plusUrl, timeout=18)
        # requests only raises HTTPError for 4xx/5xx when asked to.
        plusResp.raise_for_status()
        plusJson = plusResp.json()
        if plusJson['code'] != 0:
            # BiliPlus does not document its codes usefully; what `code`
            # means had to be found by experiment.
            raise requests.HTTPError(f'status code {plusJson["code"]}')
        mediaData = plusJson["data"][str(aid)]
        title = mediaData["title"]
        author = mediaData["author"]
    except requests.HTTPError as e:
        print(f'get_biliplus_data(), HTTPError {e}')
        return {}
    except requests.ConnectionError as e:
        print(f'get_biliplus_data(), ConnectionError {e}')
        return {}
    except requests.RequestException as e:
        # Read timeouts and every other requests-level failure.
        print(f'get_biliplus_data(), RequestException {e}')
        return {}
    except (ValueError, KeyError, TypeError) as e:
        # Non-JSON body, or JSON without the expected structure.
        print(f'get_biliplus_data(), unexpected response {e!r}')
        return {}
    print(
        f'title: {title}, author: {author}, av{aid}')
    return mediaData
def save_to_file(inStr: str, fileName: str):
    '''
    Append text to a file, creating the file if it does not exist yet.

    :param inStr: text to append
    :param fileName: path of the file to append to
    :raises OSError: if the file cannot be opened or written
    '''
    # Mode 'a' already creates a missing file, so no fallback to 'w' is
    # needed. UTF-8 is explicit so non-ASCII video titles do not depend on
    # the platform's default encoding, and `with` closes the handle even if
    # the write fails.
    with open(fileName, 'a', encoding='utf-8') as fp:
        fp.write(inStr)
if __name__ == "__main__":
    # Interactive entry point: ask for a favorites URL, list the invalidated
    # videos in it, and record what BiliPlus still knows about each one.
    import csv
    import io

    hint = '输入收藏夹URL\n例子: https://space.bilibili.com/3992364/favlist?fid=45027464\n> '
    # Strip stray whitespace from a pasted URL so it cannot end up in `fid`.
    url = input(hint).strip()
    f = favlist(url)
    fail = f.get_failure_media()
    print('Failure videos detail:')
    for i in fail:
        save_to_file(f'av{i["id"]}\n', 'avNum.txt')
        plus = get_biliplus_data(i['id'])
        if plus:
            # Let the csv module quote fields, so a title or author that
            # contains commas, quotes or newlines cannot corrupt result.csv.
            row = io.StringIO()
            csv.writer(row, lineterminator='\n').writerow(
                [plus["title"], plus["author"], f'av{i["id"]}'])
            save_to_file(row.getvalue(), 'result.csv')
        # BiliPlus's underpowered server only allows 5 requests per minute
        # (and goes down often), so pause between lookups.
        time.sleep(20)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment