Created
December 25, 2019 02:44
-
-
Save fhefh2015/c13a3b493309db153f4f53eb99038390 to your computer and use it in GitHub Desktop.
百度网盘链接有效性批量测试
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import re | |
from bs4 import BeautifulSoup | |
import time | |
import json | |
from requests import exceptions | |
class Baiduyun:
    """Check whether a Baidu Netdisk share link is still valid.

    Only links whose landing page asks for an extraction code are recognised
    as valid; every other kind of page is treated as invalid.
    """

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    # Seconds to wait for the share page before giving up; without a timeout
    # a stalled connection would block the caller indefinitely.
    timeout = 10

    def __init__(self, url):
        """Remember the text that contains the share link.

        Args:
            url: A share URL, or any text that contains one.
        """
        self.url = url

    def get_link(self):
        """Return the 22-character share id that follows ``/s/1``.

        Returns:
            The share id, or ``None`` when ``self.url`` holds no share link.
        """
        match = re.search(
            r'https://pan\.baidu\.com/s/1([A-Za-z0-9_-]{22})', self.url
        )
        return match.group(1) if match else None

    def verify(self):
        """Report whether the share page still prompts for an extraction code.

        Prints the verdict (and the HTTP status code on a non-200 response).

        Returns:
            ``True`` when the page shows the extraction-code prompt, otherwise
            ``False``.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        share_id = self.get_link()
        if share_id is None:
            # Nothing to query: an unrecognised link counts as invalid.
            print('已经失效')
            return False

        init_url = 'https://pan.baidu.com/share/init?surl=' + share_id
        response = requests.get(
            init_url, headers=Baiduyun.headers, timeout=self.timeout
        )
        if response.status_code != 200:
            print(response.status_code)
            print('已经失效')
            return False

        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        prompts = soup.select('dl.pickpw.clearfix')
        notice = None
        if prompts and prompts[0].dt is not None:
            # .string is None when the <dt> holds more than one child node.
            notice = prompts[0].dt.string
        if notice and '请输入提取码' in notice:
            print('有效')
            return True

        print('已经失效')
        return False
# Browser-like request headers sent with every forum page download.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    ),
}
def get_index(url):
    """Collect the thread links listed on one collection index page.

    Args:
        url: Address of the index page to download.

    Returns:
        A list of dicts with the keys ``detail_url`` and ``title``, one per
        ``th > a`` anchor inside the listing container. The list is empty
        when the response status is not 200 or the container is absent.

    Raises:
        requests.exceptions.RequestException: If the download fails or times
            out.
    """
    index_list = []
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        return index_list

    soup = BeautifulSoup(response.text, 'lxml')
    containers = soup.select('#ct > div.mn > div.tl.bm > div.bm_c')
    if not containers:
        # The page did not have the expected layout; report no entries
        # instead of failing with IndexError.
        return index_list

    for anchor in containers[0].select('th > a'):
        href = anchor.get('href')
        if not href:
            continue
        index_list.append({
            'detail_url': 'https://www.52pojie.cn/' + href,
            'title': anchor.string,
        })
    return index_list
def get_detail(url):
    """Extract the first Baidu Netdisk link and its code from a thread page.

    Args:
        url: Address of the thread page to download.

    Returns:
        A dict with the keys ``url`` and ``password`` when a share link
        followed by an extraction-code label is found, otherwise ``None``.
        A non-200 response prints the status code and also yields ``None``.

    Raises:
        requests.exceptions.RequestException: If the download fails or times
            out.
    """
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        print('status_code', response.status_code)
        return None

    response.encoding = 'GB2312'
    # Sample of the markup being matched:
    #   <a href="https://pan.baidu.com/s/1dOOudVQxeBpSHD8YMrmKTQ" ...>...</a> 提取码:p5N9
    # The label must be a non-capturing alternation; a bracketed character
    # class would match any single one of those characters instead.
    match = re.search(
        r'(https://pan\.baidu\.com/s/1[A-Za-z0-9_-]{22})'
        r'.*?(?:提取码|密码).*?([A-Za-z0-9]{4})',
        response.text,
        re.S,
    )
    if match is None:
        return None
    return {
        'url': match.group(1),
        'password': match.group(2),
    }
def save_result(content):
    """Append one record to ``result.txt`` as a single JSON line.

    Non-ASCII text is written as-is (the file is UTF-8) rather than as
    ``\\uXXXX`` escapes, so the saved titles stay readable.

    Args:
        content: A JSON-serialisable object.
    """
    # Serialise first so a failure does not leave a half-written line behind.
    line = json.dumps(content, ensure_ascii=False)
    with open('result.txt', 'a', encoding='utf-8') as f:
        f.write(line + '\n')
# Script entry point: walk collection pages 1-17, look up the Baidu Netdisk
# link in every listed thread, and save the links that are still valid.
if __name__ == "__main__":
    for i in range(1, 18):
        try:
            url = 'https://www.52pojie.cn/forum.php?mod=collection&action=view&ctid=1667&page={}'.format(i)
            print(url)
            for detail_info in get_index(url):
                print(detail_info['title'])
                result = get_detail(detail_info['detail_url'])
                if result:
                    result['title'] = detail_info['title']
                    print(result)
                    if Baiduyun(result['url']).verify():
                        save_result(result)
                # Pause between threads to keep the request rate low.
                time.sleep(1)
        except exceptions.RequestException as e:
            # `exceptions` is the requests.exceptions module; an except
            # clause needs an exception class from it, not the module.
            print(e)
            time.sleep(10)
        except Exception as e:
            # Best effort: report the problem, back off, and move on to the
            # next page. KeyboardInterrupt is not caught, so Ctrl-C still
            # stops the run.
            print(e)
            time.sleep(10)
# url = '<a href="https://pan.baidu.com/s/1YC_MJ_RzcmK3EmTAKSST6w" target="_blank">https://pan.baidu.com/s/1YC_MJ_RzcmK3EmTAKSST6w</a>' | |
# # url = '<a href="https://pan.baidu.com/s/1CtUlgWRaI-bYcSwEbuAN3A" target="_blank">https://pan.baidu.com/s/1CtUlgWRaI-bYcSwEbuAN3A</a>' | |
# mybaiduyuan = Baiduyun(url) | |
# mybaiduyuan.verify() | |
#来源:https://www.52pojie.cn/thread-1061322-1-1.html |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment