Batch-test the validity of Baidu Netdisk (pan.baidu.com) share links
import requests
import re
from bs4 import BeautifulSoup
import time
import json
from requests import exceptions
class Baiduyun:
    '''
    Check whether a Baidu Netdisk share link is still valid.
    Only extraction-code links are checked; all other link types are
    treated as expired.
    '''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    def __init__(self, url):
        self.url = url

    def get_link(self):
        # A short share link is https://pan.baidu.com/s/1 followed by 22 characters.
        match = re.search(r'https://pan\.baidu\.com/s/1(.{22})', self.url, re.S)
        if match:
            return match.group(1)
        return None

    def verify(self):
        share_id = self.get_link()
        if share_id is None:
            print('expired')  # no recognizable share id in the URL
            return False
        init_url = 'https://pan.baidu.com/share/init?surl=' + share_id
        response = requests.get(init_url, headers=Baiduyun.headers)
        if response.status_code == 200:
            response.encoding = 'utf-8'
            soup = BeautifulSoup(response.text, 'lxml')
            # A live share page shows the extraction-code prompt inside dl.pickpw.
            if soup.select('dl.pickpw.clearfix'):
                clearfix = soup.select('dl.pickpw.clearfix')[0]
                notice = clearfix.dt.string
                # '请输入提取码' means "please enter the extraction code".
                if notice and '请输入提取码' in notice:
                    print('valid')
                    return True
            print('expired')
            return False
        else:
            print(response.status_code)
            print('expired')
            return False
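# Usage sketch for the class above; the share URL is only an example of the
# expected format, not a link known to be live:
#
#   checker = Baiduyun('https://pan.baidu.com/s/1YC_MJ_RzcmK3EmTAKSST6w')
#   if checker.verify():
#       print('link still works')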
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
def get_index(url):
    '''Collect thread titles and detail URLs from one forum collection page.'''
    index_list = []
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'lxml')
        bm_c = soup.select('#ct > div.mn > div.tl.bm > div.bm_c')[0]
        items = bm_c.select('th > a')
        for item in items:
            detail_info = {}
            href = item['href']
            title = item.string
            detail_url = 'https://www.52pojie.cn/' + href
            detail_info['detail_url'] = detail_url
            detail_info['title'] = title
            index_list.append(detail_info)
    # Returns an empty list when the request fails, so the caller can iterate safely.
    return index_list
def get_detail(url):
    '''Extract the Baidu Netdisk link and its extraction code from a thread page.'''
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        response.encoding = 'GB2312'  # 52pojie pages are GBK/GB2312-encoded
        # Target format in the page source, e.g.:
        # <a href="https://pan.baidu.com/s/1dOOudVQxeBpSHD8YMrmKTQ" target="_blank">https://pan.baidu.com/s/1dOOudVQxeBpSHD8YMrmKTQ</a> 提取码:p5N9
        # '提取码' / '密码' are the labels "extraction code" / "password".
        baiduyun_link_match = re.search(
            r'(https://pan\.baidu\.com/s/1.{22}).*?(?:提取码|密码).*?([A-Za-z0-9]{4})',
            response.text, re.S)
        if baiduyun_link_match:
            return {
                'url': baiduyun_link_match.group(1),
                'password': baiduyun_link_match.group(2)
            }
    else:
        print('status_code', response.status_code)
    return None
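# Standalone sketch of the pattern used in get_detail, exercised against the
# sample line quoted in the comment above (sample values, not a live page):
_sample = ('<a href="https://pan.baidu.com/s/1dOOudVQxeBpSHD8YMrmKTQ" target="_blank">'
           'https://pan.baidu.com/s/1dOOudVQxeBpSHD8YMrmKTQ</a> 提取码:p5N9')
_m = re.search(r'(https://pan\.baidu\.com/s/1.{22}).*?(?:提取码|密码).*?([A-Za-z0-9]{4})',
               _sample, re.S)
assert _m and _m.group(2) == 'p5N9'  # group(1) is the share URL, group(2) the code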
def save_result(content):
    '''Append one result to result.txt as a JSON line.'''
    with open('result.txt', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content) + '\n')
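# Companion sketch: read the JSON-lines file written by save_result back in.
# load_results is not part of the original script, only an illustration of
# how the output format can be consumed.
def load_results(path='result.txt'):
    with open(path, encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]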
if __name__ == "__main__":
    # Walk pages 1-17 of the 52pojie forum collection.
    for i in range(1, 18):
        try:
            url = 'https://www.52pojie.cn/forum.php?mod=collection&action=view&ctid=1667&page={}'.format(i)
            print(url)
            index_list = get_index(url)
            for detail_info in index_list:
                detail_url = detail_info['detail_url']
                print(detail_info['title'])
                result = get_detail(detail_url)
                if result:
                    result['title'] = detail_info['title']
                    print(result)
                    test_valid = Baiduyun(result['url'])
                    isvalid = test_valid.verify()
                    if isvalid:
                        save_result(result)
                time.sleep(1)  # throttle requests
        except exceptions.RequestException as e:
            # Network errors: back off, log, and move on to the next page.
            time.sleep(10)
            print(e)
            continue
        except Exception:
            time.sleep(10)
            continue
# url = '<a href="https://pan.baidu.com/s/1YC_MJ_RzcmK3EmTAKSST6w" target="_blank">https://pan.baidu.com/s/1YC_MJ_RzcmK3EmTAKSST6w</a>'
# # url = '<a href="https://pan.baidu.com/s/1CtUlgWRaI-bYcSwEbuAN3A" target="_blank">https://pan.baidu.com/s/1CtUlgWRaI-bYcSwEbuAN3A</a>'
# mybaiduyuan = Baiduyun(url)
# mybaiduyuan.verify()
# Source: https://www.52pojie.cn/thread-1061322-1-1.html