Skip to content

Instantly share code, notes, and snippets.

@NB-XX
Created May 21, 2024 00:54
Show Gist options
  • Save NB-XX/8a806f51a979aaccec5553ef6d853a4f to your computer and use it in GitHub Desktop.
Save NB-XX/8a806f51a979aaccec5553ef6d853a4f to your computer and use it in GitHub Desktop.
stage1st小黑屋
import requests
import json
import re
import openpyxl
import os
import time
from datetime import datetime
import glob # 用于文件名模式匹配
def get_latest_file():
    """Return the workbook file to continue from, or a name for a new one.

    Looks in the current working directory for files matching
    ``*_blackroom.xlsx``. When at least one exists, the one with the greatest
    ``os.path.getctime`` value is returned. Otherwise a fresh
    ``<YYYYmmddHHMMSS>_blackroom.xlsx`` name is built from the current local
    time; the file itself is NOT created here.

    Returns:
        str: Path of the existing file, or the generated file name.
    """
    files = glob.glob('*_blackroom.xlsx')
    if files:
        # NOTE: getctime is creation time on Windows but metadata-change time
        # on Unix; either way the most recently touched export wins.
        latest_file = max(files, key=os.path.getctime)
        print(f"Continuing from the latest file: {latest_file}")
        return latest_file

    # No previous export found: derive a new, timestamped file name.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    new_file = f"{timestamp}_blackroom.xlsx"
    print(f"Creating new file: {new_file}")
    return new_file
filename = get_latest_file()

# Initialise the Excel workbook: reopen the file when it already exists on
# disk, otherwise start a new workbook.
if os.path.exists(filename):
    workbook = openpyxl.load_workbook(filename)
    worksheet = workbook.active
else:
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    # The header row is written only for a brand-new workbook so that resuming
    # an existing file does not insert a second header in the middle of the data.
    headers = ["CID", "UID", "操作者ID", "操作者", "动作", "原因", "时间", "用户名", "组过期时间"]
    worksheet.append(headers)
def get_last_min_cid(worksheet):
    """Return the smallest CID stored in the first column of *worksheet*.

    Rows are read starting at row 2 (row 1 is the header). Rows whose first
    cell is missing or cannot be converted to an integer (blank rows, empty
    strings) are skipped instead of aborting the whole scan.

    Args:
        worksheet: Object exposing ``iter_rows(min_row=..., values_only=...)``
            that yields one tuple of cell values per row.

    Returns:
        The minimum CID as an ``int``, or ``float('inf')`` when no row holds
        a usable CID.
    """
    min_cid = float('inf')
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        raw_cid = row[0] if row else None
        try:
            cid = int(raw_cid)
        except (TypeError, ValueError):
            # None (blank cell) or a non-numeric value such as "".
            continue
        if cid < min_cid:
            min_cid = cid
    return min_cid
# Resume point: the smallest CID already stored in the sheet, or infinity when
# the sheet holds nothing beyond the header row.
if worksheet.max_row > 1:
    current_min_cid = get_last_min_cid(worksheet)
else:
    current_min_cid = float('inf')

# Base URL and query parameters for the request.
base_url = "https://bbs.saraba1st.com/2b/forum.php"
params = dict(
    mod="misc",
    action="showdarkroom",
    ajaxdata="json",
    # No CID is sent while there is no resume point yet.
    cid=None if current_min_cid == float('inf') else str(current_min_cid),
)
# Crawl settings.
min_target_cid = 5  # stop once the CID cursor is no longer above this value
max_retries = 3  # attempts per request before giving up
request_timeout = 30  # seconds; without a timeout a stalled connection blocks forever

try:
    while current_min_cid > min_target_cid:
        success = False
        previous_min_cid = current_min_cid
        for attempt in range(max_retries):
            try:
                response = requests.get(base_url, params=params, timeout=request_timeout)
                # Wrap bare numeric object keys in double quotes so that
                # json.loads accepts the payload.
                # NOTE(review): the pattern is applied to the whole text, so a
                # ",<digits>:" sequence inside a string value would be rewritten
                # as well — confirm this cannot occur in real responses.
                fixed_json = re.sub(r'(?<=\{|\,)(\s*)(\d+)(\s*):', r'\1"\2"\3:', response.text)
                json_data = json.loads(fixed_json)

                if 'data' in json_data and isinstance(json_data['data'], dict) and json_data['data']:
                    data = json_data['data']
                    # A record without "cid" makes int(inf) raise OverflowError,
                    # which the generic handler below treats as a failed attempt.
                    fetched_cids = [int(details.get("cid", float('inf'))) for details in data.values()]
                    current_min_cid = min(fetched_cids)
                    params["cid"] = str(current_min_cid)  # update the CID to fetch earlier records
                    success = True
                    for uid, details in data.items():
                        row = [
                            details.get("cid", ""),
                            uid,
                            details.get("operatorid", ""),
                            details.get("operator", ""),
                            details.get("action", ""),
                            details.get("reason", ""),
                            details.get("dateline", ""),
                            details.get("username", ""),
                            details.get("groupexpiry", ""),
                        ]
                        worksheet.append(row)
                    print(f"Processed CID down to {current_min_cid}")
                    break
                elif 'data' in json_data and not json_data['data']:  # data is present but empty
                    # The second '|'-separated field of the message is read as
                    # the next CID; continue from just below it.
                    next_cid = int(json_data['message'].split('|')[1])
                    current_min_cid = next_cid - 1
                    params["cid"] = str(current_min_cid)
                    print(f"Data missing, continuing from CID {current_min_cid}")
                    success = True
                    break
                else:
                    print("Unexpected data structure received:", json_data)
                    time.sleep(5)
            except json.JSONDecodeError as e:
                print(f"JSON 解析错误: {str(e)}")
                time.sleep(5)
            except Exception as e:
                print(f"请求过程中发生错误: {str(e)}")
                time.sleep(5)

        if not success:
            print("重试三次后仍失败,终止程序")
            break

        if current_min_cid >= previous_min_cid:
            # The cursor did not move down, so the loop condition could never
            # become false; stop instead of appending the same rows forever.
            print(f"No progress below CID {current_min_cid}; stopping to avoid an endless loop")
            break
except Exception as e:
    print(f"发生未知错误: {str(e)}")
finally:
    # Save the Excel file, including after an error or interruption.
    workbook.save(filename)
    print("数据已成功写入 Excel 文件。")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment