Created
May 21, 2024 00:54
-
-
Save NB-XX/8a806f51a979aaccec5553ef6d853a4f to your computer and use it in GitHub Desktop.
stage1st小黑屋
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import json | |
| import re | |
| import openpyxl | |
| import os | |
| import time | |
| from datetime import datetime | |
| import glob # 用于文件名模式匹配 | |
| # 检查并获取最新的文件 | |
def get_latest_file():
    """Pick the workbook file name this run should load from and save to.

    Searches the current working directory for names matching
    ``*_blackroom.xlsx``. When at least one exists, the name with the
    greatest ``os.path.getctime`` value is returned. Otherwise a new name of
    the form ``<YYYYmmddHHMMSS>_blackroom.xlsx`` is built from the current
    local time; the file itself is not created here.

    Returns:
        str: The selected file name, relative to the current directory.
    """
    candidates = glob.glob('*_blackroom.xlsx')

    # Nothing to resume from: hand back a fresh, timestamped name.
    if not candidates:
        stamp = datetime.now().strftime("%Y%m%d%H%M%S")
        fresh_name = f"{stamp}_blackroom.xlsx"
        print(f"Creating new file: {fresh_name}")
        return fresh_name

    newest = max(candidates, key=os.path.getctime)
    print(f"Continuing from the latest file: {newest}")
    return newest
filename = get_latest_file()

# Prepare the Excel workbook: start a new one with a header row when the
# chosen file does not exist yet, otherwise reopen the file from disk.
if not os.path.exists(filename):
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    headers = [
        "CID",
        "UID",
        "操作者ID",
        "操作者",
        "动作",
        "原因",
        "时间",
        "用户名",
        "组过期时间",
    ]
    worksheet.append(headers)
else:
    workbook = openpyxl.load_workbook(filename)
    worksheet = workbook.active
| # 从 Excel 中获取最小的 CID | |
def get_last_min_cid(worksheet):
    """Return the smallest CID stored in the first column of *worksheet*.

    Row 1 is treated as the header and skipped. Rows whose first cell is
    empty or cannot be converted to an integer are ignored instead of
    aborting the scan, so one blank or damaged row does not prevent a
    resumed run from starting.

    Args:
        worksheet: Object exposing ``iter_rows(min_row=..., values_only=...)``
            that yields one tuple of cell values per row (an openpyxl
            worksheet in this script).

    Returns:
        The smallest CID as an ``int``, or ``float('inf')`` when no row holds
        a usable CID.
    """
    min_cid = float('inf')
    for row in worksheet.iter_rows(min_row=2, values_only=True):
        raw_cid = row[0]
        if raw_cid is None:
            # Blank cell: nothing to compare.
            continue
        try:
            cid = int(raw_cid)
        except (TypeError, ValueError):
            # Non-numeric content in the CID column; skip this row.
            continue
        if cid < min_cid:
            min_cid = cid
    return min_cid
# Resume below the smallest CID already stored in the sheet. float('inf')
# means there is nothing stored yet, so no lower bound is known.
current_min_cid = get_last_min_cid(worksheet) if worksheet.max_row > 1 else float('inf')

# Base URL and query parameters of the endpoint.
base_url = "https://bbs.saraba1st.com/2b/forum.php"
params = {
    "mod": "misc",
    "action": "showdarkroom",
    "ajaxdata": "json",
    # None on a fresh start; requests leaves out parameters whose value is None.
    "cid": str(current_min_cid) if current_min_cid != float('inf') else None
}

# Stop once the smallest CID seen is no longer above this value.
min_target_cid = 5
max_retries = 3  # maximum attempts per page
request_timeout = 30  # seconds; without a timeout a stalled connection blocks forever

try:
    while current_min_cid > min_target_cid:
        success = False
        stalled = False
        for _ in range(max_retries):
            try:
                response = requests.get(base_url, params=params, timeout=request_timeout)
                # Wrap bare integer keys that follow '{' or ',' in double quotes
                # so json.loads accepts the text.
                # NOTE(review): presumably the endpoint emits unquoted numeric
                # keys; this substitution would also rewrite a ",<digits>:"
                # sequence inside a string value - confirm against real responses.
                fixed_json = re.sub(r'(?<=\{|\,)(\s*)(\d+)(\s*):', r'\1"\2"\3:', response.text)
                json_data = json.loads(fixed_json)

                if 'data' in json_data and isinstance(json_data['data'], dict) and json_data['data']:
                    data = json_data['data']
                    # A record without "cid" raises KeyError here and takes the
                    # retry path below, instead of the unrelated OverflowError
                    # that int(float('inf')) used to produce.
                    fetched_cids = [int(details["cid"]) for details in data.values()]
                    new_min_cid = min(fetched_cids)
                    if new_min_cid >= current_min_cid:
                        # The cursor did not move down. Requesting again with the
                        # same CID would repeat this page indefinitely and append
                        # duplicate rows, so stop instead.
                        print(f"CID did not decrease (had {current_min_cid}, got {new_min_cid}), stopping")
                        stalled = True
                        break
                    current_min_cid = new_min_cid
                    params["cid"] = str(current_min_cid)  # move the cursor to fetch older records
                    success = True
                    for uid, details in data.items():
                        row = [
                            details.get("cid", ""),
                            uid,
                            details.get("operatorid", ""),
                            details.get("operator", ""),
                            details.get("action", ""),
                            details.get("reason", ""),
                            details.get("dateline", ""),
                            details.get("username", ""),
                            details.get("groupexpiry", "")
                        ]
                        worksheet.append(row)
                    print(f"Processed CID down to {current_min_cid}")
                    break
                elif 'data' in json_data and not json_data['data']:  # data present but empty
                    # The second '|'-separated field of "message" is parsed as a
                    # CID and the cursor is set to one below it.
                    # NOTE(review): the meaning of that field is not visible
                    # here - confirm against the endpoint's responses.
                    next_cid = int(json_data['message'].split('|')[1])
                    current_min_cid = next_cid - 1
                    params["cid"] = str(current_min_cid)
                    print(f"Data missing, continuing from CID {current_min_cid}")
                    success = True
                    break
                else:
                    print("Unexpected data structure received:", json_data)
                    time.sleep(5)
            except json.JSONDecodeError as e:
                print(f"JSON 解析错误: {str(e)}")
                time.sleep(5)
            except Exception as e:
                # Network failures, timeouts and malformed records all land here
                # and count as one failed attempt.
                print(f"请求过程中发生错误: {str(e)}")
                time.sleep(5)
        if stalled:
            break
        if not success:
            print("重试三次后仍失败,终止程序")
            break
except Exception as e:
    print(f"发生未知错误: {str(e)}")
finally:
    # Save whatever has been collected, even after an error or interruption.
    workbook.save(filename)
    print("数据已成功写入 Excel 文件。")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment