Created
July 6, 2025 06:50
-
-
Save Xzonn/c7fdca56a1f615a0f3c6d835ed10298d to your computer and use it in GitHub Desktop.
LivereExport
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import datetime | |
| import json | |
| import math | |
| import requests | |
# Paste the value of the browser's "Cookie" request header for a logged-in
# livere.com session here ("name=value; other=value").
COOKIES = """name=value"""

# Admin-console endpoint queried for the comment list.
EXPORT_URL = "https://livere.com/insight/managereply/period"
# The pagination arithmetic assumes the endpoint returns 10 comments per
# page (the divisor was hard-coded) — confirm against the live API.
PAGE_SIZE = 10
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def parse_cookies(cookie_header: str) -> "dict[str, str]":
    """Split a ``Cookie`` header string into a name -> value mapping.

    Whitespace around each pair is stripped, so the usual browser format
    ``"a=1; b=2"`` does not yield a cookie named ``" b"``.  Empty segments
    (empty input, trailing ``;``) are skipped.  Only the first ``=`` of a
    pair separates name from value.

    Raises:
        ValueError: if a non-empty segment contains no ``=``.
    """
    cookies = {}
    for pair in cookie_header.split(";"):
        pair = pair.strip()
        if not pair:
            continue
        name, separator, value = pair.partition("=")
        if not separator:
            raise ValueError(f"malformed cookie segment: {pair!r}")
        cookies[name.strip()] = value
    return cookies


def fetch_all_comments(session: "requests.Session") -> list:
    """Download every comment page and return the concatenated entries.

    Pages are requested newest-first (``sort=regdate``, ``order=desc``)
    from 2017-01-01 up to today.  The total reported in each response's
    ``count`` field decides whether another page is requested.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    results = []
    page = 1
    # Placeholder total so that the first page is always requested.
    total = PAGE_SIZE
    while math.ceil(total / PAGE_SIZE) >= page:
        response = session.post(
            EXPORT_URL,
            {
                "startDate": "2017-01-01",
                "endDate": datetime.date.today().isoformat(),
                "sort": "regdate",
                "order": "desc",
                "pageNum": page,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on e.g. an expired login instead of tripping over a
        # non-JSON error page further down.
        response.raise_for_status()
        payload = response.json()
        results += payload["resultData"]
        total = payload["count"]
        page += 1
    return results


def main() -> None:
    """Export all comments visible to the configured session to result.json."""
    session = requests.Session()
    for name, value in parse_cookies(COOKIES).items():
        session.cookies.set(name, value)

    results = fetch_all_comments(session)

    with open("result.json", "w", -1, "utf8", newline="\n") as writer:
        json.dump(results, writer, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import os | |
| import re | |
| import urllib.parse | |
| from datetime import datetime, timedelta, timezone | |
# Timestamps in the export are interpreted as UTC+9 (presumably Korea
# Standard Time, LiveRe's home zone — confirm) and rendered in UTC+8.
_SOURCE_TZ = timezone(timedelta(hours=9))
_TARGET_TZ = timezone(timedelta(hours=8))


def parse_regdate(raw: str) -> datetime:
    """Parse a ``regdate`` such as ``2024-01-02-01:30:00 PM`` into UTC+8."""
    parsed = datetime.strptime(raw.strip(), "%Y-%m-%d-%I:%M:%S %p")
    return parsed.replace(tzinfo=_SOURCE_TZ).astimezone(_TARGET_TZ)


def normalize_content(content: str) -> str:
    """Prepare a comment body for Markdown output.

    Runs of three or more newlines collapse to one blank line.  Every
    remaining single newline becomes a Markdown hard line break, which
    requires TWO trailing spaces (one trailing space is ignored by
    CommonMark).  Blank lines between paragraphs are left as plain ``\\n\\n``.
    """
    collapsed = re.sub(r"\n\n\n+", "\n\n", content)
    return collapsed.replace("\n", "  \n").replace("  \n  \n", "\n\n")


def page_path(url: str) -> str:
    """Map a page URL to the relative output directory for its comments.

    Query string and fragment are dropped, percent-escapes are decoded,
    directory URLs (including the site root) map to ``index``, and the
    ``.html`` suffix is removed.

    Raises:
        ValueError: if the page path is neither a directory nor ``*.html``.
    """
    raw_path = urllib.parse.urlsplit(url).path.lstrip("/")
    path = urllib.parse.unquote(raw_path, encoding="utf8")
    # An empty path is the site root, which is a directory URL as well.
    if not path or path.endswith("/"):
        path += "index.html"
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if not path.endswith(".html"):
        raise ValueError(f"unexpected page URL (not an .html page): {url!r}")
    return path.removesuffix(".html")


def collect_images(result: dict) -> list:
    """Return the values of ``image1``, ``image2``, ... that are non-empty.

    Collection stops at the first missing ``imageN`` key.
    """
    images = []
    index = 1
    while f"image{index}" in result:
        images.append(result[f"image{index}"])
        index += 1
    # Skip empty placeholders so no broken image link is emitted.
    return [image for image in images if image]


def format_comment(result: dict) -> str:
    """Render one exported comment as a Markdown snippet.

    The header names the author, says whether the entry is a top-level
    comment (``reply_seq == parent_seq``) or a reply, and gives the time in
    UTC+8.  The body follows, then one Markdown image per attachment.
    """
    author = result["name"].strip()
    posted_at = parse_regdate(result["regdate"])
    verb = "评论于" if result["reply_seq"] == result["parent_seq"] else "回复于"
    header = f"*****{author} {verb} {posted_at.strftime('%Y-%m-%d %H:%M:%S')} (UTC+8)*****"
    body = normalize_content(result["content"])

    parts = [f"{header}\n\n{body}"]
    # Emit the collected image URLs; previously only blank lines were
    # appended and the URLs themselves were lost.
    parts.extend(f"\n\n![]({image})" for image in collect_images(result))
    return "".join(parts)


def group_comments(results: list) -> dict:
    """Group rendered comments as ``{page path: {parent_seq: [markdown, ...]}}``.

    Entries keep the order of ``results`` within each thread.
    """
    url_data = {}
    for result in results:
        threads = url_data.setdefault(page_path(result["site"]), {})
        threads.setdefault(result["parent_seq"], []).append(format_comment(result))
    return url_data


def write_threads(url_data: dict, output_root: str = "output") -> None:
    """Write one ``<parent_seq>.md`` file per thread under ``output_root/<page path>``."""
    for path, threads in url_data.items():
        directory = os.path.join(output_root, path)
        os.makedirs(directory, exist_ok=True)
        for thread_id, comments in sorted(threads.items(), key=lambda item: int(item[0])):
            with open(os.path.join(directory, f"{thread_id}.md"), "w", -1, "utf8") as writer:
                writer.write("> 评论导出自来必力\n\n")
                writer.write("\n\n".join(comments))


def main() -> None:
    """Convert result.json (newest-first export) into per-thread Markdown files."""
    with open("result.json", "r", -1, "utf8") as reader:
        # Reverse so that comments are processed oldest-first.
        results: list[dict] = json.load(reader)[::-1]
    write_threads(group_comments(results))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment