Skip to content

Instantly share code, notes, and snippets.

@Xzonn
Created July 6, 2025 06:50
Show Gist options
  • Save Xzonn/c7fdca56a1f615a0f3c6d835ed10298d to your computer and use it in GitHub Desktop.
Save Xzonn/c7fdca56a1f615a0f3c6d835ed10298d to your computer and use it in GitHub Desktop.
LivereExport
import datetime
import json
import math
import requests
# Raw "Cookie" request-header value copied from a logged-in browser session,
# e.g. "name1=value1; name2=value2".
COOKIES = """name=value"""

# Endpoint queried for the comment list.
API_URL = "https://livere.com/insight/managereply/period"
# Number of comments assumed per result page; the pagination arithmetic
# below relies on it.  NOTE(review): 10 is taken from the original script,
# confirm against the API if the export looks truncated.
PAGE_SIZE = 10
# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the export forever.
REQUEST_TIMEOUT = 30


def parse_cookie_header(raw: str) -> dict[str, str]:
    """Split a ``Cookie`` header string into a ``{name: value}`` mapping.

    Whitespace around each name and value is stripped (browsers separate
    pairs with ``"; "``) and empty segments, such as the one produced by a
    trailing ``";"``, are ignored.  Only the first ``"="`` separates name
    from value, so values may themselves contain ``"="``.

    Raises:
        ValueError: if a non-empty segment contains no ``"="``.
    """
    cookies: dict[str, str] = {}
    for segment in raw.split(";"):
        segment = segment.strip()
        if not segment:
            continue
        name, separator, value = segment.partition("=")
        if not separator:
            raise ValueError(f"malformed cookie segment: {segment!r}")
        cookies[name.strip()] = value.strip()
    return cookies


def has_more_pages(count: int, page: int, page_size: int = PAGE_SIZE) -> bool:
    """Return True while ``page`` (1-based) is within ``count`` total items."""
    return math.ceil(count / page_size) >= page


def fetch_all_comments(session) -> list:
    """Download every comment page through ``session`` and return them all.

    ``session`` must offer a ``post(url, data=..., timeout=...)`` method
    whose result has ``raise_for_status()`` and ``json()``; the script
    passes a ``requests.Session``.  Each JSON payload is expected to carry
    a ``"resultData"`` list and a ``"count"`` total.
    """
    results: list = []
    page = 1
    # Seed with one page worth of items so the first request is always made;
    # the real total replaces it after the first response.
    count = PAGE_SIZE
    end_date = datetime.date.today().isoformat()
    while has_more_pages(count, page):
        response = session.post(
            API_URL,
            data={
                "startDate": "2017-01-01",
                "endDate": end_date,
                "sort": "regdate",
                "order": "desc",
                "pageNum": page,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors (e.g. an expired login) instead of
        # trying to decode an error page as JSON.
        response.raise_for_status()
        payload = response.json()
        results += payload["resultData"]
        count = payload["count"]
        page += 1
    return results


def export_comments() -> None:
    """Fetch all comments with the configured cookies into ``result.json``."""
    session = requests.Session()
    for name, value in parse_cookie_header(COOKIES).items():
        session.cookies.set(name, value)
    results = fetch_all_comments(session)
    with open("result.json", "w", encoding="utf8", newline="\n") as writer:
        json.dump(results, writer, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    export_comments()
import json
import os
import re
import urllib.parse
from datetime import datetime, timedelta, timezone
# Timestamps in the export are interpreted as UTC+9 and rendered as UTC+8.
SOURCE_TZ = timezone(timedelta(hours=9))
TARGET_TZ = timezone(timedelta(hours=8))


def parse_regdate(raw: str) -> datetime:
    """Parse a ``regdate`` such as ``"2020-01-05-03:04:05 PM"``.

    The value is read as a 12-hour clock time in ``SOURCE_TZ`` and returned
    as an aware datetime converted to ``TARGET_TZ``.
    """
    return (
        datetime.strptime(raw.strip(), "%Y-%m-%d-%I:%M:%S %p")
        .replace(tzinfo=SOURCE_TZ)
        .astimezone(TARGET_TZ)
    )


def collect_images(result: dict) -> list:
    """Return the values of ``image1``, ``image2``, ... until a key is missing."""
    images = []
    index = 1
    while f"image{index}" in result:
        images.append(result[f"image{index}"])
        index += 1
    return images


def normalize_content(content: str) -> str:
    """Prepare a comment body for Markdown output.

    Runs of three or more newlines collapse to one blank line, each single
    newline becomes a Markdown hard line break, and blank lines between
    paragraphs are kept free of trailing spaces.
    """
    collapsed = re.sub(r"\n\n\n+", "\n\n", content)
    # A Markdown hard break needs two trailing spaces; a single trailing
    # space is discarded by renderers and the line break would be lost.
    return collapsed.replace("\n", "  \n").replace("  \n  \n", "\n\n")


def page_path(url: str) -> str:
    """Map a page URL to its relative output path without ``.html``.

    Everything after the third ``"/"`` is used, with the query string and
    fragment dropped and percent-escapes decoded.  A directory URL (or the
    site root) maps to its ``index`` page.

    Raises:
        ValueError: if the resulting path does not end in ``.html``.
    """
    raw = url.split("/", 3)[-1].split("?")[0].split("#")[0]
    path = urllib.parse.unquote(raw, encoding="utf8")
    # An empty path is the site root ("https://host/"), which is a
    # directory URL just like any other path ending in "/".
    if not path or path.endswith("/"):
        path += "index.html"
    # Explicit raise rather than assert, so the check survives ``python -O``.
    if not path.endswith(".html"):
        raise ValueError(f"unsupported page URL (expected .html): {url!r}")
    return path.removesuffix(".html")


def render_comment(result: dict) -> str:
    """Render one exported comment record as a Markdown snippet."""
    author: str = result["name"].strip()
    # A comment whose own id equals its parent id starts a thread;
    # anything else is a reply inside that thread.
    verb = "评论于" if result["reply_seq"] == result["parent_seq"] else "回复于"
    stamp = parse_regdate(result["regdate"]).strftime("%Y-%m-%d %H:%M:%S")
    parts = [
        f"*****{author} {verb} {stamp} (UTC+8)*****\n\n{normalize_content(result['content'])}"
    ]
    parts.extend(f"\n\n![{image}]({image})" for image in collect_images(result))
    return "".join(parts)


def group_comments(results: list[dict]) -> dict[str, dict[str, list[str]]]:
    """Group rendered comments by page path, then by thread (``parent_seq``).

    Comments keep the order in which they appear in ``results``.
    """
    url_data: dict[str, dict[str, list[str]]] = {}
    for result in results:
        threads = url_data.setdefault(page_path(result["site"]), {})
        threads.setdefault(result["parent_seq"], []).append(render_comment(result))
    return url_data


def write_threads(url_data: dict[str, dict[str, list[str]]], output_dir: str = "output") -> None:
    """Write each thread to ``<output_dir>/<page path>/<thread id>.md``."""
    for path, comments in url_data.items():
        os.makedirs(os.path.join(output_dir, path), exist_ok=True)
        for thread_id, entries in sorted(comments.items(), key=lambda item: int(item[0])):
            with open(os.path.join(output_dir, path, f"{thread_id}.md"), "w", encoding="utf8") as writer:
                writer.write("> 评论导出自来必力\n\n")
                writer.write("\n\n".join(entries))


def convert_comments() -> None:
    """Convert ``result.json`` into per-thread Markdown files under ``output``."""
    with open("result.json", "r", encoding="utf8") as reader:
        # The export is stored newest first; reverse it so every thread
        # reads oldest to newest.
        results: list[dict] = json.load(reader)[::-1]
    write_threads(group_comments(results))


if __name__ == "__main__":
    convert_comments()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment