Xzonn · July 6, 2025 06:50
diff --git a/download.py b/download.py
 import datetime
 import json
 import math

 import requests

 # Paste the value of the browser's "Cookie" request header here.
 COOKIES = """name=value"""

 # The paging loop assumes the endpoint returns this many comments per page.
 PAGE_SIZE = 10
 # Seconds to wait for the server before giving up instead of hanging forever.
 REQUEST_TIMEOUT = 30


 def parse_cookie_header(header):
     """Split a ``Cookie`` header string into a ``{name: value}`` dict.

     Segments are separated by ``;``. Whitespace around each segment is
     removed, so both ``"a=1;b=2"`` and the browser-style ``"a=1; b=2"`` work,
     and empty segments (for example after a trailing ``;``) are skipped.
     Only the first ``=`` separates name from value.

     Raises:
         ValueError: if a non-empty segment contains no ``=``.
     """
     cookies = {}
     for segment in header.split(";"):
         segment = segment.strip()
         if not segment:
             continue
         name, separator, value = segment.partition("=")
         if not separator:
             raise ValueError(f"malformed cookie segment: {segment!r}")
         cookies[name] = value
     return cookies


 def fetch_all_comments(session):
     """Request every page of the comment list and return the merged entries.

     ``session`` must offer ``post(url, data, timeout=...)`` like
     ``requests.Session``. Each response is expected to be JSON with a
     ``resultData`` list and a ``count`` number; ``count`` is presumably the
     total number of comments and is used to decide when to stop.
     """
     comments = []
     page = 1
     # Start with one page worth of comments so the first request is always
     # sent; the real total replaces this after every response.
     total = PAGE_SIZE
     while page <= math.ceil(total / PAGE_SIZE):
         response = session.post(
             "https://livere.com/insight/managereply/period",
             {
                 "startDate": "2017-01-01",
                 # Send the date as explicit YYYY-MM-DD text.
                 "endDate": datetime.date.today().isoformat(),
                 "sort": "regdate",
                 "order": "desc",
                 "pageNum": page,
             },
             timeout=REQUEST_TIMEOUT,
         )
         # Fail with the HTTP status instead of a JSON decoding error when the
         # server answers with an error page.
         response.raise_for_status()
         payload = response.json()
         comments += payload["resultData"]
         total = payload["count"]
         page += 1
     return comments


 def main():
     """Download all comments and store them in ``result.json``."""
     session = requests.Session()
     for name, value in parse_cookie_header(COOKIES).items():
         session.cookies.set(name, value)

     results = fetch_all_comments(session)

     with open("result.json", "w", -1, "utf8", newline="\n") as writer:
         json.dump(results, writer, ensure_ascii=False, indent=2)


 if __name__ == "__main__":
     main()
diff --git a/split.py b/split.py
 import json
 import os
 import re
 import urllib.parse
 from datetime import datetime, timedelta, timezone

 # "regdate" values are interpreted as UTC+9 and shown as UTC+8.
 SOURCE_TZ = timezone(timedelta(hours=9))
 OUTPUT_TZ = timezone(timedelta(hours=8))


 def page_path(url):
     """Map a page URL to the relative output directory for its comments.

     The URL is assumed to be absolute (``scheme://host/path``). Query string
     and fragment are dropped, percent-escapes are decoded, a directory URL
     (including the bare site root) maps to its ``index.html``, and the
     ``.html`` suffix is removed.

     Raises:
         ValueError: if the page is not an ``.html`` document.
     """
     parts = url.split("/", 3)
     # Fewer than four parts means there is no path after the host, i.e. the
     # site root; taking the last part would return the host name instead.
     raw_path = parts[3] if len(parts) > 3 else ""
     raw_path = raw_path.split("?")[0].split("#")[0]
     path = urllib.parse.unquote(raw_path, encoding="utf8")

     if not path or path.endswith("/"):
         path += "index.html"
     # An explicit exception still fires when Python runs with -O.
     if not path.endswith(".html"):
         raise ValueError(f"unsupported page URL (expected an .html page): {url!r}")
     return path.removesuffix(".html")


 def to_markdown_breaks(content):
     """Turn plain-text line breaks into Markdown line breaks.

     Runs of three or more newlines collapse to one blank line, every single
     newline becomes a hard break (two trailing spaces), and blank lines stay
     plain paragraph separators.
     """
     collapsed = re.sub(r"\n\n\n+", "\n\n", content)
     return collapsed.replace("\n", "  \n").replace("  \n  \n", "\n\n")


 def parse_regdate(raw):
     """Parse a ``regdate`` string (12-hour clock with AM/PM) into ``OUTPUT_TZ``."""
     return (
         datetime.strptime(raw.strip(), "%Y-%m-%d-%I:%M:%S %p")
         .replace(tzinfo=SOURCE_TZ)
         .astimezone(OUTPUT_TZ)
     )


 def format_comment(result):
     """Render one comment entry as a Markdown snippet.

     The snippet starts with a heading holding author, action and timestamp,
     followed by the comment text and one image link per consecutive
     ``image1``, ``image2``, ... key present in ``result``.
     """
     author = result["name"].strip()
     # "评论于" = "commented at" (top-level comment), "回复于" = "replied at".
     action = "评论于" if result["reply_seq"] == result["parent_seq"] else "回复于"
     stamp = parse_regdate(result["regdate"]).strftime("%Y-%m-%d %H:%M:%S")
     body = to_markdown_breaks(result["content"])

     pieces = [f"*****{author} {action} {stamp} (UTC+8)*****\n\n{body}"]
     index = 1
     while f"image{index}" in result:
         image = result[f"image{index}"]
         pieces.append(f"\n\n![{image}]({image})")
         index += 1
     return "".join(pieces)


 def group_comments(results):
     """Group rendered comments as ``{page path: {parent_seq: [snippets]}}``.

     Snippets keep the order in which ``results`` lists them.
     """
     url_data = {}
     for result in results:
         threads = url_data.setdefault(page_path(result["site"]), {})
         threads.setdefault(result["parent_seq"], []).append(format_comment(result))
     return url_data


 def main():
     """Read ``result.json`` and write one Markdown file per comment thread."""
     with open("result.json", "r", -1, "utf8") as reader:
         # The stored list is processed back to front; it is presumably
         # newest-first, which makes the output chronological.
         results = json.load(reader)[::-1]

     for path, threads in group_comments(results).items():
         directory = os.path.join("output", path)
         os.makedirs(directory, exist_ok=True)
         for thread_id, comments in sorted(threads.items(), key=lambda item: int(item[0])):
             with open(os.path.join(directory, f"{thread_id}.md"), "w", -1, "utf8") as writer:
                 # "Comments exported from LiveRe".
                 writer.write("> 评论导出自来必力\n\n")
                 writer.write("\n\n".join(comments))


 if __name__ == "__main__":
     main()
	import datetime
	import json
	import math

	import requests

	# Paste the value of the browser's "Cookie" request header here.
	COOKIES = """name=value"""

	session = requests.Session()
	for cookie in COOKIES.split(";"):
		# Browsers separate cookies with "; ", so strip the padding and skip
		# empty segments (for example after a trailing ";").
		cookie = cookie.strip()
		if not cookie:
			continue
		key, value = cookie.split("=", 1)
		session.cookies.set(key, value)

	results = []

	page = 1
	# Start with one page worth of comments so the first request is always
	# sent; the value reported by the server replaces it after every response.
	count = 10

	# The loop assumes 10 comments per page.
	while math.ceil(count / 10) >= page:
		response = session.post(
			"https://livere.com/insight/managereply/period",
			{
				"startDate": "2017-01-01",
				# Send the date as explicit YYYY-MM-DD text.
				"endDate": datetime.date.today().isoformat(),
				"sort": "regdate",
				"order": "desc",
				"pageNum": page,
			},
			# Give up after 30 seconds instead of hanging forever.
			timeout=30,
		)
		# Fail with the HTTP status instead of a JSON decoding error when the
		# server answers with an error page.
		response.raise_for_status()
		response_json = response.json()
		results += response_json["resultData"]
		count = response_json["count"]
		page += 1

	with open("result.json", "w", -1, "utf8", newline="\n") as writer:
		json.dump(results, writer, ensure_ascii=False, indent=2)
	import json
	import os
	import re
	import urllib.parse
	from datetime import datetime, timedelta, timezone

	with open("result.json", "r", -1, "utf8") as reader:
		# The stored list is processed back to front; it is presumably
		# newest-first, which makes the output chronological.
		results: list[dict] = json.load(reader)[::-1]

	# Rendered comments grouped as {page path: {parent_seq: [snippets]}}.
	url_data: dict[str, dict[str, list[str]]] = {}

	for result in results:
		url = result["site"]
		content: str = result["content"]
		reply_seq = result["reply_seq"]
		parent_seq = result["parent_seq"]
		author: str = result["name"].strip()
		# "regdate" is interpreted as UTC+9 and converted to UTC+8 for display.
		date: datetime = (
			datetime.strptime(result["regdate"].strip(), "%Y-%m-%d-%I:%M:%S %p")
			.replace(tzinfo=timezone(timedelta(hours=9)))
			.astimezone(timezone(timedelta(hours=8)))
		)
		# Collect image1, image2, ... for as long as the keys are consecutive.
		images = []
		i = 1
		while f"image{i}" in result:
			images.append(result[f"image{i}"])
			i += 1

		# Collapse 3+ newlines to one blank line, then turn single newlines into
		# Markdown hard breaks. A hard break needs two trailing spaces; one
		# space has no effect in Markdown.
		content = re.sub(r"\n\n\n+", "\n\n", content).replace("\n", "  \n").replace("  \n  \n", "\n\n")

		# The URL is assumed to be absolute (scheme://host/path). Fewer than
		# four parts means there is no path after the host, i.e. the site root.
		url_parts = url.split("/", 3)
		raw_path = url_parts[3] if len(url_parts) > 3 else ""
		path = urllib.parse.unquote(raw_path.split("?")[0].split("#")[0], encoding="utf8")

		if not path or path.endswith("/"):
			path += "index.html"
		# An explicit exception still fires when Python runs with -O.
		if not path.endswith(".html"):
			raise ValueError(f"unsupported page URL (expected an .html page): {url!r}")
		path = path.removesuffix(".html")

		# "评论于" = "commented at" (top-level comment), "回复于" = "replied at".
		action = "评论于" if reply_seq == parent_seq else "回复于"
		output_str = [
			f"***{author} {action} {date.strftime('%Y-%m-%d %H:%M:%S')} (UTC+8)***\n\n{content}"
		]
		for image in images:
			output_str.append(f"\n\n![{image}]({image})")
		url_data.setdefault(path, {}).setdefault(parent_seq, []).append("".join(output_str))

	for path, comments in url_data.items():
		os.makedirs(os.path.join("output", path), exist_ok=True)
		# One Markdown file per thread, written in ascending parent_seq order.
		for thread_id, snippets in sorted(comments.items(), key=lambda x: int(x[0])):
			with open(os.path.join("output", path, f"{thread_id}.md"), "w", -1, "utf8") as writer:
				# "Comments exported from LiveRe".
				writer.write("> 评论导出自来必力\n\n")
				writer.write("\n\n".join(snippets))