Created
February 26, 2024 03:08
-
-
Save YuzuRyo61/7e1ded0c92b8b5a22fb1600f6ac03535 to your computer and use it in GitHub Desktop.
Pocketでエクスポートしたデータを、CSVに加工するPythonスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import datetime
import os
import sys
from html.parser import HTMLParser
# Name of the HTML export file; it is opened relative to the current working
# directory.
EXPORTED_FILE_NAME = "ril_export.html"
# Name of the CSV file this script writes, also relative to the current
# working directory.
OUTPUT_FILE_NAME = "ril_export_csv.csv"
class RilExportHTMLParser(HTMLParser): | |
""" | |
ril_export.html のaタグについて: | |
hrefにはpocketで保存したURL | |
time_addedにはUNIX時間で追加された時間が入力されている | |
tagsにはタグが追加されるが、使ったことがないのでわからない | |
""" | |
def __init__(self): | |
HTMLParser.__init__(self) | |
self.title = False | |
self.href = False | |
self.data = [] | |
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]): | |
attrs_dict = dict(attrs) | |
if tag == "li": | |
self.data.append({}) | |
self.title = True | |
self.href = True | |
if tag == "a" and self.href == True: | |
self.data[-1].update({ | |
"url": attrs_dict.get("href"), | |
"added": datetime.datetime.fromtimestamp( | |
int(attrs_dict.get("time_added", "0"))) if attrs_dict.get("time_added", None) is not None else None, | |
}) | |
def handle_data(self, data): | |
if self.title == True or self.href == True: | |
self.data[-1].update({"title": data}) | |
self.title = False | |
self.href = False | |
if __name__ == "__main__":
    # Convert the export file in the current working directory to a CSV file.
    # Stop early with a readable message when the export file is not there.
    if not os.path.exists(EXPORTED_FILE_NAME):
        print(f"{EXPORTED_FILE_NAME} is not found. Abort.")
        sys.exit(1)

    with open(EXPORTED_FILE_NAME, mode="r", encoding="utf-8") as export_file:
        exported_raw_data = export_file.read()

    parser = RilExportHTMLParser()
    parser.feed(exported_raw_data)
    # close() makes the parser process any text it is still buffering.
    parser.close()

    # newline="" is what the csv module expects for its output files: without
    # it the platform's newline translation rewrites the "\n" terminator (and
    # any newline inside a title) on Windows.
    with open(OUTPUT_FILE_NAME, mode="w", encoding="utf-8", newline="") as csv_file:
        writer = csv.DictWriter(
            csv_file, ["title", "url", "added"], lineterminator="\n"
        )
        writer.writeheader()
        writer.writerows(parser.data)

    print("export OK")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment