Created
November 25, 2021 14:15
-
-
Save le717/ae2b86f6c420e29532fef8d3f5e4a9bb to your computer and use it in GitHub Desktop.
A small Python script that extracts the memo text from a Samsung Galaxy S7 .memo file (which is a .zip archive).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert Samsung memo XML files into plain-text notes.

The script searches the current working directory recursively for files named
``memo_content.xml`` and writes one ``.txt`` file per memo into ``output/``.
(A ``.memo`` file is a zip archive, so it presumably has to be extracted
before this script can find the XML inside it.)
"""
from html import unescape
from pathlib import Path
from typing import Tuple, Union
from xml.etree import ElementTree

# Characters that cannot appear in a file name on Windows (this set also
# covers the POSIX path separator), plus the ASCII control characters.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*' + "".join(map(chr, range(32)))
_FILENAME_TABLE = str.maketrans({ch: "_" for ch in _UNSAFE_FILENAME_CHARS})


def safe_filename(name: str) -> str:
    """Return *name* made safe to use as a single file-name component.

    Unsafe characters are replaced with ``_`` so a title such as ``a/b`` can
    neither fail to open nor escape the output directory. Trailing dots and
    spaces are removed; if nothing is left, ``"untitled"`` is returned.
    """
    cleaned = name.translate(_FILENAME_TABLE).strip().rstrip(". ")
    return cleaned or "untitled"


def clean_memo_body(raw) -> str:
    """Turn the raw text of the memo content element into plain text.

    *raw* may be ``None`` (the element had no text at all), in which case an
    empty string is returned instead of raising.
    """
    if raw is None:
        return ""
    text = unescape(raw)
    # The body is stored as simple HTML paragraphs: drop the opening tags and
    # turn every closing tag into a line break.
    text = text.replace('<p value="memo2" >', "")
    text = text.replace("<p>", "")
    text = text.replace("</p>", "\r\n")
    return text.strip()


def parse_memo(xml_data: Union[str, bytes]) -> Tuple[str, str, str]:
    """Parse one memo document and return ``(title, created, body)``.

    *xml_data* may be ``str`` or ``bytes``; passing the raw bytes lets the XML
    parser apply the document's own encoding declaration.

    The title is read from the ``title`` attribute of the first child of the
    header element and the creation time from the ``createdTime`` attribute of
    its fourth child. When the title is blank, the creation time is used as
    the title instead.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_data* is not well-formed.
        IndexError, KeyError: if the expected elements/attributes are missing.
    """
    root = ElementTree.fromstring(xml_data)
    header = root[0]
    content = root[1]

    created = header[3].attrib["createdTime"]
    title = header[0].attrib["title"].strip() or created
    return title, created, clean_memo_body(content[0].text)


def render_note(title: str, created: str, body: str) -> str:
    """Return the text that is written to the note's output file."""
    return f"""Title: {title}
Created: {created}
{body}
"""


def main() -> None:
    """Export every ``memo_content.xml`` below the current directory."""
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)

    for memo in Path().rglob("memo_content.xml"):
        title, created, body = parse_memo(memo.read_bytes())
        outfile = output_dir / f"{safe_filename(title)}.txt"

        # Never overwrite an existing export, but say so instead of dropping
        # the memo silently (e.g. two memos that share a title).
        if outfile.exists():
            print(f"Skipping {memo}: {outfile.name} already exists")
            continue

        print(f"Writing file {outfile.name}")
        # Written as bytes so the "\r\n" line breaks in the body are kept
        # exactly as produced, without newline translation.
        outfile.write_bytes(render_note(title, created, body).encode("utf-8"))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment