Created
January 19, 2025 01:29
-
-
Save taktamur/0189d01afae225dfdde7d6c5ad4abaa5 to your computer and use it in GitHub Desktop.
古いメール *.eml を読める形のテキストに変換。ただしこれで展開できなかったものもある
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import email\n", | |
"from email import policy\n", | |
"from email.parser import BytesParser\n", | |
"import glob\n", | |
"import os\n", | |
"from datetime import datetime\n", | |
"\n", | |
"\n", | |
"def read_eml(file_path):\n", | |
"    \"\"\"\n", | |
"    .eml形式のメールファイルを読み込み、その内容を抽出します。\n", | |
"    引数:\n", | |
"        file_path (str): .emlファイルのパス。\n", | |
"    戻り値:\n", | |
"        dict: メールの件名、送信者、受信者、日付、本文を含む辞書。\n", | |
"            - 'subject' (str): メールの件名。\n", | |
"            - 'from' (str): メールの送信者。\n", | |
"            - 'to' (str): メールの受信者。\n", | |
"            - 'date' (str): メールが送信された日付。\n", | |
"            - 'body' (str): メールのプレーンテキスト本文。\n", | |
"    \"\"\"\n", | |
"    with open(file_path, 'rb') as f:\n", | |
"        msg = BytesParser(policy=policy.default).parse(f)\n", | |
"    \n", | |
"    # Extract the email content\n", | |
"    subject = msg['subject']\n", | |
"    from_ = msg['from']\n", | |
"    to = msg['to']\n", | |
"    date = msg['date']\n", | |
"    body = msg.get_body(preferencelist=('plain',))\n", | |
"    if body is not None:\n", | |
"        body = body.get_content()\n", | |
"    else:\n", | |
"        body = \"\"\n", | |
"    \n", | |
"    return {\n", | |
"        'subject': subject,\n", | |
"        'from': from_,\n", | |
"        'to': to,\n", | |
"        'date': date,\n", | |
"        'body': body\n", | |
"    }\n", | |
"\n", | |
"def save_email_content_to_txt(file_path, email_content):\n", | |
"    \"\"\"\n", | |
"    メールの内容をテキストファイルに保存します。\n", | |
"    引数:\n", | |
"        file_path (str): .emlファイルのパス。\n", | |
"        email_content (dict): メールの内容を含む辞書。\n", | |
"    \"\"\"\n", | |
"    from email.utils import parsedate_to_datetime\n", | |
"\n", | |
"    # メールの日付を解析して yyyymmdd 形式に変換\n", | |
"    # (parsedate_to_datetime accepts any RFC 5322 date, e.g. without a weekday or with a trailing (JST) comment)\n", | |
"    email_date = parsedate_to_datetime(email_content['date'])\n", | |
"    date_str = email_date.strftime('%Y%m%d')\n", | |
"\n", | |
"    # 新しいファイル名を作成\n", | |
"    txt_file_path = date_str + '_' + file_path + '.txt'\n", | |
"    # ディレクトリが存在しない場合は作成\n", | |
"    os.makedirs('text', exist_ok=True)\n", | |
"\n", | |
"    # 新しいファイルパスを text/ ディレクトリに変更\n", | |
"    txt_file_path = os.path.join('text', txt_file_path)\n", | |
"\n", | |
"    with open(txt_file_path, 'w', encoding='utf-8') as f:\n", | |
"        f.write('Subject: ' + email_content['subject'] + '\\n')\n", | |
"        f.write('From: ' + email_content['from'] + '\\n')\n", | |
"        f.write('To: ' + email_content['to'] + '\\n')\n", | |
"        f.write('Date: ' + email_content['date'] + '\\n')\n", | |
"        f.write('---------------------\\n')\n", | |
"        f.write(email_content['body'])\n", | |
"\n", | |
"# .emlファイルをすべて取得\n", | |
"eml_files = glob.glob('*.eml')\n", | |
"\n", | |
"for file_path in eml_files:\n", | |
"    # メール内容を読み込む\n", | |
"    email_content = read_eml(file_path)\n", | |
"\n", | |
"    # 関数を使用してメール内容をテキストファイルに保存\n", | |
"    save_email_content_to_txt(file_path, email_content)\n", | |
"\n", | |
"    # メール内容を表示\n", | |
"    print(email_content)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import glob\n", | |
"\n", | |
"# Get all .txt files in the current directory\n", | |
"txt_files = glob.glob('*.txt')\n", | |
"\n", | |
"# Delete each .txt file\n", | |
"for txt_file in txt_files:\n", | |
"    os.remove(txt_file)\n", | |
"    print(f\"Deleted {txt_file}\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "base", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment