Skip to content

Instantly share code, notes, and snippets.

@taktamur
Created January 19, 2025 01:29
Show Gist options
  • Save taktamur/0189d01afae225dfdde7d6c5ad4abaa5 to your computer and use it in GitHub Desktop.
Save taktamur/0189d01afae225dfdde7d6c5ad4abaa5 to your computer and use it in GitHub Desktop.
古いメール *.eml を読める形のテキストに変換。ただしこれで展開できなかったものもある
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import email\n",
"from email import policy\n",
"from email.parser import BytesParser\n",
"import glob\n",
"from datetime import datetime\n",
"\n",
"\n",
"def read_eml(file_path):\n",
" \"\"\"\n",
" .eml形式のメールファイルを読み込み、その内容を抽出します。\n",
" 引数:\n",
" file_path (str): .emlファイルのパス。\n",
" 戻り値:\n",
" dict: メールの件名、送信者、受信者、日付、本文を含む辞書。\n",
" - 'subject' (str): メールの件名。\n",
" - 'from' (str): メールの送信者。\n",
" - 'to' (str): メールの受信者。\n",
" - 'date' (str): メールが送信された日付。\n",
" - 'body' (str): メールのプレーンテキスト本文。\n",
" \"\"\"\n",
" with open(file_path, 'rb') as f:\n",
" msg = BytesParser(policy=policy.default).parse(f)\n",
" \n",
" # Extract the email content\n",
" subject = msg['subject']\n",
" from_ = msg['from']\n",
" to = msg['to']\n",
" date = msg['date']\n",
" body = msg.get_body(preferencelist=('plain'))\n",
" if body:\n",
" body = body.get_content()\n",
" else:\n",
" body = \"\"\n",
" \n",
" return {\n",
" 'subject': subject,\n",
" 'from': from_,\n",
" 'to': to,\n",
" 'date': date,\n",
" 'body': body\n",
" }\n",
"\n",
"def save_email_content_to_txt(file_path, email_content):\n",
" \"\"\"\n",
" メールの内容をテキストファイルに保存します。\n",
" 引数:\n",
" file_path (str): .emlファイルのパス。\n",
" email_content (dict): メールの内容を含む辞書。\n",
" \"\"\"\n",
"\n",
" # メールの日付を解析して yyyymmdd 形式に変換\n",
" email_date = datetime.strptime(email_content['date'], '%a, %d %b %Y %H:%M:%S %z')\n",
" date_str = email_date.strftime('%Y%m%d')\n",
"\n",
" # 新しいファイル名を作成\n",
" txt_file_path = date_str + '_' + file_path + '.txt'\n",
" # ディレクトリが存在しない場合は作成\n",
" if not os.path.exists('text'):\n",
" os.makedirs('text')\n",
"\n",
" # 新しいファイルパスを text/ ディレクトリに変更\n",
" txt_file_path = os.path.join('text', txt_file_path)\n",
"\n",
" with open(txt_file_path, 'w') as f:\n",
" f.write('Subject: ' + email_content['subject'] + '\\n')\n",
" f.write('From: ' + email_content['from'] + '\\n')\n",
" f.write('To: ' + email_content['to'] + '\\n')\n",
" f.write('Date: ' + email_content['date'] + '\\n')\n",
" f.write('---------------------\\n')\n",
" f.write(email_content['body'])\n",
"\n",
"# .emlファイルをすべて取得\n",
"eml_files = glob.glob('*.eml')\n",
"\n",
"for file_path in eml_files:\n",
" # メール内容を読み込む\n",
" email_content = read_eml(file_path)\n",
"\n",
" # 関数を使用してメール内容をテキストファイルに保存\n",
" save_email_content_to_txt(file_path, email_content)\n",
"\n",
" # メール内容を表示\n",
" print(email_content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"\n",
"# Get all .txt files in the current directory\n",
"txt_files = glob.glob('*.txt')\n",
"\n",
"# Delete each .txt file\n",
"for txt_file in txt_files:\n",
" os.remove(txt_file)\n",
" print(f\"Deleted {txt_file}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment