Vincent-Stragier · October 4, 2022 11:47
diff --git a/extract_contacts_information_from_vcf.ipynb b/extract_contacts_information_from_vcf.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split contacts information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "contacts_raw = []\n",
    "\n",
    "# Backslash-n escape sequences found in the file are expanded to real line\n",
    "# breaks before the text is split into lines.\n",
    "with open('./Contacts.vcf', 'r') as vcf_file:\n",
    "    vcf_lines = vcf_file.read().replace('\\\\n', '\\n').splitlines()\n",
    "\n",
    "contact = None  # card being built; None while outside a BEGIN/END pair\n",
    "key = None  # last property name seen in the current card\n",
    "\n",
    "for line in vcf_lines:\n",
    "    if contact is None:\n",
    "        # Outside a card: ignore everything until the next BEGIN marker.\n",
    "        if line == 'BEGIN:VCARD':\n",
    "            contact, key = {}, None\n",
    "    elif line == 'END:VCARD':\n",
    "        contacts_raw.append(contact)\n",
    "        contact = None\n",
    "    elif ':' in line and not line.startswith((' ', '\\t')):\n",
    "        # Split on the first ':' only, since values may contain colons.\n",
    "        key, value = line.split(':', 1)\n",
    "        contact[key] = value\n",
    "    elif key is not None:\n",
    "        # Continuation of the previous property (folded or multi-line value).\n",
    "        contact[key] += '\\n' + line\n",
    "    # A continuation line seen before any property has nothing to attach to\n",
    "    # and is skipped. A card without END:VCARD at end of file is not stored."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Decode contacts information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import quopri\n",
    "\n",
    "# Characters replaced by '_' when a contact name is used in a file name.\n",
    "FORBIDDEN_FILENAME_CHARS = set('\\\\/:*?\"<>|')\n",
    "\n",
    "contacts_decoded = []\n",
    "file_content = \"\"\n",
    "\n",
    "keys_set = set()\n",
    "\n",
    "for contact in contacts_raw:\n",
    "    # Raw keys seen so far:\n",
    "    # ['BDAY', 'EMAIL', 'FN', 'FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE', 'N', 'N;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE', 'PHOTO;ENCODING=BASE64;JPEG', 'TEL;CELL', 'TEL;CELL;PREF', 'VERSION']\n",
    "    contact_json = {}\n",
    "\n",
    "    # FN (plain text)\n",
    "    contact_fn = contact.get('FN')\n",
    "    if contact_fn is not None:\n",
    "        contact_json['Nom'] = contact_fn.strip()\n",
    "\n",
    "    # FN (quoted-printable, UTF-8); overrides the plain FN when both exist.\n",
    "    contact_fn = contact.get('FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE')\n",
    "    if contact_fn is not None:\n",
    "        # Decode to bytes first, then as UTF-8, so that multi-byte characters\n",
    "        # and unencoded ASCII characters are both handled.\n",
    "        name_bytes = quopri.decodestring(contact_fn.strip().encode('utf-8'))\n",
    "        contact_json['Nom'] = name_bytes.decode('utf-8', errors='replace').strip()\n",
    "\n",
    "    # EMAIL\n",
    "    contact_email = contact.get('EMAIL')\n",
    "    if contact_email is not None:\n",
    "        contact_json['Adresse email'] = contact_email\n",
    "\n",
    "    # TEL\n",
    "    contact_phone = contact.get('TEL;CELL')\n",
    "    if contact_phone is not None:\n",
    "        contact_json['Téléphone'] = contact_phone\n",
    "\n",
    "    # TEL PREF\n",
    "    contact_phone = contact.get('TEL;CELL;PREF')\n",
    "    if contact_phone is not None:\n",
    "        contact_json['Téléphone préféré'] = contact_phone\n",
    "\n",
    "    # Picture: saved in a separate file named after the contact.\n",
    "    contact_photo = contact.get('PHOTO;ENCODING=BASE64;JPEG')\n",
    "    if contact_photo is not None:\n",
    "        # Path separators and control characters in the name would break or\n",
    "        # redirect the output path, so they are replaced.\n",
    "        photo_name = ''.join(\n",
    "            '_' if char in FORBIDDEN_FILENAME_CHARS or ord(char) < 32 else char\n",
    "            for char in contact_json.get('Nom', 'no_name')\n",
    "        )\n",
    "        with open(f'./photo_{photo_name}.jpg', 'wb') as photo_file:\n",
    "            photo_file.write(base64.b64decode(contact_photo))\n",
    "\n",
    "    pretty_contact = ''.join(\n",
    "        f\"{key} : {value}\\n\" for key, value in contact_json.items()\n",
    "    )\n",
    "\n",
    "    file_content += pretty_contact + \"----------------------------------------------------\\n\"\n",
    "    contacts_decoded.append(contact_json)\n",
    "\n",
    "    # Collect both the decoded keys and the raw vCard keys.\n",
    "    keys_set.update(contact_json)\n",
    "    keys_set.update(contact)\n",
    "\n",
    "print(file_content)\n",
    "# Binary mode keeps the line endings exactly as built above.\n",
    "with open('contacts.txt', 'wb') as contacts_file:\n",
    "    contacts_file.write(file_content.encode('utf-8'))\n",
    "\n",
    "keys_set = sorted(keys_set)\n",
    "print(f'{keys_set = }')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.5 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "a4a3baa59a5915e990857c994003bac406d6f8104ea28d9a51431ae5226891ab"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Split contacts information"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"contacts_raw = []\n",
	"\n",
	"# Backslash-n escape sequences found in the file are expanded to real line\n",
	"# breaks before the text is split into lines.\n",
	"with open('./Contacts.vcf', 'r') as vcf_file:\n",
	"    vcf_lines = vcf_file.read().replace('\\\\n', '\\n').splitlines()\n",
	"\n",
	"contact = None  # card being built; None while outside a BEGIN/END pair\n",
	"key = None  # last property name seen in the current card\n",
	"\n",
	"for line in vcf_lines:\n",
	"    if contact is None:\n",
	"        # Outside a card: ignore everything until the next BEGIN marker.\n",
	"        if line == 'BEGIN:VCARD':\n",
	"            contact, key = {}, None\n",
	"    elif line == 'END:VCARD':\n",
	"        contacts_raw.append(contact)\n",
	"        contact = None\n",
	"    elif ':' in line and not line.startswith((' ', '\\t')):\n",
	"        # Split on the first ':' only, since values may contain colons.\n",
	"        key, value = line.split(':', 1)\n",
	"        contact[key] = value\n",
	"    elif key is not None:\n",
	"        # Continuation of the previous property (folded or multi-line value).\n",
	"        contact[key] += '\\n' + line\n",
	"    # A continuation line seen before any property has nothing to attach to\n",
	"    # and is skipped. A card without END:VCARD at end of file is not stored."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Decode contacts information"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import base64\n",
	"import quopri\n",
	"\n",
	"# Characters replaced by '_' when a contact name is used in a file name.\n",
	"FORBIDDEN_FILENAME_CHARS = set('\\\\/:*?\"<>|')\n",
	"\n",
	"contacts_decoded = []\n",
	"file_content = \"\"\n",
	"\n",
	"keys_set = set()\n",
	"\n",
	"for contact in contacts_raw:\n",
	"    # Raw keys seen so far:\n",
	"    # ['BDAY', 'EMAIL', 'FN', 'FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE', 'N', 'N;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE', 'PHOTO;ENCODING=BASE64;JPEG', 'TEL;CELL', 'TEL;CELL;PREF', 'VERSION']\n",
	"    contact_json = {}\n",
	"\n",
	"    # FN (plain text)\n",
	"    contact_fn = contact.get('FN')\n",
	"    if contact_fn is not None:\n",
	"        contact_json['Nom'] = contact_fn.strip()\n",
	"\n",
	"    # FN (quoted-printable, UTF-8); overrides the plain FN when both exist.\n",
	"    contact_fn = contact.get('FN;CHARSET=UTF-8;ENCODING=QUOTED-PRINTABLE')\n",
	"    if contact_fn is not None:\n",
	"        # Decode to bytes first, then as UTF-8, so that multi-byte characters\n",
	"        # and unencoded ASCII characters are both handled.\n",
	"        name_bytes = quopri.decodestring(contact_fn.strip().encode('utf-8'))\n",
	"        contact_json['Nom'] = name_bytes.decode('utf-8', errors='replace').strip()\n",
	"\n",
	"    # EMAIL\n",
	"    contact_email = contact.get('EMAIL')\n",
	"    if contact_email is not None:\n",
	"        contact_json['Adresse email'] = contact_email\n",
	"\n",
	"    # TEL\n",
	"    contact_phone = contact.get('TEL;CELL')\n",
	"    if contact_phone is not None:\n",
	"        contact_json['Téléphone'] = contact_phone\n",
	"\n",
	"    # TEL PREF\n",
	"    contact_phone = contact.get('TEL;CELL;PREF')\n",
	"    if contact_phone is not None:\n",
	"        contact_json['Téléphone préféré'] = contact_phone\n",
	"\n",
	"    # Picture: saved in a separate file named after the contact.\n",
	"    contact_photo = contact.get('PHOTO;ENCODING=BASE64;JPEG')\n",
	"    if contact_photo is not None:\n",
	"        # Path separators and control characters in the name would break or\n",
	"        # redirect the output path, so they are replaced.\n",
	"        photo_name = ''.join(\n",
	"            '_' if char in FORBIDDEN_FILENAME_CHARS or ord(char) < 32 else char\n",
	"            for char in contact_json.get('Nom', 'no_name')\n",
	"        )\n",
	"        with open(f'./photo_{photo_name}.jpg', 'wb') as photo_file:\n",
	"            photo_file.write(base64.b64decode(contact_photo))\n",
	"\n",
	"    pretty_contact = ''.join(\n",
	"        f\"{key} : {value}\\n\" for key, value in contact_json.items()\n",
	"    )\n",
	"\n",
	"    file_content += pretty_contact + \"----------------------------------------------------\\n\"\n",
	"    contacts_decoded.append(contact_json)\n",
	"\n",
	"    # Collect both the decoded keys and the raw vCard keys.\n",
	"    keys_set.update(contact_json)\n",
	"    keys_set.update(contact)\n",
	"\n",
	"print(file_content)\n",
	"# Binary mode keeps the line endings exactly as built above.\n",
	"with open('contacts.txt', 'wb') as contacts_file:\n",
	"    contacts_file.write(file_content.encode('utf-8'))\n",
	"\n",
	"keys_set = sorted(keys_set)\n",
	"print(f'{keys_set = }')"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3.9.5 ('base')",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.9.5"
	},
	"orig_nbformat": 4,
	"vscode": {
	"interpreter": {
	"hash": "a4a3baa59a5915e990857c994003bac406d6f8104ea28d9a51431ae5226891ab"
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}