Skip to content

Instantly share code, notes, and snippets.

@olivx
Forked from rg3915/FromDict.ipynb
Created December 5, 2018 21:29
Show Gist options
  • Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import names\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def gen_names(max_value):\n",
"    \"\"\"Generate random (full name, e-mail) pairs.\n",
"\n",
"    Args:\n",
"        max_value: number of persons to generate.\n",
"\n",
"    Returns:\n",
"        A list of ``(full_name, email)`` tuples with ``max_value`` items.\n",
"        The e-mail local part is the lower-cased first name, so two persons\n",
"        sharing a first name also share an e-mail address.\n",
"    \"\"\"\n",
"    persons = []\n",
"    for _ in range(max_value):\n",
"        first_name = names.get_first_name()\n",
"        last_name = names.get_last_name()\n",
"        full_name = '%s %s' % (first_name, last_name)\n",
"        # The template must contain a valid '%s' placeholder for the first name.\n",
"        # NOTE(review): 'example.com' is a placeholder domain -- confirm the\n",
"        # intended one.\n",
"        email = '%s@example.com' % first_name.lower()\n",
"        persons.append((full_name, email))\n",
"    return persons"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Keep the generated records under their own name: rebinding `names` here\n",
"# would replace the imported `names` module that gen_names() calls, and any\n",
"# later call to gen_names() (including re-running this cell) would fail.\n",
"people = gen_names(100)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Each generated tuple becomes one row with NAME and EMAIL columns.\n",
"df = pd.DataFrame(people, columns=('NAME', 'EMAIL'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Albert Cunningham</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>Allen Martinez</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Amalia Mouret</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>Amanda Mcmahan</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Ann Rountree</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME EMAIL\n",
"68 Albert Cunningham [email protected]\n",
"72 Allen Martinez [email protected]\n",
"73 Amalia Mouret [email protected]\n",
"62 Amanda Mcmahan [email protected]\n",
"28 Ann Rountree [email protected]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the frame ordered by EMAIL (ascending).\n",
"df.sort_values('EMAIL').head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Gregory Crittendon</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Gregory Thomson</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>Juan Brown</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Juan May</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Kathleen Anderson</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>Kathleen Webb</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Paul Nelson</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>Paul Morris</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rene Warthen</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Rene Kidd</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>William Hunter</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>William Hayes</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME EMAIL\n",
"16 Gregory Crittendon [email protected]\n",
"54 Gregory Thomson [email protected]\n",
"39 Juan Brown [email protected]\n",
"86 Juan May [email protected]\n",
"58 Kathleen Anderson [email protected]\n",
"74 Kathleen Webb [email protected]\n",
"53 Paul Nelson [email protected]\n",
"69 Paul Morris [email protected]\n",
"0 Rene Warthen [email protected]\n",
"24 Rene Kidd [email protected]\n",
"25 William Hunter [email protected]\n",
"47 William Hayes [email protected]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select every row whose EMAIL occurs more than once (all occurrences,\n",
"# not only the repeats) and show them ordered by EMAIL.\n",
"email = df['EMAIL']\n",
"dfd = df[email.duplicated(keep=False)]\n",
"dfd.sort_values('EMAIL')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the first row seen for each EMAIL, then order by EMAIL.\n",
"dffinal = df.drop_duplicates(subset=['EMAIL']).sort_values('EMAIL')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>68</td>\n",
" <td>Albert Cunningham</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>72</td>\n",
" <td>Allen Martinez</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>73</td>\n",
" <td>Amalia Mouret</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>62</td>\n",
" <td>Amanda Mcmahan</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>Ann Rountree</td>\n",
" <td>[email protected]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index NAME EMAIL\n",
"0 68 Albert Cunningham [email protected]\n",
"1 72 Allen Martinez [email protected]\n",
"2 73 Amalia Mouret [email protected]\n",
"3 62 Amanda Mcmahan [email protected]\n",
"4 28 Ann Rountree [email protected]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reset_index() moves the original row labels into an 'index' column and\n",
"# renumbers the rows from 0; show the first five.\n",
"dffinal.reset_index().head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment