-
-
Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import timeit\n", | |
"from django.contrib.auth.models import User" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_value = 5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tic = timeit.default_timer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"filename = '/tmp/example.xlsx'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_excel(filename)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>NOME</th>\n", | |
" <th>EMAIL</th>\n", | |
" <th>CPF</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Alex Swartz Wertz</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Valerie Berry Mattlin</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Brooke Coley Roy</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Joe Levin Collins</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Lynn Alford Timmons</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" NOME EMAIL CPF\n", | |
"0 Alex Swartz Wertz [email protected] 00000000000000\n", | |
"1 Valerie Berry Mattlin [email protected] 00000000000000\n", | |
"2 Brooke Coley Roy [email protected] 00000000000000\n", | |
"3 Joe Levin Collins [email protected] 00000000000000\n", | |
"4 Lynn Alford Timmons [email protected] 00000000000000" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"email = df['EMAIL']\n", | |
"# Retornando os itens duplicados\n", | |
"# df[email.isin(email[email.duplicated()])]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>index</th>\n", | |
" <th>NOME</th>\n", | |
" <th>EMAIL</th>\n", | |
" <th>CPF</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>49991</th>\n", | |
" <td>49995</td>\n", | |
" <td>Warren Stowell Young</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49992</th>\n", | |
" <td>49996</td>\n", | |
" <td>Patricia Cheek Mitchell</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49993</th>\n", | |
" <td>49997</td>\n", | |
" <td>Linda Denton Breton</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49994</th>\n", | |
" <td>49998</td>\n", | |
" <td>Clyde Lopes Leonard</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49995</th>\n", | |
" <td>49999</td>\n", | |
" <td>Josefina Wilkinson Graves</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>00000000000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" index NOME EMAIL \\\n", | |
"49991 49995 Warren Stowell Young [email protected] \n", | |
"49992 49996 Patricia Cheek Mitchell [email protected] \n", | |
"49993 49997 Linda Denton Breton [email protected] \n", | |
"49994 49998 Clyde Lopes Leonard [email protected] \n", | |
"49995 49999 Josefina Wilkinson Graves [email protected] \n", | |
"\n", | |
" CPF \n", | |
"49991 00000000000000 \n", | |
"49992 00000000000000 \n", | |
"49993 00000000000000 \n", | |
"49994 00000000000000 \n", | |
"49995 00000000000000 " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Removendo os itens duplicados\n", | |
"dffinal = df.drop_duplicates('EMAIL').reset_index()\n", | |
"dffinal.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"49996" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(dffinal)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_user2(new_users):\n", | |
" # Separa full_name em first_name e last_name\n", | |
" users = []\n", | |
" for new_user in new_users:\n", | |
" full_name = new_user['full_name']\n", | |
" email = new_user['email']\n", | |
" first_name = full_name.split()[0]\n", | |
" last_name = full_name.split()[1:]\n", | |
" last_name = ' '.join(last_name)\n", | |
" username = email\n", | |
" sha_password = 'kkkkkkkkkkkkkkkkkkkk'\n", | |
" user = User(\n", | |
" first_name=first_name,\n", | |
" last_name=last_name,\n", | |
" email=email,\n", | |
" username=username,\n", | |
" password=sha_password\n", | |
" )\n", | |
" users.append(user)\n", | |
" return users" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def import_users2(new_users):\n", | |
" User.objects.bulk_create(new_users)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"new_users = []\n", | |
"emails = []\n", | |
"emails = dffinal['EMAIL'].values.tolist()\n", | |
"exist_users = User.objects.filter(email__in=emails).values_list('email', flat=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# for row in dffinal.head().itertuples():\n", | |
"# print(row.NOME, row.EMAIL, row.CPF)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for row in dffinal.head(max_value).itertuples():\n", | |
" full_name = row.NOME\n", | |
" email = row.EMAIL\n", | |
" cpf = row.CPF\n", | |
" if email not in exist_users:\n", | |
" data = dict(full_name=full_name, email=email, cpf=cpf)\n", | |
" new_users.append(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"new_users_ = create_user2(new_users)\n", | |
"import_users2(new_users_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(0, 'existentes')\n", | |
"(5, 'novos')\n" | |
] | |
} | |
], | |
"source": [ | |
"print(len(exist_users), 'existentes')\n", | |
"print(len(new_users), 'novos')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"toc = timeit.default_timer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2.6345059871673584" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"toc - tic" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"users_query_list = User.objects.filter(email__in=new_users_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# users_query_list[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"simpleuser = Group.objects.get(name='simpleuser')\n", | |
"simpleuser.id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"user_groups_list = []\n", | |
"# https://stackoverflow.com/a/34093544\n", | |
"for user in users_query_list:\n", | |
" user_groups = User.groups.through(user_id=user.pk, group_id=simpleuser.pk)\n", | |
" user_groups_list.append(user_groups)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<User_groups: User_groups object>,\n", | |
" <User_groups: User_groups object>,\n", | |
" <User_groups: User_groups object>,\n", | |
" <User_groups: User_groups object>,\n", | |
" <User_groups: User_groups object>]" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"User.groups.through.objects.bulk_create(user_groups_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"toc = timeit.default_timer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3.3119399547576904" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"toc - tic" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# UserCPF\n", | |
"exist_usercpfs = UserCPF.objects.filter(user__email__in=emails).values_list('user__email', flat=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[email protected]\n", | |
"[email protected]\n", | |
"[email protected]\n", | |
"[email protected]\n", | |
"[email protected]\n" | |
] | |
} | |
], | |
"source": [ | |
"new_usercpfs = []\n", | |
"for row in dffinal.head(max_value).itertuples():\n", | |
" email = row.EMAIL\n", | |
" cpf = row.CPF\n", | |
" # Verifica se UserCPF ja existe\n", | |
" if email not in exist_usercpfs:\n", | |
" print(email)\n", | |
" user = User.objects.get(email=email)\n", | |
" data = dict(user=user, cpf=cpf)\n", | |
" new_usercpfs.append(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# função create_usercpf(new_usercpfs)\n", | |
"new_usercpfs2 = []\n", | |
"for user in new_usercpfs:\n", | |
" new_usercpfs2.append(UserCPF(user=user['user'], cpf=user['cpf']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<UserCPF: Alex Swartz Wertz>,\n", | |
" <UserCPF: Valerie Berry Mattlin>,\n", | |
" <UserCPF: Brooke Coley Roy>,\n", | |
" <UserCPF: Joe Levin Collins>,\n", | |
" <UserCPF: Lynn Alford Timmons>]" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"UserCPF.objects.bulk_create(new_usercpfs2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"toc = timeit.default_timer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4.112852096557617" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"toc - tic" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Employee\n", | |
"firm = Company.objects.get(name='Empresa')\n", | |
"exist_employees = Employee.objects.filter(user__email__in=emails).values_list('user__email', flat=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Company: Empresa>" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"firm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"new_employees = []\n", | |
"for row in dffinal.head(max_value).itertuples():\n", | |
" email = row.EMAIL\n", | |
" if email not in exist_employees:\n", | |
" user = User.objects.get(email=email)\n", | |
" data = dict(user=user, firm=firm, department='14')\n", | |
" new_employees.append(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: [email protected]>}\n", | |
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: [email protected]>}\n", | |
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: [email protected]>}\n", | |
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: [email protected]>}\n", | |
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: [email protected]>}\n" | |
] | |
} | |
], | |
"source": [ | |
"# função create_employee(new_employees)\n", | |
"new_employees2 = []\n", | |
"for user in new_employees:\n", | |
" print(user)\n", | |
" new_employees2.append(Employee(user=user['user'], firm=user['firm'], department=user['department']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<Employee: Alex Swartz Wertz>,\n", | |
" <Employee: Valerie Berry Mattlin>,\n", | |
" <Employee: Brooke Coley Roy>,\n", | |
" <Employee: Joe Levin Collins>,\n", | |
" <Employee: Lynn Alford Timmons>]" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Employee.objects.bulk_create(new_employees2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Permissões\n", | |
"def create_permissions():\n", | |
" # Criando permissão view_manage_career_pages\n", | |
" content_type = ContentType.objects.get_for_model(Company)\n", | |
" permission = Permission.objects.get(\n", | |
" content_type=content_type,\n", | |
" codename='view_manage_career_pages'\n", | |
" )\n", | |
" # Habilitando permissão para simpleuser\n", | |
" g = Group.objects.get(name='simpleuser')\n", | |
" g.permissions.add(permission)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_permissions_quiz():\n", | |
" # Criando permissão add_quiz\n", | |
" content_type = ContentType.objects.get_for_model(Quiz)\n", | |
" permission = Permission.objects.get(\n", | |
" content_type=content_type,\n", | |
" codename='add_quiz'\n", | |
" )\n", | |
" # Habilitando permissão para simpleuser\n", | |
" g = Group.objects.get(name='simpleuser')\n", | |
" g.permissions.add(permission)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"create_permissions()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"create_permissions_quiz()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"toc = timeit.default_timer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5.437417984008789" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"toc - tic" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Django Shell-Plus", | |
"language": "python", | |
"name": "django_extensions" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment