Skip to content

Instantly share code, notes, and snippets.

@rg3915
Last active December 15, 2021 18:51
Show Gist options
  • Save rg3915/5fb3a2e7338115bc92e82b7a9a2b372b to your computer and use it in GitHub Desktop.
Save rg3915/5fb3a2e7338115bc92e82b7a9a2b372b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_excel('Orcamentos.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = df[['IDOrcamento', 'NumOrc', 'NumOrc2', 'Opcao', 'Ano']]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IDOrcamento</th>\n",
" <th>NumOrc</th>\n",
" <th>NumOrc2</th>\n",
" <th>Opcao</th>\n",
" <th>Ano</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2938</td>\n",
" <td>1390</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-04 10:01:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2940</td>\n",
" <td>1391</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-04 14:27:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2941</td>\n",
" <td>1392</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-04 14:47:23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2942</td>\n",
" <td>1393</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-04 15:03:33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2943</td>\n",
" <td>1394</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-04 15:18:16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IDOrcamento NumOrc NumOrc2 Opcao Ano\n",
"0 2938 1390 NaN A 2016-01-04 10:01:14\n",
"1 2940 1391 NaN A 2016-01-04 14:27:44\n",
"2 2941 1392 NaN A 2016-01-04 14:47:23\n",
"3 2942 1393 NaN A 2016-01-04 15:03:33\n",
"4 2943 1394 NaN A 2016-01-04 15:18:16"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"datetime.datetime(2016, 1, 4, 10, 1, 14)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Ano'][0].to_pydatetime()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df['NOrc'] = df['NumOrc'].astype(str) + '.' + df['Opcao'] + '/' + df['Ano'].dt.year.astype(str)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IDOrcamento</th>\n",
" <th>NumOrc</th>\n",
" <th>NumOrc2</th>\n",
" <th>Opcao</th>\n",
" <th>Ano</th>\n",
" <th>NOrc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>3012</td>\n",
" <td>1450</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-22 08:20:33</td>\n",
" <td>1450.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>3013</td>\n",
" <td>1451</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-22 11:19:26</td>\n",
" <td>1451.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>3014</td>\n",
" <td>1305</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" <td>2015-11-26 15:00:40</td>\n",
" <td>1305.B/2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>3015</td>\n",
" <td>1452</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-26 10:11:53</td>\n",
" <td>1452.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>3016</td>\n",
" <td>1394</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" <td>2016-04-01 15:18:16</td>\n",
" <td>1394.B/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>3017</td>\n",
" <td>1453</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-26 12:26:44</td>\n",
" <td>1453.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>3018</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>3019</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.B/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>3020</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.C/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>3021</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.D/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>3022</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>E</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.E/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>3023</td>\n",
" <td>1454</td>\n",
" <td>NaN</td>\n",
" <td>F</td>\n",
" <td>2016-01-26 15:54:54</td>\n",
" <td>1454.F/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>3024</td>\n",
" <td>1305</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" <td>2015-11-26 15:00:40</td>\n",
" <td>1305.C/2015</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IDOrcamento NumOrc NumOrc2 Opcao Ano NOrc\n",
"70 3012 1450 NaN A 2016-01-22 08:20:33 1450.A/2016\n",
"71 3013 1451 NaN A 2016-01-22 11:19:26 1451.A/2016\n",
"72 3014 1305 NaN B 2015-11-26 15:00:40 1305.B/2015\n",
"73 3015 1452 NaN A 2016-01-26 10:11:53 1452.A/2016\n",
"74 3016 1394 NaN B 2016-04-01 15:18:16 1394.B/2016\n",
"75 3017 1453 NaN A 2016-01-26 12:26:44 1453.A/2016\n",
"76 3018 1454 NaN A 2016-01-26 15:54:54 1454.A/2016\n",
"77 3019 1454 NaN B 2016-01-26 15:54:54 1454.B/2016\n",
"78 3020 1454 NaN C 2016-01-26 15:54:54 1454.C/2016\n",
"79 3021 1454 NaN D 2016-01-26 15:54:54 1454.D/2016\n",
"80 3022 1454 NaN E 2016-01-26 15:54:54 1454.E/2016\n",
"81 3023 1454 NaN F 2016-01-26 15:54:54 1454.F/2016\n",
"82 3024 1305 NaN C 2015-11-26 15:00:40 1305.C/2015"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[70:82]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IDOrcamento</th>\n",
" <th>NumOrc</th>\n",
" <th>NumOrc2</th>\n",
" <th>Opcao</th>\n",
" <th>Ano</th>\n",
" <th>NOrc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3099</th>\n",
" <td>6132</td>\n",
" <td>1429</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" <td>2018-12-21 11:37:06</td>\n",
" <td>1429.A/2018</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IDOrcamento NumOrc NumOrc2 Opcao Ano NOrc\n",
"3099 6132 1429 NaN A 2018-12-21 11:37:06 1429.A/2018"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['NOrc'].str.contains('1429.A/2018')]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6132"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['NOrc'].str.contains('1429.A/2018')]['IDOrcamento'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"dff = pd.DataFrame(columns=['IDOrcamento', 'NumOrc2', 'NOrc'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2016\n",
"2016\n",
"2016\n",
"2016\n",
"2016\n"
]
}
],
"source": [
"for row in df.head().itertuples():\n",
" print(str(row.Ano.year))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"for row in df.itertuples():\n",
" try:\n",
" dff = dff.append({'IDOrcamento': row.IDOrcamento, 'NumOrc2': df[df['NOrc'].str.contains(str(row.NumOrc)+'.A/'+str(row.Ano.year))]['IDOrcamento'].iloc[0], 'NOrc': row.NOrc}, ignore_index=True)\n",
" # print(row.IDOrcamento, df[df['NOrc'].str.contains(str(row.NumOrc)+'.A')]['IDOrcamento'].iloc[0]) # , row.NOrc\n",
" except IndexError:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IDOrcamento</th>\n",
" <th>NumOrc2</th>\n",
" <th>NOrc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3059</th>\n",
" <td>6128</td>\n",
" <td>6128</td>\n",
" <td>1426.A/2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3060</th>\n",
" <td>6129</td>\n",
" <td>6129</td>\n",
" <td>1427.A/2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3061</th>\n",
" <td>6130</td>\n",
" <td>6130</td>\n",
" <td>1428.A/2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3062</th>\n",
" <td>6131</td>\n",
" <td>6090</td>\n",
" <td>1403.B/2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3063</th>\n",
" <td>6132</td>\n",
" <td>6132</td>\n",
" <td>1429.A/2018</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IDOrcamento NumOrc2 NOrc\n",
"3059 6128 6128 1426.A/2018\n",
"3060 6129 6129 1427.A/2018\n",
"3061 6130 6130 1428.A/2018\n",
"3062 6131 6090 1403.B/2018\n",
"3063 6132 6132 1429.A/2018"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dff.tail()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IDOrcamento</th>\n",
" <th>NumOrc2</th>\n",
" <th>NOrc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>3016</td>\n",
" <td>2943</td>\n",
" <td>1394.B/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>3017</td>\n",
" <td>3017</td>\n",
" <td>1453.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>3018</td>\n",
" <td>3018</td>\n",
" <td>1454.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>3019</td>\n",
" <td>3018</td>\n",
" <td>1454.B/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>3020</td>\n",
" <td>3018</td>\n",
" <td>1454.C/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>3021</td>\n",
" <td>3018</td>\n",
" <td>1454.D/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>3022</td>\n",
" <td>3018</td>\n",
" <td>1454.E/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>3023</td>\n",
" <td>3018</td>\n",
" <td>1454.F/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>3025</td>\n",
" <td>3025</td>\n",
" <td>1455.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>3026</td>\n",
" <td>2943</td>\n",
" <td>1394.C/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>3027</td>\n",
" <td>3027</td>\n",
" <td>1456.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>3028</td>\n",
" <td>3028</td>\n",
" <td>1457.A/2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>3029</td>\n",
" <td>3029</td>\n",
" <td>1458.A/2016</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IDOrcamento NumOrc2 NOrc\n",
"70 3016 2943 1394.B/2016\n",
"71 3017 3017 1453.A/2016\n",
"72 3018 3018 1454.A/2016\n",
"73 3019 3018 1454.B/2016\n",
"74 3020 3018 1454.C/2016\n",
"75 3021 3018 1454.D/2016\n",
"76 3022 3018 1454.E/2016\n",
"77 3023 3018 1454.F/2016\n",
"78 3025 3025 1455.A/2016\n",
"79 3026 2943 1394.C/2016\n",
"80 3027 3027 1456.A/2016\n",
"81 3028 3028 1457.A/2016\n",
"82 3029 3029 1458.A/2016"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dff.loc[70:82]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"dff.to_excel('OrcamentosNumOrcNovo.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@paulochf
Copy link

paulochf commented Jul 16, 2018

Código que substitui da célula [6] em diante do separe_email.ipynb:

emails_df = df.email_.str.split(";").apply(pd.Series).stack().reset_index(level=1, drop=True).to_frame('emails')

dffinal = df.drop("email_", axis=1).merge(emails_df, left_index=True, right_index=True)

Um detalhe é a falta de um índice único dos registros. Então, supondo o índice automático decorrente da importação do arquivo Excel como ID válido, esse ID é o usado no "join" (merge).

@rg3915
Copy link
Author

rg3915 commented Aug 1, 2018

Para transformar Query do Django em DataFrame:
df = pd.DataFrame(list(User.objects.all().values()))

@rg3915
Copy link
Author

rg3915 commented Aug 1, 2018

Pra mostrar somente algumas colunas:
df[['id', 'email']]

@rg3915
Copy link
Author

rg3915 commented Aug 28, 2018

Transformando todos os campos do DataFrame num jSON
df.T.apply(dict).tolist()

@rg3915
Copy link
Author

rg3915 commented Aug 28, 2018

Renomeando colunas
df.rename(columns={'old_name': 'new_name'})

@rg3915
Copy link
Author

rg3915 commented Aug 28, 2018

Selecionando as colunas de 10 em 10
df.iloc[:, :10]
com transposição
df.T.iloc[:10]

@rg3915
Copy link
Author

rg3915 commented Oct 11, 2018

Novo df com algumas colunas.
new = old.filter(['A','B','D'], axis=1)

@rg3915
Copy link
Author

rg3915 commented Oct 12, 2018

Como não inserir chaves com valor nulo no dicionário:
df.T.apply(lambda x: dict(x.dropna())).tolist()

@rg3915
Copy link
Author

rg3915 commented Oct 17, 2018

Retornando os valores cujo tamanho da string seja maior que...
df[df['foo'].str.len() > 50]['foo']

Mostrando o tamanho de cada célula
df[df['foo'].str.len() > 50]['foo'].str.len()

@rg3915
Copy link
Author

rg3915 commented Nov 9, 2018

Mostrando o tamanho de cada célula da coluna

df['foo_len'] = df['foo'].apply(len)
df[['foo_len', 'foo']]

Ou

df['foo'].str.len()

@rg3915
Copy link
Author

rg3915 commented Dec 5, 2018

Suponha que você tenha Cliente e Obra.

Pegando o ID do Cliente que está em Obra e trocando pelo nome do Cliente que está no outro DataFrame.

# JSON do Cliente com IDCliente e Cliente
dict_cliente = df_cliente[['IDCliente', 'Cliente']].T.apply(dict).tolist()

[
 {'IDCliente': 288, 'Cliente': 'Cliente Um'},
 {'IDCliente': 1, 'Cliente': 'Cliente Dois'},
 {'IDCliente': 959, 'Cliente': 'Cliente Três'},
]

Montando o dicionário que será usado como busca de cada Cliente a partir do seu ID.

_dict_cliente = {}
for item in dict_cliente:
    _dict_cliente[item['IDCliente']] = item['Cliente']

_dict_cliente

{
 288: 'Cliente Um',
 1: 'Cliente Dois',
 959: 'Cliente Três',
}

A partir desse dicionário fazemos a busca no outro DataFrame.

for row in df.itertuples():
    nome_cliente = _dict_cliente.get(row.IDCliente)
    print(row.IDCliente, nome_cliente)

288 Cliente Um
1   Cliente Dois
959 Cliente Três

@rg3915
Copy link
Author

rg3915 commented Dec 9, 2018

Retornando o valor máximo agrupado por ano.

df.groupby(['Ano'])['NumeroOrcamento'].max()
df.reset_index()

@rg3915
Copy link
Author

rg3915 commented Dec 10, 2018

Verificando data vazia:

for row in df.itertuples():
    if row.DataOrcamento is pd.NaT:
        print('Vazio')
    else:
        print(row.DataOrcamento)

@rg3915
Copy link
Author

rg3915 commented Dec 16, 2018

Definindo vários fillna diferentes por coluna:

values = {'last_name': '', 'occupation': '', 'age': 0}
df = df.fillna(value=values)
df.head()

@rg3915
Copy link
Author

rg3915 commented Aug 6, 2019

Se tiver problema com liblzma, faça um downgrade do Pandas para pandas==0.24.2.
https://stackoverflow.com/a/57115325

@rg3915
Copy link
Author

rg3915 commented Aug 6, 2019

Retorna o tamanho do maior objeto de cada coluna.

dict_sizes = {}
for col in df.columns:
    try:
        print(f'{col} max length: {df[col].map(len).max()}\n')
        dict_sizes[col] = df[col].map(len).max()
    except Exception as e:
        raise e
dict_sizes

@rg3915
Copy link
Author

rg3915 commented Sep 12, 2019

@rg3915
Copy link
Author

rg3915 commented Nov 6, 2019

dtype example
df['estoque'] =df['estoque'].fillna(0).astype(int)

@rg3915
Copy link
Author

rg3915 commented Nov 6, 2019

Pandas Dataframe df to Django
https://www.laurivan.com/save-pandas-dataframe-as-django-model/

Produto.objects.bulk_create(
    Produto(**item) for item in df.to_dict('records')
)

@rg3915
Copy link
Author

rg3915 commented Nov 13, 2019

Definindo os tipos das colunas com dtype

dict_types_annot = {
    'produto': str,
    'ncm': str,
    'preco': float,
    'estoque': 'Int64',
}

# Define os tipos das colunas
dff = df.astype(dict_types_annot, errors='ignore')

# Troca 'nan' por None e float por None.
dff = dff.replace({'nan': None, float('nan'): None})

dff.to_dict('records')

Produto.objects.bulk_create(
    Produto(**item) for item in dff.to_dict('records')
)

@rg3915
Copy link
Author

rg3915 commented Jan 22, 2020

Intersecção de dataframes

import pandas as pd
import numpy as np
import datetime
from random import randint

df1 = pd.DataFrame({
    'letters': ['A', 'B', 'C', 'D', 'E', 'J', 'K', 'M'],
    'B': np.random.randint(0, 10, 8),
})
df1

df2 = pd.DataFrame({
    'letters': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'U', 'Z'],
    'B': np.random.randint(0, 10, 26),
})
df2

# Retorna o que tem de comum nos dois dataframes.
pd.merge(df1, df2, how='inner', on='fruits')

# Retorna o que tem de comum, considerando o df1.
pd.merge(df1, df2, how='left', on='fruits')

# Retorna o que tem de comum, considerando o df2.
pd.merge(df1, df2, how='right', on='fruits')

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment