Created
April 6, 2018 22:34
-
-
Save EdMan1022/5f320997bfe8ebf0299fc240bc15d14f to your computer and use it in GitHub Desktop.
Scrap for stack overflow answer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"Scrap work for answering [this question](https://stackoverflow.com/questions/49700822/merge-values-of-differents-csv-pandas)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Manually creating and filling the dataframes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_g = pd.DataFrame()\n", | |
"df_seg = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_g.loc[:, 'Sitio'] = ['A', 'B']\n", | |
"df_g.loc[:, 'Country'] = ['DE - Germany', 'AR- Argentina']\n", | |
"df_g.loc[:, 'Espacio'] = ['3619717 - www.A.com.ar - Seccion - Seccion300x250B',\n", | |
" '3619717 - www.A.com.ar - Seccion - Seccion300x250B']\n", | |
"\n", | |
"df_seg.loc[:, 'Espacio'] = ['3619717 - www.A.com.ar - Seccion - Seccion300x250B', '3619717 - www.A.com.ar - Seccion - Seccion300x250B']\n", | |
"df_seg.loc[:, 'Country'] = ['DE - Germany', 'AR- Argentina']\n", | |
"df_seg.loc[:, 'Precio'] = [0.1, 0.2]\n", | |
"df_seg.loc[:, 'Sitio'] = ['A', 'A']\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style type=\"text/css\" >\n", | |
"</style> \n", | |
"<table id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051\" > \n", | |
"<thead> <tr> \n", | |
" <th class=\"blank level0\" ></th> \n", | |
" <th class=\"col_heading level0 col0\" >Sitio</th> \n", | |
" <th class=\"col_heading level0 col1\" >Country</th> \n", | |
" <th class=\"col_heading level0 col2\" >Espacio</th> \n", | |
" </tr></thead> \n", | |
"<tbody> <tr> \n", | |
" <th id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051level0_row0\" class=\"row_heading level0 row0\" >0</th> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col0\" class=\"data row0 col0\" >A</td> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col1\" class=\"data row0 col1\" >DE - Germany</td> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col2\" class=\"data row0 col2\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n", | |
" </tr> <tr> \n", | |
" <th id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051level0_row1\" class=\"row_heading level0 row1\" >1</th> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col0\" class=\"data row1 col0\" >B</td> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col1\" class=\"data row1 col1\" >AR- Argentina</td> \n", | |
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col2\" class=\"data row1 col2\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n", | |
" </tr></tbody> \n", | |
"</table> " | |
], | |
"text/plain": [ | |
"<pandas.io.formats.style.Styler at 0x7fd89d2414a8>" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_g.style" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style type=\"text/css\" >\n", | |
"</style> \n", | |
"<table id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051\" > \n", | |
"<thead> <tr> \n", | |
" <th class=\"blank level0\" ></th> \n", | |
" <th class=\"col_heading level0 col0\" >Espacio</th> \n", | |
" <th class=\"col_heading level0 col1\" >Country</th> \n", | |
" <th class=\"col_heading level0 col2\" >Precio</th> \n", | |
" <th class=\"col_heading level0 col3\" >Sitio</th> \n", | |
" </tr></thead> \n", | |
"<tbody> <tr> \n", | |
" <th id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051level0_row0\" class=\"row_heading level0 row0\" >0</th> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col0\" class=\"data row0 col0\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col1\" class=\"data row0 col1\" >DE - Germany</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col2\" class=\"data row0 col2\" >0.1</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col3\" class=\"data row0 col3\" >A</td> \n", | |
" </tr> <tr> \n", | |
" <th id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051level0_row1\" class=\"row_heading level0 row1\" >1</th> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col0\" class=\"data row1 col0\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col1\" class=\"data row1 col1\" >AR- Argentina</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col2\" class=\"data row1 col2\" >0.2</td> \n", | |
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col3\" class=\"data row1 col3\" >A</td> \n", | |
" </tr></tbody> \n", | |
"</table> " | |
], | |
"text/plain": [ | |
"<pandas.io.formats.style.Styler at 0x7fd89cf49550>" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_seg.style" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Country</th>\n", | |
" <th>Espacio</th>\n", | |
" <th>Precio</th>\n", | |
" <th>Sitio</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>DE - Germany</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>0.1</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>AR- Argentina</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>0.2</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Country Espacio Precio \\\n", | |
"0 DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... 0.1 \n", | |
"1 AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... 0.2 \n", | |
"\n", | |
" Sitio \n", | |
"0 A \n", | |
"1 A " | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_g[['Country']].merge(df_seg)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sitio_x</th>\n", | |
" <th>Country</th>\n", | |
" <th>Espacio</th>\n", | |
" <th>Precio</th>\n", | |
" <th>Sitio_y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>DE - Germany</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>0.1</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>B</td>\n", | |
" <td>AR- Argentina</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>0.2</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sitio_x Country Espacio \\\n", | |
"0 A DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... \n", | |
"1 B AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... \n", | |
"\n", | |
" Precio Sitio_y \n", | |
"0 0.1 A \n", | |
"1 0.2 A " | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_g[['Sitio', 'Country', 'Espacio']].merge(df_seg, on=['Country', 'Espacio'], how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sitio_x</th>\n", | |
" <th>Country_x</th>\n", | |
" <th>Espacio_x</th>\n", | |
" <th>Espacio_y</th>\n", | |
" <th>Country_y</th>\n", | |
" <th>Precio</th>\n", | |
" <th>Sitio_y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>DE - Germany</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>DE - Germany</td>\n", | |
" <td>0.1</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>B</td>\n", | |
" <td>AR- Argentina</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n", | |
" <td>AR- Argentina</td>\n", | |
" <td>0.2</td>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sitio_x Country_x Espacio_x \\\n", | |
"0 A DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... \n", | |
"1 B AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... \n", | |
"\n", | |
" Espacio_y Country_y Precio \\\n", | |
"0 3619717 - www.A.com.ar - Seccion - Seccion300x... DE - Germany 0.1 \n", | |
"1 3619717 - www.A.com.ar - Seccion - Seccion300x... AR- Argentina 0.2 \n", | |
"\n", | |
" Sitio_y \n", | |
"0 A \n", | |
"1 A " | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_g.merge(df_seg, left_index=True, right_index=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment