Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save EdMan1022/5f320997bfe8ebf0299fc240bc15d14f to your computer and use it in GitHub Desktop.
Save EdMan1022/5f320997bfe8ebf0299fc240bc15d14f to your computer and use it in GitHub Desktop.
Scrap for stack overflow answer
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Scrap work for answering [this question](https://stackoverflow.com/questions/49700822/merge-values-of-differents-csv-pandas)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Manually creating and filling the dataframes"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df_g = pd.DataFrame()\n",
"df_seg = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df_g.loc[:, 'Sitio'] = ['A', 'B']\n",
"df_g.loc[:, 'Country'] = ['DE - Germany', 'AR- Argentina']\n",
"df_g.loc[:, 'Espacio'] = ['3619717 - www.A.com.ar - Seccion - Seccion300x250B',\n",
" '3619717 - www.A.com.ar - Seccion - Seccion300x250B']\n",
"\n",
"df_seg.loc[:, 'Espacio'] = ['3619717 - www.A.com.ar - Seccion - Seccion300x250B', '3619717 - www.A.com.ar - Seccion - Seccion300x250B']\n",
"df_seg.loc[:, 'Country'] = ['DE - Germany', 'AR- Argentina']\n",
"df_seg.loc[:, 'Precio'] = [0.1, 0.2]\n",
"df_seg.loc[:, 'Sitio'] = ['A', 'A']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"</style> \n",
"<table id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051\" > \n",
"<thead> <tr> \n",
" <th class=\"blank level0\" ></th> \n",
" <th class=\"col_heading level0 col0\" >Sitio</th> \n",
" <th class=\"col_heading level0 col1\" >Country</th> \n",
" <th class=\"col_heading level0 col2\" >Espacio</th> \n",
" </tr></thead> \n",
"<tbody> <tr> \n",
" <th id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051level0_row0\" class=\"row_heading level0 row0\" >0</th> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col0\" class=\"data row0 col0\" >A</td> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col1\" class=\"data row0 col1\" >DE - Germany</td> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row0_col2\" class=\"data row0 col2\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n",
" </tr> <tr> \n",
" <th id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051level0_row1\" class=\"row_heading level0 row1\" >1</th> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col0\" class=\"data row1 col0\" >B</td> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col1\" class=\"data row1 col1\" >AR- Argentina</td> \n",
" <td id=\"T_1e7c6fd0_39e8_11e8_8c69_bc5ff4e99051row1_col2\" class=\"data row1 col2\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n",
" </tr></tbody> \n",
"</table> "
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7fd89d2414a8>"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_g.style"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"</style> \n",
"<table id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051\" > \n",
"<thead> <tr> \n",
" <th class=\"blank level0\" ></th> \n",
" <th class=\"col_heading level0 col0\" >Espacio</th> \n",
" <th class=\"col_heading level0 col1\" >Country</th> \n",
" <th class=\"col_heading level0 col2\" >Precio</th> \n",
" <th class=\"col_heading level0 col3\" >Sitio</th> \n",
" </tr></thead> \n",
"<tbody> <tr> \n",
" <th id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051level0_row0\" class=\"row_heading level0 row0\" >0</th> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col0\" class=\"data row0 col0\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col1\" class=\"data row0 col1\" >DE - Germany</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col2\" class=\"data row0 col2\" >0.1</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row0_col3\" class=\"data row0 col3\" >A</td> \n",
" </tr> <tr> \n",
" <th id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051level0_row1\" class=\"row_heading level0 row1\" >1</th> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col0\" class=\"data row1 col0\" >3619717 - www.A.com.ar - Seccion - Seccion300x250B</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col1\" class=\"data row1 col1\" >AR- Argentina</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col2\" class=\"data row1 col2\" >0.2</td> \n",
" <td id=\"T_5584f786_39e8_11e8_8c69_bc5ff4e99051row1_col3\" class=\"data row1 col3\" >A</td> \n",
" </tr></tbody> \n",
"</table> "
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7fd89cf49550>"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_seg.style"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country</th>\n",
" <th>Espacio</th>\n",
" <th>Precio</th>\n",
" <th>Sitio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DE - Germany</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>0.1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AR- Argentina</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>0.2</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Country Espacio Precio \\\n",
"0 DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... 0.1 \n",
"1 AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... 0.2 \n",
"\n",
" Sitio \n",
"0 A \n",
"1 A "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_g[['Country']].merge(df_seg)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sitio_x</th>\n",
" <th>Country</th>\n",
" <th>Espacio</th>\n",
" <th>Precio</th>\n",
" <th>Sitio_y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>DE - Germany</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>0.1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B</td>\n",
" <td>AR- Argentina</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>0.2</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sitio_x Country Espacio \\\n",
"0 A DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... \n",
"1 B AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... \n",
"\n",
" Precio Sitio_y \n",
"0 0.1 A \n",
"1 0.2 A "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_g[['Sitio', 'Country', 'Espacio']].merge(df_seg, on=['Country', 'Espacio'], how='left')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sitio_x</th>\n",
" <th>Country_x</th>\n",
" <th>Espacio_x</th>\n",
" <th>Espacio_y</th>\n",
" <th>Country_y</th>\n",
" <th>Precio</th>\n",
" <th>Sitio_y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>DE - Germany</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>DE - Germany</td>\n",
" <td>0.1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B</td>\n",
" <td>AR- Argentina</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>3619717 - www.A.com.ar - Seccion - Seccion300x...</td>\n",
" <td>AR- Argentina</td>\n",
" <td>0.2</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sitio_x Country_x Espacio_x \\\n",
"0 A DE - Germany 3619717 - www.A.com.ar - Seccion - Seccion300x... \n",
"1 B AR- Argentina 3619717 - www.A.com.ar - Seccion - Seccion300x... \n",
"\n",
" Espacio_y Country_y Precio \\\n",
"0 3619717 - www.A.com.ar - Seccion - Seccion300x... DE - Germany 0.1 \n",
"1 3619717 - www.A.com.ar - Seccion - Seccion300x... AR- Argentina 0.2 \n",
"\n",
" Sitio_y \n",
"0 A \n",
"1 A "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_g.merge(df_seg, left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment