vaclavdekanovsky · October 3, 2020 20:11
diff --git a/Pandas_Upsample_All_Together.ipynb b/Pandas_Upsample_All_Together.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Putting it all together\n",
    "Upsample multiple groups with an average"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CAT</th>\n",
       "      <th>VALUE</th>\n",
       "      <th>count</th>\n",
       "      <th>daily_average</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DATE</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1900-01-02</th>\n",
       "      <td>abc</td>\n",
       "      <td>20</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-03</th>\n",
       "      <td>abc</td>\n",
       "      <td>20</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-04</th>\n",
       "      <td>abc</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-05</th>\n",
       "      <td>abc</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-06</th>\n",
       "      <td>abc</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-02</th>\n",
       "      <td>efg</td>\n",
       "      <td>40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-03</th>\n",
       "      <td>efg</td>\n",
       "      <td>40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-04</th>\n",
       "      <td>efg</td>\n",
       "      <td>40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-05</th>\n",
       "      <td>efg</td>\n",
       "      <td>40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900-01-06</th>\n",
       "      <td>efg</td>\n",
       "      <td>12</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            CAT  VALUE  count  daily_average\n",
       "DATE                                        \n",
       "1900-01-02  abc     20    2.0           10.0\n",
       "1900-01-03  abc     20    2.0           10.0\n",
       "1900-01-04  abc     15    3.0            5.0\n",
       "1900-01-05  abc     15    3.0            5.0\n",
       "1900-01-06  abc     15    3.0            5.0\n",
       "1900-01-02  efg     40    4.0           10.0\n",
       "1900-01-03  efg     40    4.0           10.0\n",
       "1900-01-04  efg     40    4.0           10.0\n",
       "1900-01-05  efg     40    4.0           10.0\n",
       "1900-01-06  efg     12    1.0           12.0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=\"\"\"\n",
    "CAT|DATE|VALUE\n",
    "abc|0101|10\n",
    "abc|0103|20\n",
    "abc|0106|15\n",
    "efg|0101|10\n",
    "efg|0105|40\n",
    "efg|0106|12\"\"\"\n",
    "df = pd.read_csv(io.StringIO(data), skiprows=1, sep=\"|\", dtype={\"DATE\":\"str\"})\n",
    "df[\"DATE\"] = pd.to_datetime(df[\"DATE\"], format=\"%m%d\")\n",
    "\n",
    "updated_df = []\n",
    "for gr in df[\"CAT\"].unique():\n",
    "    \n",
    "    subdf = df[df[\"CAT\"]==gr].set_index(\"DATE\").resample(\"1D\", label=\"right\")\n",
    "    \n",
    "    # back fill the data\n",
    "    res = subdf.bfill()\n",
    "    \n",
    "    # Calculate the number of hours in each group\n",
    "    resampled_groups = subdf.groups\n",
    "    df_groups = pd.DataFrame(resampled_groups, index=[\"group\"]).T\n",
    "    df_groups\n",
    "    \n",
    "    s = df_groups.groupby(\"group\").size()\n",
    "    s.name = \"count\"\n",
    "    s_counts = df_groups.join(s, on=\"group\")[\"count\"]\n",
    "    \n",
    "    res = res.join(s_counts)\n",
    "    res[\"daily_average\"] = res[\"VALUE\"]/res[\"count\"]\n",
    "    \n",
    "    # add to list\n",
    "    updated_df.append(res)\n",
    "    \n",
    "pd.concat(updated_df).dropna()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Putting it all together\n",
	"Upsample multiple groups with an average"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import io"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>CAT</th>\n",
	" <th>VALUE</th>\n",
	" <th>count</th>\n",
	" <th>daily_average</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>DATE</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1900-01-02</th>\n",
	" <td>abc</td>\n",
	" <td>20</td>\n",
	" <td>2.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-03</th>\n",
	" <td>abc</td>\n",
	" <td>20</td>\n",
	" <td>2.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-04</th>\n",
	" <td>abc</td>\n",
	" <td>15</td>\n",
	" <td>3.0</td>\n",
	" <td>5.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-05</th>\n",
	" <td>abc</td>\n",
	" <td>15</td>\n",
	" <td>3.0</td>\n",
	" <td>5.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-06</th>\n",
	" <td>abc</td>\n",
	" <td>15</td>\n",
	" <td>3.0</td>\n",
	" <td>5.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-02</th>\n",
	" <td>efg</td>\n",
	" <td>40</td>\n",
	" <td>4.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-03</th>\n",
	" <td>efg</td>\n",
	" <td>40</td>\n",
	" <td>4.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-04</th>\n",
	" <td>efg</td>\n",
	" <td>40</td>\n",
	" <td>4.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-05</th>\n",
	" <td>efg</td>\n",
	" <td>40</td>\n",
	" <td>4.0</td>\n",
	" <td>10.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1900-01-06</th>\n",
	" <td>efg</td>\n",
	" <td>12</td>\n",
	" <td>1.0</td>\n",
	" <td>12.0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" CAT VALUE count daily_average\n",
	"DATE \n",
	"1900-01-02 abc 20 2.0 10.0\n",
	"1900-01-03 abc 20 2.0 10.0\n",
	"1900-01-04 abc 15 3.0 5.0\n",
	"1900-01-05 abc 15 3.0 5.0\n",
	"1900-01-06 abc 15 3.0 5.0\n",
	"1900-01-02 efg 40 4.0 10.0\n",
	"1900-01-03 efg 40 4.0 10.0\n",
	"1900-01-04 efg 40 4.0 10.0\n",
	"1900-01-05 efg 40 4.0 10.0\n",
	"1900-01-06 efg 12 1.0 12.0"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data=\"\"\"\n",
	"CAT\|DATE\|VALUE\n",
	"abc\|0101\|10\n",
	"abc\|0103\|20\n",
	"abc\|0106\|15\n",
	"efg\|0101\|10\n",
	"efg\|0105\|40\n",
	"efg\|0106\|12\"\"\"\n",
	"df = pd.read_csv(io.StringIO(data), skiprows=1, sep=\"\|\", dtype={\"DATE\":\"str\"})\n",
	"df[\"DATE\"] = pd.to_datetime(df[\"DATE\"], format=\"%m%d\")\n",
	"\n",
	"updated_df = []\n",
	"for gr in df[\"CAT\"].unique():\n",
	" \n",
	" subdf = df[df[\"CAT\"]==gr].set_index(\"DATE\").resample(\"1D\", label=\"right\")\n",
	" \n",
	" # back fill the data\n",
	" res = subdf.bfill()\n",
	" \n",
	" # Calculate the number of hours in each group\n",
	" resampled_groups = subdf.groups\n",
	" df_groups = pd.DataFrame(resampled_groups, index=[\"group\"]).T\n",
	" df_groups\n",
	" \n",
	" s = df_groups.groupby(\"group\").size()\n",
	" s.name = \"count\"\n",
	" s_counts = df_groups.join(s, on=\"group\")[\"count\"]\n",
	" \n",
	" res = res.join(s_counts)\n",
	" res[\"daily_average\"] = res[\"VALUE\"]/res[\"count\"]\n",
	" \n",
	" # add to list\n",
	" updated_df.append(res)\n",
	" \n",
	"pd.concat(updated_df).dropna()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found