ftnext · February 24, 2018 03:51 · ftnext · Feb 24, 2018 · ftnext · Feb 24, 2018
diff --git a/dataframe_max_pranctice_see_advise-20180224.ipynb b/dataframe_max_pranctice_see_advise-20180224.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使うデータ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2011-11-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2011-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2011-08-04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id   salesdate\n",
       "0   1   1          1  2011-11-12\n",
       "1   2   3          3  2011-12-03\n",
       "2   3   2          1  2012-02-10\n",
       "3   4   1          2  2012-01-04\n",
       "4   5   2          3  2012-06-03\n",
       "5   6   1          3  2011-08-04"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n",
    "df3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ほしいもの\n",
    "\n",
    "config_idでグループ化し、各グループをsalesdateが最新の行で代表させたい"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id   salesdate\n",
       "0   3   2          1  2012-02-10\n",
       "1   4   1          2  2012-01-04\n",
       "2   5   2          3  2012-06-03"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ans_df3 = pd.read_csv('select_max_answer-3.csv', sep=',', header=0, index_col=None)\n",
    "ans_df3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3通りのやり方を助言いただいた\n",
    "1. datetimeに型変換してidxmax()を利用する\n",
    "1. 型変換せずにグループごとの最大値のインデックスを取得する\n",
    "1. グループごとに条件に合う行を見つける\n",
    "\n",
    "https://qiita.com/ftnext/items/48b7baa49217fdc37ae4#comment-8f7d0214dd2abdc18977"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 助言1: datetimeに型変換してidxmax()を利用 を試す"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TypeError: reduction operation 'argmax' not allowed for this dtype となる\n",
    "# idx = df3.groupby('config_id')['salesdate'].idxmax()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 4 columns):\n",
      "id           6 non-null int64\n",
      "no           6 non-null int64\n",
      "config_id    6 non-null int64\n",
      "salesdate    6 non-null object\n",
      "dtypes: int64(3), object(1)\n",
      "memory usage: 272.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df3.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "salesdateはobject型。\n",
    "\n",
    "→idxmaxを使うためには型の変換が必要になるようだ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 4 columns):\n",
      "id           6 non-null int64\n",
      "no           6 non-null int64\n",
      "config_id    6 non-null int64\n",
      "salesdate    6 non-null datetime64[ns]\n",
      "dtypes: datetime64[ns](1), int64(3)\n",
      "memory usage: 272.0 bytes\n"
     ]
    }
   ],
   "source": [
    "df3['salesdate'] = pd.to_datetime(df3['salesdate'])\n",
    "df3.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id\n",
       "1    2\n",
       "2    3\n",
       "3    4\n",
       "Name: salesdate, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idx = df3.groupby('config_id')['salesdate'].idxmax()\n",
    "idx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id  salesdate\n",
       "2   3   2          1 2012-02-10\n",
       "3   4   1          2 2012-01-04\n",
       "4   5   2          3 2012-06-03"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3.loc[idx]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 読み込み時に型変換する場合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 4 columns):\n",
      "id           6 non-null int64\n",
      "no           6 non-null int64\n",
      "config_id    6 non-null int64\n",
      "salesdate    6 non-null datetime64[ns]\n",
      "dtypes: datetime64[ns](1), int64(3)\n",
      "memory usage: 272.0 bytes\n"
     ]
    }
   ],
   "source": [
    "df3_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n",
    "df3_2.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id  salesdate\n",
       "2   3   2          1 2012-02-10\n",
       "3   4   1          2 2012-01-04\n",
       "4   5   2          3 2012-06-03"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3_2.loc[df3_2.groupby('config_id')['salesdate'].idxmax()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 助言2: 型変換せずにグループごとの最大値のインデックスを取得する を試す"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id   salesdate\n",
       "2   3   2          1  2012-02-10\n",
       "3   4   1          2  2012-01-04\n",
       "4   5   2          3  2012-06-03"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n",
    "idx = df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])\n",
    "df2.loc[idx]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "各部ごとに動きの確認。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 0    2011-11-12\n",
      "2    2012-02-10\n",
      "Name: salesdate, dtype: object)\n",
      "---\n",
      "(2, 3    2012-01-04\n",
      "Name: salesdate, dtype: object)\n",
      "---\n",
      "(3, 1    2011-12-03\n",
      "4    2012-06-03\n",
      "5    2011-08-04\n",
      "Name: salesdate, dtype: object)\n",
      "---\n"
     ]
    }
   ],
   "source": [
    "# グループ化したあとのsalesdateの情報が取れている\n",
    "for group in df2.groupby('config_id')['salesdate']:\n",
    "    print(group)\n",
    "    print('---')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id   \n",
       "1          2    2012-02-10\n",
       "           0    2011-11-12\n",
       "2          3    2012-01-04\n",
       "3          4    2012-06-03\n",
       "           1    2011-12-03\n",
       "           5    2011-08-04\n",
       "Name: salesdate, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id\n",
       "1       Int64Index([2, 0], dtype='int64')\n",
       "2          Int64Index([3], dtype='int64')\n",
       "3    Int64Index([4, 1, 5], dtype='int64')\n",
       "Name: salesdate, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# グループごとにsalesdateの降順で並べ替えた後のindexが取れる\n",
    "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id\n",
       "1    2\n",
       "2    3\n",
       "3    4\n",
       "Name: salesdate, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# グループごとにsalesdateの降順で並べ替えた後のindexのうち先頭のものを取得\n",
    "# （=グループごとにsalesdateが最大値の行のindexを取得した）\n",
    "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 助言3: グループごとに条件に合う行を見つける を試す"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id\n",
       "1    2012-02-10\n",
       "2    2012-01-04\n",
       "3    2012-06-03\n",
       "dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# グループごとのsalesdateの最大値\n",
    "df4.groupby('config_id').apply(lambda x: x['salesdate'].max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id   \n",
       "1          0    False\n",
       "           2     True\n",
       "2          3     True\n",
       "3          1    False\n",
       "           4     True\n",
       "           5    False\n",
       "Name: salesdate, dtype: bool"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 各行が所属するグループのsalesdateの最大値に一致するか\n",
    "df4.groupby('config_id').apply(lambda x: x['salesdate']==x['salesdate'].max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>config_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             id  no  config_id   salesdate\n",
       "config_id                                 \n",
       "1         2   3   2          1  2012-02-10\n",
       "2         3   4   1          2  2012-01-04\n",
       "3         4   5   2          3  2012-06-03"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4.groupby('config_id').apply(lambda x: x[x['salesdate']==x['salesdate'].max()])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Cookbookの `ln:84` の書き方も参考になる。\n",
    "\n",
    "https://pandas.pydata.org/pandas-docs/stable/cookbook.html#grouping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "config_id\n",
       "1    3\n",
       "2    4\n",
       "3    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# salesdateがグループで最大の行のidを取り出す\n",
    "df4_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n",
    "df4_2.groupby('config_id').apply(lambda subf: subf['id'][subf['salesdate'].idxmax()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>no</th>\n",
       "      <th>config_id</th>\n",
       "      <th>salesdate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2012-02-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2012-06-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  no  config_id  salesdate\n",
       "2   3   2          1 2012-02-10\n",
       "3   4   1          2 2012-01-04\n",
       "4   5   2          3 2012-06-03"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# idxmaxをapplyから適用できたが、\n",
    "# インデックスを取得するだけなら ln [7] のように単にidxmaxを使った方がスッキリしているように思う\n",
    "idx = df4_2.groupby('config_id').apply(lambda subf: subf['salesdate'].idxmax())\n",
    "df4_2.loc[idx]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 使うデータ"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2011-11-12</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>3</td>\n",
	" <td>2011-12-03</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>6</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>2011-08-04</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"0 1 1 1 2011-11-12\n",
	"1 2 3 3 2011-12-03\n",
	"2 3 2 1 2012-02-10\n",
	"3 4 1 2 2012-01-04\n",
	"4 5 2 3 2012-06-03\n",
	"5 6 1 3 2011-08-04"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df3 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n",
	"df3"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# ほしいもの\n",
	"\n",
	"config_idでグループ化し、各グループをsalesdateが最新の行で代表させたい"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"0 3 2 1 2012-02-10\n",
	"1 4 1 2 2012-01-04\n",
	"2 5 2 3 2012-06-03"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ans_df3 = pd.read_csv('select_max_answer-3.csv', sep=',', header=0, index_col=None)\n",
	"ans_df3"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"3通りのやり方を助言いただいた\n",
	"1. datetimeに型変換してidxmax()を利用する\n",
	"1. 型変換せずにグループごとの最大値のインデックスを取得する\n",
	"1. グループごとに条件に合う行を見つける\n",
	"\n",
	"https://qiita.com/ftnext/items/48b7baa49217fdc37ae4#comment-8f7d0214dd2abdc18977"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 助言1: datetimeに型変換してidxmax()を利用を試す"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"# TypeError: reduction operation 'argmax' not allowed for this dtype となる\n",
	"# idx = df3.groupby('config_id')['salesdate'].idxmax()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'pandas.core.frame.DataFrame'>\n",
	"RangeIndex: 6 entries, 0 to 5\n",
	"Data columns (total 4 columns):\n",
	"id 6 non-null int64\n",
	"no 6 non-null int64\n",
	"config_id 6 non-null int64\n",
	"salesdate 6 non-null object\n",
	"dtypes: int64(3), object(1)\n",
	"memory usage: 272.0+ bytes\n"
	]
	}
	],
	"source": [
	"df3.info()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"salesdateはobject型。\n",
	"\n",
	"→idxmaxを使うためには型の変換が必要になるようだ"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'pandas.core.frame.DataFrame'>\n",
	"RangeIndex: 6 entries, 0 to 5\n",
	"Data columns (total 4 columns):\n",
	"id 6 non-null int64\n",
	"no 6 non-null int64\n",
	"config_id 6 non-null int64\n",
	"salesdate 6 non-null datetime64[ns]\n",
	"dtypes: datetime64[ns](1), int64(3)\n",
	"memory usage: 272.0 bytes\n"
	]
	}
	],
	"source": [
	"df3['salesdate'] = pd.to_datetime(df3['salesdate'])\n",
	"df3.info()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id\n",
	"1 2\n",
	"2 3\n",
	"3 4\n",
	"Name: salesdate, dtype: int64"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"idx = df3.groupby('config_id')['salesdate'].idxmax()\n",
	"idx"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"2 3 2 1 2012-02-10\n",
	"3 4 1 2 2012-01-04\n",
	"4 5 2 3 2012-06-03"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df3.loc[idx]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 読み込み時に型変換する場合"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'pandas.core.frame.DataFrame'>\n",
	"RangeIndex: 6 entries, 0 to 5\n",
	"Data columns (total 4 columns):\n",
	"id 6 non-null int64\n",
	"no 6 non-null int64\n",
	"config_id 6 non-null int64\n",
	"salesdate 6 non-null datetime64[ns]\n",
	"dtypes: datetime64[ns](1), int64(3)\n",
	"memory usage: 272.0 bytes\n"
	]
	}
	],
	"source": [
	"df3_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n",
	"df3_2.info()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"2 3 2 1 2012-02-10\n",
	"3 4 1 2 2012-01-04\n",
	"4 5 2 3 2012-06-03"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df3_2.loc[df3_2.groupby('config_id')['salesdate'].idxmax()]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 助言2: 型変換せずにグループごとの最大値のインデックスを取得するを試す"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"2 3 2 1 2012-02-10\n",
	"3 4 1 2 2012-01-04\n",
	"4 5 2 3 2012-06-03"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n",
	"idx = df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])\n",
	"df2.loc[idx]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"各部ごとに動きの確認。"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(1, 0 2011-11-12\n",
	"2 2012-02-10\n",
	"Name: salesdate, dtype: object)\n",
	"---\n",
	"(2, 3 2012-01-04\n",
	"Name: salesdate, dtype: object)\n",
	"---\n",
	"(3, 1 2011-12-03\n",
	"4 2012-06-03\n",
	"5 2011-08-04\n",
	"Name: salesdate, dtype: object)\n",
	"---\n"
	]
	}
	],
	"source": [
	"# グループ化したあとのsalesdateの情報が取れている\n",
	"for group in df2.groupby('config_id')['salesdate']:\n",
	" print(group)\n",
	" print('---')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id \n",
	"1 2 2012-02-10\n",
	" 0 2011-11-12\n",
	"2 3 2012-01-04\n",
	"3 4 2012-06-03\n",
	" 1 2011-12-03\n",
	" 5 2011-08-04\n",
	"Name: salesdate, dtype: object"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id\n",
	"1 Int64Index([2, 0], dtype='int64')\n",
	"2 Int64Index([3], dtype='int64')\n",
	"3 Int64Index([4, 1, 5], dtype='int64')\n",
	"Name: salesdate, dtype: object"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# グループごとにsalesdateの降順で並べ替えた後のindexが取れる\n",
	"df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id\n",
	"1 2\n",
	"2 3\n",
	"3 4\n",
	"Name: salesdate, dtype: int64"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# グループごとにsalesdateの降順で並べ替えた後のindexのうち先頭のものを取得\n",
	"# （=グループごとにsalesdateが最大値の行のindexを取得した）\n",
	"df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 助言3: グループごとに条件に合う行を見つけるを試す"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"df4 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id\n",
	"1 2012-02-10\n",
	"2 2012-01-04\n",
	"3 2012-06-03\n",
	"dtype: object"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# グループごとのsalesdateの最大値\n",
	"df4.groupby('config_id').apply(lambda x: x['salesdate'].max())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id \n",
	"1 0 False\n",
	" 2 True\n",
	"2 3 True\n",
	"3 1 False\n",
	" 4 True\n",
	" 5 False\n",
	"Name: salesdate, dtype: bool"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 各行が所属するグループのsalesdateの最大値に一致するか\n",
	"df4.groupby('config_id').apply(lambda x: x['salesdate']==x['salesdate'].max())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>config_id</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"config_id \n",
	"1 2 3 2 1 2012-02-10\n",
	"2 3 4 1 2 2012-01-04\n",
	"3 4 5 2 3 2012-06-03"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df4.groupby('config_id').apply(lambda x: x[x['salesdate']==x['salesdate'].max()])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Cookbookの `ln:84` の書き方も参考になる。\n",
	"\n",
	"https://pandas.pydata.org/pandas-docs/stable/cookbook.html#grouping"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"config_id\n",
	"1 3\n",
	"2 4\n",
	"3 5\n",
	"dtype: int64"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# salesdateがグループで最大の行のidを取り出す\n",
	"df4_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n",
	"df4_2.groupby('config_id').apply(lambda subf: subf['id'][subf['salesdate'].idxmax()])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>no</th>\n",
	" <th>config_id</th>\n",
	" <th>salesdate</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2012-02-10</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2012-01-04</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>2012-06-03</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id no config_id salesdate\n",
	"2 3 2 1 2012-02-10\n",
	"3 4 1 2 2012-01-04\n",
	"4 5 2 3 2012-06-03"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# idxmaxをapplyから適用できたが、\n",
	"# インデックスを取得するだけなら ln [7] のように単にidxmaxを使った方がスッキリしているように思う\n",
	"idx = df4_2.groupby('config_id').apply(lambda subf: subf['salesdate'].idxmax())\n",
	"df4_2.loc[idx]"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found