Last active
February 24, 2018 03:51
-
-
Save ftnext/2ba44f0e43898f5ddeeeae2076a2cb01 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 使うデータ" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2011-11-12</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2011-12-03</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>6</td>\n", | |
| " <td>1</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2011-08-04</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "0 1 1 1 2011-11-12\n", | |
| "1 2 3 3 2011-12-03\n", | |
| "2 3 2 1 2012-02-10\n", | |
| "3 4 1 2 2012-01-04\n", | |
| "4 5 2 3 2012-06-03\n", | |
| "5 6 1 3 2011-08-04" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df3 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n", | |
| "df3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# ほしいもの\n", | |
| "\n", | |
| "config_idでグループ化し、各グループをsalesdateが最新の行で代表させたい" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "0 3 2 1 2012-02-10\n", | |
| "1 4 1 2 2012-01-04\n", | |
| "2 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ans_df3 = pd.read_csv('select_max_answer-3.csv', sep=',', header=0, index_col=None)\n", | |
| "ans_df3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "3通りのやり方を助言いただいた\n", | |
| "1. datetimeに型変換してidxmax()を利用する\n", | |
| "1. 型変換せずにグループごとの最大値のインデックスを取得する\n", | |
| "1. グループごとに条件に合う行を見つける\n", | |
| "\n", | |
| "https://qiita.com/ftnext/items/48b7baa49217fdc37ae4#comment-8f7d0214dd2abdc18977" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 助言1: datetimeに型変換してidxmax()を利用 を試す" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# TypeError: reduction operation 'argmax' not allowed for this dtype となる\n", | |
| "# idx = df3.groupby('config_id')['salesdate'].idxmax()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "RangeIndex: 6 entries, 0 to 5\n", | |
| "Data columns (total 4 columns):\n", | |
| "id 6 non-null int64\n", | |
| "no 6 non-null int64\n", | |
| "config_id 6 non-null int64\n", | |
| "salesdate 6 non-null object\n", | |
| "dtypes: int64(3), object(1)\n", | |
| "memory usage: 272.0+ bytes\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df3.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "salesdateはobject型。\n", | |
| "\n", | |
| "→idxmaxを使うためには型の変換が必要になるようだ" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "RangeIndex: 6 entries, 0 to 5\n", | |
| "Data columns (total 4 columns):\n", | |
| "id 6 non-null int64\n", | |
| "no 6 non-null int64\n", | |
| "config_id 6 non-null int64\n", | |
| "salesdate 6 non-null datetime64[ns]\n", | |
| "dtypes: datetime64[ns](1), int64(3)\n", | |
| "memory usage: 272.0 bytes\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df3['salesdate'] = pd.to_datetime(df3['salesdate'])\n", | |
| "df3.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id\n", | |
| "1 2\n", | |
| "2 3\n", | |
| "3 4\n", | |
| "Name: salesdate, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "idx = df3.groupby('config_id')['salesdate'].idxmax()\n", | |
| "idx" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "2 3 2 1 2012-02-10\n", | |
| "3 4 1 2 2012-01-04\n", | |
| "4 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df3.loc[idx]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 読み込み時に型変換する場合" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "RangeIndex: 6 entries, 0 to 5\n", | |
| "Data columns (total 4 columns):\n", | |
| "id 6 non-null int64\n", | |
| "no 6 non-null int64\n", | |
| "config_id 6 non-null int64\n", | |
| "salesdate 6 non-null datetime64[ns]\n", | |
| "dtypes: datetime64[ns](1), int64(3)\n", | |
| "memory usage: 272.0 bytes\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df3_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n", | |
| "df3_2.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "2 3 2 1 2012-02-10\n", | |
| "3 4 1 2 2012-01-04\n", | |
| "4 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df3_2.loc[df3_2.groupby('config_id')['salesdate'].idxmax()]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 助言2: 型変換せずにグループごとの最大値のインデックスを取得する を試す" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "2 3 2 1 2012-02-10\n", | |
| "3 4 1 2 2012-01-04\n", | |
| "4 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)\n", | |
| "idx = df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])\n", | |
| "df2.loc[idx]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "各部ごとに動きの確認。" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "(1, 0 2011-11-12\n", | |
| "2 2012-02-10\n", | |
| "Name: salesdate, dtype: object)\n", | |
| "---\n", | |
| "(2, 3 2012-01-04\n", | |
| "Name: salesdate, dtype: object)\n", | |
| "---\n", | |
| "(3, 1 2011-12-03\n", | |
| "4 2012-06-03\n", | |
| "5 2011-08-04\n", | |
| "Name: salesdate, dtype: object)\n", | |
| "---\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# グループ化したあとのsalesdateの情報が取れている\n", | |
| "for group in df2.groupby('config_id')['salesdate']:\n", | |
| " print(group)\n", | |
| " print('---')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id \n", | |
| "1 2 2012-02-10\n", | |
| " 0 2011-11-12\n", | |
| "2 3 2012-01-04\n", | |
| "3 4 2012-06-03\n", | |
| " 1 2011-12-03\n", | |
| " 5 2011-08-04\n", | |
| "Name: salesdate, dtype: object" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id\n", | |
| "1 Int64Index([2, 0], dtype='int64')\n", | |
| "2 Int64Index([3], dtype='int64')\n", | |
| "3 Int64Index([4, 1, 5], dtype='int64')\n", | |
| "Name: salesdate, dtype: object" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# グループごとにsalesdateの降順で並べ替えた後のindexが取れる\n", | |
| "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id\n", | |
| "1 2\n", | |
| "2 3\n", | |
| "3 4\n", | |
| "Name: salesdate, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# グループごとにsalesdateの降順で並べ替えた後のindexのうち先頭のものを取得\n", | |
| "# (=グループごとにsalesdateが最大値の行のindexを取得した)\n", | |
| "df2.groupby('config_id')['salesdate'].apply(lambda x: x.sort_values(ascending=False).index[0])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 助言3: グループごとに条件に合う行を見つける を試す" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df4 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id\n", | |
| "1 2012-02-10\n", | |
| "2 2012-01-04\n", | |
| "3 2012-06-03\n", | |
| "dtype: object" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# グループごとのsalesdateの最大値\n", | |
| "df4.groupby('config_id').apply(lambda x: x['salesdate'].max())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id \n", | |
| "1 0 False\n", | |
| " 2 True\n", | |
| "2 3 True\n", | |
| "3 1 False\n", | |
| " 4 True\n", | |
| " 5 False\n", | |
| "Name: salesdate, dtype: bool" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# 各行が所属するグループのsalesdateの最大値に一致するか\n", | |
| "df4.groupby('config_id').apply(lambda x: x['salesdate']==x['salesdate'].max())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>config_id</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "config_id \n", | |
| "1 2 3 2 1 2012-02-10\n", | |
| "2 3 4 1 2 2012-01-04\n", | |
| "3 4 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df4.groupby('config_id').apply(lambda x: x[x['salesdate']==x['salesdate'].max()])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Cookbookの `ln:84` の書き方も参考になる。\n", | |
| "\n", | |
| "https://pandas.pydata.org/pandas-docs/stable/cookbook.html#grouping" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "config_id\n", | |
| "1 3\n", | |
| "2 4\n", | |
| "3 5\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# salesdateがグループで最大の行のidを取り出す\n", | |
| "df4_2 = pd.read_csv('select_max_sample-3.csv', sep=',', header=0, index_col=None, parse_dates=['salesdate'])\n", | |
| "df4_2.groupby('config_id').apply(lambda subf: subf['id'][subf['salesdate'].idxmax()])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>id</th>\n", | |
| " <th>no</th>\n", | |
| " <th>config_id</th>\n", | |
| " <th>salesdate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2012-02-10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2012-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2012-06-03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " id no config_id salesdate\n", | |
| "2 3 2 1 2012-02-10\n", | |
| "3 4 1 2 2012-01-04\n", | |
| "4 5 2 3 2012-06-03" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# idxmaxをapplyから適用できたが、\n", | |
| "# インデックスを取得するだけなら ln [7] のように単にidxmaxを使った方がスッキリしているように思う\n", | |
| "idx = df4_2.groupby('config_id').apply(lambda subf: subf['salesdate'].idxmax())\n", | |
| "df4_2.loc[idx]" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Author
Author
Revision2での変更内容:
助言2についてsalesdateカラムをdatatime型に変換したdf3を使っていたため、
助言2の内容が確認できていなかったことに気づいた。
そこでsalesdateカラムがobject型のdf2を用意し、動作確認を行った。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://qiita.com/ftnext/items/48b7baa49217fdc37ae4#comment-8f7d0214dd2abdc18977
にていただいたコメントの動作確認をしたnotebookです。