Created
June 24, 2021 22:13
-
-
Save tcvieira/681c036ff4f169473a9651b6d3cbbd58 to your computer and use it in GitHub Desktop.
add_datepart.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "add_datepart.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyOxr6Ca+pNh38aSyMs94dr7", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/tcvieira/681c036ff4f169473a9651b6d3cbbd58/add_datepart.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "OSCTNCXyFLLY" | |
}, | |
"source": [ | |
"# add_datepart\n", | |
"\n", | |
"Função extraída da lib fastai" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PK2IxkmQFEdY" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import re\n", | |
"\n", | |
"def ifnone(a, b):\n", | |
"    \"\"\"Return `a` unless it is None, in which case return `b`.\"\"\"\n", | |
"    return b if a is None else a\n", | |
"\n", | |
"def make_date(df, date_field):\n", | |
"    \"\"\"Ensure `df[date_field]` holds datetimes, converting the column in place if needed.\n", | |
"\n", | |
"    Columns that are already datetime64 (naive or timezone-aware) are left untouched.\n", | |
"    \"\"\"\n", | |
"    # Public dtype check: covers tz-aware columns and does not raise on other extension dtypes.\n", | |
"    if not pd.api.types.is_datetime64_any_dtype(df[date_field]):\n", | |
"        # `infer_datetime_format` is deprecated since pandas 2.0, so it is no longer passed.\n", | |
"        df[date_field] = pd.to_datetime(df[date_field])\n", | |
"\n", | |
"def add_datepart(df, field_name, prefix=None, drop=True, time=False):\n", | |
"    \"\"\"Expand the date column `field_name` of `df` into date-part columns.\n", | |
"\n", | |
"    `df` is modified in place and also returned. New columns are named\n", | |
"    `prefix + part`; `prefix` defaults to `field_name` with a trailing\n", | |
"    'Date' or 'date' removed. With `time=True` the Hour, Minute and Second\n", | |
"    parts are added as well. `prefix + 'Elapsed'` holds whole seconds since\n", | |
"    the Unix epoch (NaN where the date is missing). With `drop=True` the\n", | |
"    original column is removed from `df`.\n", | |
"    \"\"\"\n", | |
"    make_date(df, field_name)\n", | |
"    field = df[field_name]\n", | |
"    prefix = ifnone(prefix, re.sub('[Dd]ate$', '', field_name))\n", | |
"    attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear', 'Is_month_end', 'Is_month_start',\n", | |
"            'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start']\n", | |
"    if time:\n", | |
"        attr = attr + ['Hour', 'Minute', 'Second']\n", | |
"    # `Series.dt.week` was deprecated in pandas 1.1.0 and removed in 2.0;\n", | |
"    # use `isocalendar().week` when available, cast to the dtype of the other parts.\n", | |
"    if hasattr(field.dt, 'isocalendar'):\n", | |
"        week = field.dt.isocalendar().week.astype(field.dt.day.dtype)\n", | |
"    else:\n", | |
"        week = field.dt.week\n", | |
"    for n in attr:\n", | |
"        df[prefix + n] = week if n == 'Week' else getattr(field.dt, n.lower())\n", | |
"    mask = ~field.isna()\n", | |
"    # Convert to second resolution explicitly: dividing the raw int64 view by 10**9\n", | |
"    # is only correct for nanosecond data, and pandas 2.x columns may use s/ms/us.\n", | |
"    seconds = field.values.astype('datetime64[s]').astype(np.int64)\n", | |
"    df[prefix + 'Elapsed'] = np.where(mask, seconds, np.nan)\n", | |
"    if drop:\n", | |
"        df.drop(field_name, axis=1, inplace=True)\n", | |
"    return df" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UtI_fNkqFHkp" | |
}, | |
"source": [ | |
"# Small example frame, including one missing date, expanded into date-part columns.\n", | |
"raw_dates = ['2019-12-04', None, '2019-11-15', '2019-10-24']\n", | |
"df = pd.DataFrame({'date': raw_dates})\n", | |
"df = add_datepart(df, 'date')\n", | |
"df.head()" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment