Created
May 21, 2019 23:21
-
-
Save Kautenja/984190559f638ef1abbc650f38e5779f to your computer and use it in GitHub Desktop.
A method to infer the datatype of a pandas Series.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A method for inferring the data type of Series/ndarrays.""" | |
import pandas as pd | |
# Candidate pandas converters, listed in the order in which they are tried.
_TYPE_CASTS = [
    pd.to_datetime,
    pd.to_numeric,
    pd.to_timedelta,
]
def infer_column_dtype(column):
    """
    Infer the datatype of a pandas column.

    Only columns whose dtype is ``object`` are inspected; any other column
    is reported with the dtype it already has. For an object column, the
    unique non-null values are passed to each converter in ``_TYPE_CASTS``
    (in order) and the dtype produced by the first converter that succeeds
    is returned.

    Args:
        column: a column as a pandas Series or NumPy ndarray

    Returns:
        the inferred data type of the column; the column's own dtype when
        it is not an object column, when it holds no non-null values, or
        when none of the converters succeeds

    """
    # only object (i.e., string-like) columns are candidates for casting
    if column.dtype != "object":
        return column.dtype

    # compute the unique non-null values once instead of once per converter;
    # pd.notna / pd.unique work for both Series and ndarray inputs, whereas
    # ndarray has no dropna() method
    candidates = pd.unique(column[pd.notna(column)])

    # an empty array converts without error, so without this guard an empty
    # or all-null column would be reported with the first converter's dtype
    if len(candidates) == 0:
        return column.dtype

    # iterate over the type cast methods (in order)
    for type_cast in _TYPE_CASTS:
        try:
            inferred = type_cast(candidates)
        except Exception:
            # inference is best effort: a failed conversion only means this
            # converter does not apply. Unlike a bare ``except``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        return inferred.dtype

    # no converter applied; return the default column dtype
    return column.dtype
# Public API of this module: only the inference function is exported.
__all__ = ["infer_column_dtype"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Inspired by the original solution here