Last active
December 13, 2020 20:41
-
-
Save jmquintana79/7f6f5aa391157c58ecb8 to your computer and use it in GitHub Desktop.
create pandas dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pandas DataFrame cheat sheet: creation, index handling, column handling,
# categoricals, contingency tables and date ranges.
#
# The snippets run top to bottom as one script. Bare expressions such as
# ``DF.dtypes`` only display a value in an interactive session / notebook.
from datetime import datetime

import numpy as np
import pandas as pd

# Optional faster alternative: Modin (the successor of "Pandas on Ray") is
# designed as a drop-in replacement. It stays commented out so that `pd` is
# not silently rebound and the script has no extra dependency.
# import modin.pandas as pd

## ONE NUMPY ARRAY TO PANDAS DATAFRAME
# data
ldata = [[1, 2], [7, 3]]
# list of lists to numpy array
npdata = np.array(ldata)
# numpy array to DataFrame with explicit row and column labels
DF = pd.DataFrame(npdata, index=["row1", "row2"], columns=["col1", "col2"])
# get dtypes
DF.dtypes

## TWO NUMPY ARRAYS TO PANDAS DATAFRAME
# data
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
# one column per array; the dict keys become the column labels
DF = pd.DataFrame({'x': x, 'y': y})

## SAMPLE FRAME FOR THE INDEX / COLUMN SNIPPETS BELOW
# The labels are chosen so that every snippet that follows refers to
# something that actually exists in the frame.
df = pd.DataFrame(
    np.arange(33).reshape(3, 11),
    index=["2017-01-01", "2017-01-02", "2017-01-03"],
    columns=["A", "B", "column_name", "C", "D",
             "old1", "old2", "E", "F", "G", "H"],
)

# SET INDEX (returns a new frame; `df` itself is left untouched)
indexed_df = df.set_index(['A', 'B'])

# SET FORMAT
df.index = df.index.astype(str)  # str or other format
df.index = pd.to_datetime(df.index)  # datetime format

# RENAME INDEX
name = "sample"
DF.index.rename(name, inplace=True)

# GET LIST OF INDEX OF DATAFRAME
lindex = DF.index.tolist()

# RESET INDEX (drop=True discards the old index instead of keeping it as a column)
DF = DF.reset_index(drop=False, inplace=False)

# display head (first lines) of DATAFRAME
DF.head(n=5)

# Getting last rows
DF.tail(n=5)

# DROP COLUMN OF DATAFRAME PANDAS
df.drop(columns='column_name', inplace=True)  # by label, in place
# by position; without inplace=True the result must be assigned back,
# otherwise the call has no effect
df = df.drop(columns=df.columns[[0, 1, 3]])

# RENAME COLUMNS OF DATAFRAME PANDAS
# each old label maps to a distinct new label (no duplicate column names)
df.rename(columns={'old1': 'new1', 'old2': 'new2'}, inplace=True)

# Changing column labels (the list length must equal the number of columns).
df.columns = ['water_year', 'rain_octsep', 'outflow_octsep',
              'rain_decfeb', 'outflow_decfeb', 'rain_junaug', 'outflow_junaug']

# Create CATEGORICAL object
array = np.array(["a", "b", "a", "c"])
categorical = pd.Categorical(array)

# Create CONTINGENCY TABLE from 2 arrays (margins=True adds the "All" totals)
array1 = np.array(["a", "a", "b", "b"])
array2 = np.array(["x", "y", "x", "x"])
CONTINGENCE_TABLE = pd.crosstab(array1, array2, margins=True)

# Create array of consecutive dates
# format: Pandas Timestamp
dates_pandas = pd.date_range(start=datetime(2017, 1, 1), end=datetime(2017, 1, 10), freq='D')
# format: Python Datetime
dates_python = pd.date_range(start=datetime(2017, 1, 1), end=datetime(2017, 1, 10), freq='D').to_pydatetime()

# Possible frequencies (see the pandas "offset aliases" table):
#   YE: yearly   (spelled 'Y' before pandas 2.2; 'Y' is deprecated since then)
#   ME: monthly  (spelled 'M' before pandas 2.2; 'M' is deprecated since then)
#   W: weekly
#   D: daily
#   h: hourly    (spelled 'H' before pandas 2.2; 'H' is deprecated since then)
#   '30min': every 30 minutes (and so on)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment