jmquintana79 · December 13, 2020 20:41
diff --git a/create_pandasdf.py b/create_pandasdf.py
 from datetime import datetime  # used by the date_range examples at the bottom

 import numpy as np
 import pandas as pd
 # alternative faster: Pandas on Ray
 # NOTE(review): this second import rebinds `pd`, so every `pd.` call below
 # goes through ray.dataframe instead of pandas. Keep only one of the two.
 import ray.dataframe as pd

 # NOTE: `df`, `name`, `array`, `array1` and `array2` are not defined in the
 # snippets shown here; they look like placeholders for the reader's own
 # objects. Each section is meant to be read as an independent recipe.

 ## ONE NUMPY ARRAY TO PANDAS DATAFRAME

 # data
 ldata = [[1,2],[7,3]]
 # list of lists to numpy array
 npdata = np.array(ldata)
 # numpy array to pandas DataFrame, with explicit row and column labels
 DF = pd.DataFrame(npdata, index=["row1","row2"], columns=["col1","col2"])

 # get the dtype of every column
 DF.dtypes


 ## TWO NUMPY ARRAYS TO PANDAS DATAFRAME

 # data
 x = np.array([1,2,3])
 y = np.array([4,5,6])
 # numpy arrays to pandas DataFrame: one column per dict key
 DF = pd.DataFrame({'x':x, 'y':y})



 # SET INDEX (a list of columns is passed, so both 'A' and 'B' become the index)
 indexed_df = df.set_index(['A', 'B'])

 # SET FORMAT (two alternatives; the second assignment overrides the first)
 df.index = df.index.astype(str)     # str or other format
 df.index = pd.to_datetime(df.index) # datetime format

 # RENAME INDEX
 DF.index.rename(name, inplace=True)

 # GET LIST OF INDEX OF DATAFRAME
 lindex = DF.index.tolist()

 # RESET INDEX (drop=False keeps the old index as a column; use drop=True to discard it)
 DF = DF.reset_index(drop=False,inplace=False)

 # display head (first lines) of DATAFRAME
 DF.head(n=5)
 # Getting last rows
 DF.tail(n=5)

 # DROP COLUMN OF DATAFRAME PANDAS
 df.drop('column_name', axis=1, inplace=True)
 # by position; no inplace=True here, so this returns a new DataFrame and
 # `df` itself is unchanged unless the result is assigned
 df.drop(df.columns[[0, 1, 3]], axis=1) # by index


 # RENAME COLUMNS OF DATAFRAME PANDAS
 # each old label maps to a distinct new label; mapping two columns to the
 # same name would leave the frame with duplicate column labels
 df.rename(columns={'old1': 'new1', 'old2': 'new2'}, inplace=True)

 # Changing all column labels at once (the list replaces the existing labels).
 df.columns = ['water_year','rain_octsep', 'outflow_octsep',
               'rain_decfeb', 'outflow_decfeb', 'rain_junaug', 'outflow_junaug']

 # Create CATEGORICAL object
 pd.Categorical(array)


 # Create CONTINGENCY TABLE from 2 arrays (margins=True requests the totals)
 CONTINGENCE_TABLE = pd.crosstab(array1,array2, margins = True)

 # Create array of consecutive dates
 pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D')                 # format: Pandas Timestamp
 pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D').to_pydatetime() # format: Python Datetime

 # Frequency aliases accepted by `freq=`.
 # NOTE(review): recent pandas releases deprecate some of these spellings
 # (e.g. 'M' -> 'ME', 'Y' -> 'YE', 'H' -> 'h'); confirm against the
 # installed version.
 """
 Possible frequencies:
 Y: yearly
 M: monthly
 W: weekly
 D: daily
 H: hourly
 '30min': each 30 minutes (and so on)
 """
	from datetime import datetime  # used by the date_range examples at the bottom

	import numpy as np
	import pandas as pd
	# alternative faster: Pandas on Ray
	# NOTE(review): this second import rebinds `pd`, so every `pd.` call below
	# goes through ray.dataframe instead of pandas. Keep only one of the two.
	import ray.dataframe as pd

	# NOTE: `df`, `name`, `array`, `array1` and `array2` are not defined in the
	# snippets shown here; they look like placeholders for the reader's own
	# objects. Each section is meant to be read as an independent recipe.

	## ONE NUMPY ARRAY TO PANDAS DATAFRAME

	# data
	ldata = [[1,2],[7,3]]
	# list of lists to numpy array
	npdata = np.array(ldata)
	# numpy array to pandas DataFrame, with explicit row and column labels
	DF = pd.DataFrame(npdata, index=["row1","row2"], columns=["col1","col2"])

	# get the dtype of every column
	DF.dtypes


	## TWO NUMPY ARRAYS TO PANDAS DATAFRAME

	# data
	x = np.array([1,2,3])
	y = np.array([4,5,6])
	# numpy arrays to pandas DataFrame: one column per dict key
	DF = pd.DataFrame({'x':x, 'y':y})



	# SET INDEX (a list of columns is passed, so both 'A' and 'B' become the index)
	indexed_df = df.set_index(['A', 'B'])

	# SET FORMAT (two alternatives; the second assignment overrides the first)
	df.index = df.index.astype(str) # str or other format
	df.index = pd.to_datetime(df.index) # datetime format

	# RENAME INDEX
	DF.index.rename(name, inplace=True)

	# GET LIST OF INDEX OF DATAFRAME
	lindex = DF.index.tolist()

	# RESET INDEX (drop=False keeps the old index as a column; use drop=True to discard it)
	DF = DF.reset_index(drop=False,inplace=False)

	# display head (first lines) of DATAFRAME
	DF.head(n=5)
	# Getting last rows
	DF.tail(n=5)

	# DROP COLUMN OF DATAFRAME PANDAS
	df.drop('column_name', axis=1, inplace=True)
	# by position; no inplace=True here, so this returns a new DataFrame and
	# `df` itself is unchanged unless the result is assigned
	df.drop(df.columns[[0, 1, 3]], axis=1) # by index


	# RENAME COLUMNS OF DATAFRAME PANDAS
	# each old label maps to a distinct new label; mapping two columns to the
	# same name would leave the frame with duplicate column labels
	df.rename(columns={'old1': 'new1', 'old2': 'new2'}, inplace=True)

	# Changing all column labels at once (the list replaces the existing labels).
	df.columns = ['water_year','rain_octsep', 'outflow_octsep',
	'rain_decfeb', 'outflow_decfeb', 'rain_junaug', 'outflow_junaug']

	# Create CATEGORICAL object
	pd.Categorical(array)


	# Create CONTINGENCY TABLE from 2 arrays (margins=True requests the totals)
	CONTINGENCE_TABLE = pd.crosstab(array1,array2, margins = True)

	# Create array of consecutive dates
	pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D') # format: Pandas Timestamp
	pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D').to_pydatetime() # format: Python Datetime

	# Frequency aliases accepted by `freq=`.
	# NOTE(review): recent pandas releases deprecate some of these spellings
	# (e.g. 'M' -> 'ME', 'Y' -> 'YE', 'H' -> 'h'); confirm against the
	# installed version.
	"""
	Possible frequencies:
	Y: yearly
	M: monthly
	W: weekly
	D: daily
	H: hourly
	'30min': each 30 minutes (and so on)
	"""