pandas | series | dataframes | index hierarchy | GroupBy, aggregate functions | Merging, joining, concatenating | operations | IO
> import numpy as np
> import pandas as pd
> lbls = ['a', 'b', 'c'] # or np.array
> data = [123, 423, 456]
> d = {}
> for i in np.arange(len(lbls)):
>     d[lbls[i]] = data[i]
> pd.Series(data = data)
0 123
1 423
2 456
dtype: int64
> pd.Series(data = data, index = lbls)
a 123
b 423
c 456
dtype: int64
> s = pd.Series([sum]) # a pd Series can HOLD FUNCTIONS!!!! It can hold a set of transformations to exec
> s[0](np.arange(1, 101))
5050
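# since d (built above) maps labels to values, a Series can also be built straight from the dict
> pd.Series(d)
a 123
b 423
c 456
dtype: int64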
> import numpy as np
> from numpy.random import randn
> import pandas as pd
> import string as st
> np.random.seed(101) # set the SEED so the generated data is reproducible
> nRows = 2
> nCols = 2
> rows = st.ascii_lowercase[:nRows]
> cols = st.ascii_uppercase[nRows:nRows + nCols]
> df = pd.DataFrame(data = randn(nRows, nCols) \
    , index = [rows[_] for _ in np.arange(nRows)] \
    , columns = [cols[_] for _ in np.arange(nCols)])
> df # Columns and rows are Series!!!
C D
a 2.706850 0.628133
b 0.907969 0.503826
> df[[cols[_] for _ in np.arange(nCols)]] # select multiple columns
C D
a 2.706850 0.628133
b 0.907969 0.503826
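# quick check that a single column really is a Series
> type(df['C'])
pandas.core.series.Series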
> df['E'] = '' # ADD COLUMN
> df
C D E
a 2.706850 0.628133
b 0.907969 0.503826
> df.drop('a', inplace = True) # drops along axis 0 (rows) by default
> df.drop('E', axis = 1, inplace = True) # inplace = True to mutate the DS itself
> df
C D
b 0.907969 0.503826
# row selection (shown on the original 2x2 df, before the drops above)
> df.loc['b'] # by label
C 0.907969
D 0.503826
Name: b, dtype: float64
> df.iloc[1] # by integer position
C 0.907969
D 0.503826
Name: b, dtype: float64
> df.loc['a', 'C']
2.706849839399938
> df.loc[['a', 'b'], ['C', 'D']]
C D
a 2.706850 0.628133
b 0.907969 0.503826
# conditional selection (outputs below use a fresh df with nRows = nCols = 3, i.e. cols D, E, F)
> df > 0 # element-wise comparison returns a boolean DataFrame
D E F
a True True True
b True True False
c False True False
> f = df > 0 # f is a boolean DataFrame (a mask), not a function
> df[f] # cells where the mask is False become NaN
D E F
a 2.706850 0.628133 0.907969
b 0.503826 0.651118 NaN
c NaN 0.605965 NaN
> df['D'] > 0 # comparing a column returns a boolean Series
a True
b True
c False
Name: D, dtype: bool
> df['D'][df['D'] > 0] # a boolean Series as a filter keeps only the rows where it is True
a 2.706850
b 0.503826
Name: D, dtype: float64
> df[df['D'] > 0]
D E F
a 2.706850 0.628133 0.907969
b 0.503826 0.651118 -0.319318
> df[(df['D'] > 0) & (df['D'] < 2)] # element-wise AND is & (op 'and' expects two booleans, not two Series of booleans)
D E F
b 0.503826 0.651118 -0.319318
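# element-wise OR is | (same caveat as &); values from the df above
> df[(df['D'] > 2) | (df['D'] < 0)]
D E F
a 2.706850 0.628133 0.907969
c -0.848077 0.605965 -2.018168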
> df.reset_index() # resets the index to int values; returns a copy unless inplace = True
index D E F
0 a 2.706850 0.628133 0.907969
1 b 0.503826 0.651118 -0.319318
2 c -0.848077 0.605965 -2.018168
> df['G'] = 'A B C'.split() # needs to match shape
> df
D E F G
a 2.706850 0.628133 0.907969 A
b 0.503826 0.651118 -0.319318 B
c -0.848077 0.605965 -2.018168 C
> df.set_index('G')
D E F
G
A 2.706850 0.628133 0.907969
B 0.503826 0.651118 -0.319318
C -0.848077 0.605965 -2.018168
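# like drop, set_index returns a copy; pass inplace = True to mutate df itself
> df.set_index('G', inplace = True)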
# **************************************************************************
# INDEX HIERARCHY
# **************************************************************************
> import numpy as np
> import pandas as pd
> A = 'G1 G1 G1 G2 G2 G2'.split()
> B = [1, 2, 3, 1, 2, 3]
> Z = pd.MultiIndex.from_tuples(list(zip(A, B)))
> df = pd.DataFrame(np.random.randn(6, 2), Z, ['A', 'B'])
> df.index.names = ['I1', 'I2']
> df
A B
I1 I2
G1 1 2.302987 0.729024
   2 -0.863091 0.305632
   3 0.243178 0.864165
G2 1 -1.560931 -0.251897
   2 -0.578120 0.236996
   3 0.200780 0.327845
> df.loc['G1'].loc[1]['B'] # drill down level by level
0.729024
# CROSS SECTIONS (XS)
> df.xs('G1') # or df.loc['G1']
A B
I2
1 2.302987 0.729024
2 -0.863091 0.305632
3 0.243178 0.864165
> df.xs(1, level = 'I2') # slices an inner level, which plain .loc cannot do directly
A B
I1
G1 2.302987 0.729024
G2 -1.560931 -0.251897
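# a tuple key cross-sects both levels at once (values from the df above)
> df.xs(('G1', 1))
A 2.302987
B 0.729024
Name: (G1, 1), dtype: float64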
> d = {'A': [1, 2, np.nan], 'B': [5, np.nan, np.nan], 'C': [1, 2, 3]} # don't shadow the built-in dict
> df = pd.DataFrame(d)
> df.dropna() # drop all rows with NaN values
A B C
0 1.0 5.0 1
> df.dropna(axis = 1) # drop all cols with NaN values
C
0 1
1 2
2 3
> df.fillna(value = 'NO_DATA') # replace NaN
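# a more useful fill: the column mean (1.5 for column A here)
> df['A'].fillna(value = df['A'].mean())
0 1.0
1 2.0
2 1.5
Name: A, dtype: float64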
> import numpy as np
> import pandas as pd
> d = {'COMPANY': ['Palantir', 'Airbus', 'Klarna', 'Klarna']\
    , 'PERSON': ['P1', 'P2', 'P3', 'Diogo']\
    , 'IQ': [101, 202, 303, 404]}
> df = pd.DataFrame(d)
> comp = df.groupby('COMPANY')
> comp.mean() # ignores non-numeric columns
IQ
COMPANY
Airbus 202.0
Klarna 353.5
Palantir 101.0
> comp.describe() # summary statistics for the numeric columns
IQ
count mean std min 25% 50% 75% max
COMPANY
Airbus 1.0 202.0 NaN 202.0 202.00 202.0 202.00 202.0
Klarna 2.0 353.5 71.417785 303.0 328.25 353.5 378.75 404.0
Palantir 1.0 101.0 NaN 101.0 101.00 101.0 101.00 101.0
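# aggregate first, then index into the result (303 + 404 = 707)
> comp.sum().loc['Klarna']
IQ 707
Name: Klarna, dtype: int64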
# **************************************************************************
# CONCATENATION: merging, joining (inner, outer, left, right)
# **************************************************************************
> import numpy as np
> import pandas as pd
> import string as st
> upChars = list(st.ascii_uppercase)
> loChars = list(st.ascii_lowercase)
> cols = upChars[:4]
> counter = 3
> O1 = {l: [l + str(index) for index in range(counter + 1)[-len(cols):]] for l in cols}
> counter += 4
> O2 = {l: [l + str(index) for index in range(counter + 1)[-len(cols):]] for l in cols}
> counter += 4
> O3 = {l: [l + str(index) for index in range(counter + 1)[-len(cols):]] for l in cols}
> getIndexes = lambda vals: [v[1:] for v in vals] # strip the letter, keep the digits
> df1 = pd.DataFrame(data = O1, index = getIndexes(O1[cols[0]]))
> df2 = pd.DataFrame(data = O2, index = getIndexes(O2[cols[0]]))
> df3 = pd.DataFrame(data = O3, index = getIndexes(O3[cols[0]]))
> df1
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
> pd.concat([df1, df2, df3]) # stack vertically, in list order
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
4 A4 B4 C4 D4
5 A5 B5 C5 D5
6 A6 B6 C6 D6
7 A7 B7 C7 D7
8 A8 B8 C8 D8
9 A9 B9 C9 D9
10 A10 B10 C10 D10
11 A11 B11 C11 D11
> pd.concat([df1, df2, df3], axis = 1) # side-by-side, aligned on the index; each df covers only 4 of the 12 indexes (string indexes sort lexicographically, so '10' < '2')
A B C D A B C D A B C D
0 A0 B0 C0 D0 NaN NaN NaN NaN NaN NaN NaN NaN
1 A1 B1 C1 D1 NaN NaN NaN NaN NaN NaN NaN NaN
10 NaN NaN NaN NaN NaN NaN NaN NaN A10 B10 C10 D10
11 NaN NaN NaN NaN NaN NaN NaN NaN A11 B11 C11 D11
2 A2 B2 C2 D2 NaN NaN NaN NaN NaN NaN NaN NaN
3 A3 B3 C3 D3 NaN NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN A4 B4 C4 D4 NaN NaN NaN NaN
5 NaN NaN NaN NaN A5 B5 C5 D5 NaN NaN NaN NaN
6 NaN NaN NaN NaN A6 B6 C6 D6 NaN NaN NaN NaN
7 NaN NaN NaN NaN A7 B7 C7 D7 NaN NaN NaN NaN
8 NaN NaN NaN NaN NaN NaN NaN NaN A8 B8 C8 D8
9 NaN NaN NaN NaN NaN NaN NaN NaN A9 B9 C9 D9
> df1 = pd.DataFrame({'A': [1] \
,'B': [1] \
,'C': [2]})
> df2 = pd.DataFrame({'D': [1, 0, 9] \
,'E': [1, 0, 0] \
,'C': [2, 3, 1]})
> df3 = pd.DataFrame({'F': ['a', 'b', 'c'] \
,'E': [1, 0, 1] \
,'C': [2, 3, 1]})
# Inner merging keeps only rows with matching key values
> pd.merge(df1, df2, how = 'inner', on = 'C') # how defaults to 'inner'
A B C D E
0 1 1 2 1 1
> pd.merge(df2, df3, on = ['C', 'E']) # merging on multiple keys
C D E F
0 2 1 1 a
1 3 0 0 b
# Outer merging on mutual keys does not exclude the non-matching rows
> pd.merge(df2, df3, how = 'outer', on = ['C', 'E'])
C D E F
0 2 1.0 1 a
1 3 0.0 0 b
2 1 9.0 0 NaN
3 1 NaN 1 c
# 'left'/'right' keep every row of the respective DS, matching or not
> pd.merge(df2, df3, how = 'left', on = ['C', 'E'])
C D E F
0 2 1 1 a
1 3 0 0 b
2 1 9 0 NaN
> pd.merge(df2, df3, how = 'right', on = ['C', 'E'])
C D E F
0 2 1.0 1 a
1 3 0.0 0 b
2 1 NaN 1 c
# Join defaults to a merge taking the **INDEX** values as keys
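# a minimal join sketch (the frames left/right here are assumed, not defined above)
> left = pd.DataFrame({'X': [1, 2]}, index = ['k0', 'k1'])
> right = pd.DataFrame({'Y': [3, 4]}, index = ['k0', 'k2'])
> left.join(right) # left join on the index by default
X Y
k0 1 3.0
k1 2 NaN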
# **************************************************************************
# OPERATIONS
# **************************************************************************
> import numpy as np
> import pandas as pd
> import string as st
> df = pd.DataFrame(data = {'A' : [_ + 1 for _ in range(4)] \
, 'B' : [444, 555, 666, 444] \
, 'C' : ['abc', 'def', 'ghi', 'xyz'] })
> df.head()
A B C
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz
> df['B'].unique()
array([444, 555, 666], dtype=int64)
> df['B'].nunique()
3
> df['B'].value_counts()
444 2
555 1
666 1
Name: B, dtype: int64
> df[(df['B'] % 2 == 0) & (df['A'] > 2)]
A B C
2 3 666 ghi
3 4 444 xyz
> f = lambda x: x**2
> df['A'].apply(f)
0 1
1 4
2 9
3 16
Name: A, dtype: int64
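# apply also works with built-ins (every string in C has 3 chars)
> df['C'].apply(len)
0 3
1 3
2 3
3 3
Name: C, dtype: int64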
> df.columns
Index(['A', 'B', 'C'], dtype='object')
> df.index
RangeIndex(start=0, stop=4, step=1)
> df.sort_values('B')
A B C
0 1 444 abc
3 4 444 xyz
1 2 555 def
2 3 666 ghi
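> df.sort_values('A', ascending = False) # descending order
A B C
3 4 444 xyz
2 3 666 ghi
1 2 555 def
0 1 444 abc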
> df.isnull()
A B C
0 False False False
1 False False False
2 False False False
3 False False False
# PIVOT TABLES
# hierarchical index on A, B (df reconstructed here from the output below)
> df = pd.DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] \
    , 'B': ['one', 'one', 'two', 'two', 'one', 'one'] \
    , 'C': ['x', 'y', 'x', 'y', 'x', 'y'] \
    , 'D': [1, 3, 2, 5, 4, 1]})
> df.pivot_table(values = 'D', index = ['A', 'B'], columns = ['C'])
C x y
A B
bar one 4.0 1.0
    two NaN 5.0
foo one 1.0 3.0
    two 2.0 NaN
# **************************************************************************
# IO
# **************************************************************************
> import pandas as pd
> df = pd.read_csv('data/data.csv')
> df
A B C D E
0 1 2 3 4 5
1 6 7 8 9 10
> df.to_csv('data/data_2.csv') # writes the index as an extra column unless index = False
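# Excel IO works analogously (assumes an engine such as xlrd/openpyxl is installed;
# the file and sheet names here are placeholders)
> df = pd.read_excel('data/data.xlsx', sheet_name = 'Sheet1')
> df.to_excel('data/data_2.xlsx', sheet_name = 'Sheet1')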