Created
May 14, 2018 03:22
-
-
Save Adhira-Deogade/b95de8c75b9ab3463be3f542291e57ff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Segment 1 - Filtering and selecting data | |
import numpy as np | |
import pandas as pd | |
| |
from pandas import Series, DataFrame | |
Selecting and retrieving data | |
series_obj = Series(np.arange(8), index=['row 1', 'row 2','row 3','row 4','row 5', 'row 6', 'row 7', 'row 8']) | |
series_obj | |
row 1 0 | |
row 2 1 | |
row 3 2 | |
row 4 3 | |
row 5 4 | |
row 6 5 | |
row 7 6 | |
row 8 7 | |
dtype: int32 | |
series_obj['row 7'] | |
# ['label-index'] | |
# ♔┈♔┈♔┈( WHAT THIS DOES ) ┈♔┈♔┈♔ | |
# When you write square brackets with a label-index inside them, this tells Python to select and | |
# retrieve all records with that label-index. | |
series_obj['row 7'] | |
6 | |
series_obj[[0,7]] | |
# [integer index] | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
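# When you write square brackets with a list of integer positions inside them, this tells pandas to | |
# select and retrieve the records at those positions (position 0 is the first record). | |
# NOTE: newer pandas releases deprecate positional lookups through plain [] on a label-indexed | |
# Series; series_obj.iloc[[0, 7]] is the explicit positional form. | |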
series_obj[[0,7]] | |
row 1 0 | |
row 8 7 | |
dtype: int32 | |
np.random.seed(25) | |
DF_obj = DataFrame(np.random.rand(36).reshape((6,6)), | |
index=['row 1', 'row 2', 'row 3', 'row 4', 'row 5', 'row 6'], | |
columns=['column 1', 'column 2', 'column 3', 'column 4', 'column 5', 'column 6']) | |
DF_obj | |
column 1 column 2 column 3 column 4 column 5 column 6 | |
row 1 0.870124 0.582277 0.278839 0.185911 0.411100 0.117376 | |
row 2 0.684969 0.437611 0.556229 0.367080 0.402366 0.113041 | |
row 3 0.447031 0.585445 0.161985 0.520719 0.326051 0.699186 | |
row 4 0.366395 0.836375 0.481343 0.516502 0.383048 0.997541 | |
row 5 0.514244 0.559053 0.034450 0.719930 0.421004 0.436935 | |
row 6 0.281701 0.900274 0.669612 0.456069 0.289804 0.525819 | |
# object_name.ix[[row indexes], [column indexes]] | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
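# When you call the .ix[] special indexer, and pass in a set of row and column indexes, this tells | |
# Python to select and retrieve only those specific rows and columns. | |
# NOTE: .ix was deprecated in pandas 0.20 and removed in pandas 1.0; for label-based selection like | |
# the example below, use DF_obj.loc[['row 2', 'row 5'], ['column 5', 'column 2']] instead. | |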
DF_obj.ix[['row 2', 'row 5'], ['column 5', 'column 2']] | |
column 5 column 2 | |
row 2 0.402366 0.437611 | |
row 5 0.421004 0.559053 | |
Data slicing | |
# ['starting label-index':'ending label-index'] | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
# Data slicing allows you to select and retrieve all records from the starting label-index, to the | |
# ending label-index, and every record in between. Unlike positional slicing, a label slice | |
# includes the ending label. | |
series_obj['row 3':'row 7'] | |
row 3 2 | |
row 4 3 | |
row 5 4 | |
row 6 5 | |
row 7 6 | |
dtype: int32 | |
Comparing with scalars | |
| |
# object_name < scalar value | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
# You can use comparison operators (like greater than or less than) to return True / False values for | |
# all records, to indicate how each element compares to a scalar value. | |
DF_obj < .2 | |
column 1 column 2 column 3 column 4 column 5 column 6 | |
row 1 False False False True False True | |
row 2 False False False False False True | |
row 3 False False True False False False | |
row 4 False False False False False False | |
row 5 False False True False False False | |
row 6 False False False False False False | |
Filtering with scalars | |
# object_name[object_name > scalar value] | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
# You can also use comparison operators and scalar values for indexing, to return only the records | |
# that satisfy the comparison expression you write. | |
series_obj[series_obj > 6] | |
row 8 7 | |
dtype: int32 | |
Setting values with scalars | |
# ['label-index', 'label-index', 'label-index'] = scalar value | |
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔ | |
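# Setting is where you select all records associated with the specified label-indexes and set those | |
# values equal to a scalar. | |
# NOTE: a bare comma-separated key is a Python tuple; wrapping the labels in a list, as in | |
# series_obj[['row 1', 'row 5', 'row 8']] = 8, is the unambiguous way to address several labels. | |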
series_obj['row 1', 'row 5', 'row 8'] = 8 | |
series_obj | |
row 1 8 | |
row 2 1 | |
row 3 2 | |
row 4 3 | |
row 5 8 | |
row 6 5 | |
row 7 6 | |
row 8 8 | |
dtype: int32 | |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment