Created
September 23, 2021 19:08
-
-
Save Marceloromeugoncalves/a02952e8eeb483d0131266c1c1793e31 to your computer and use it in GitHub Desktop.
Exemplos Pandas DataFrame.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Useful Pandas Snippets.
# The snippets below use the conventional aliases `pd` and `np` and operate on
# an existing DataFrame named `df`; column names are placeholders.
import numpy as np
import pandas as pd

# Data Types and Conversion.
# Convert Series datatype to numeric (will error if column has non-numeric values).
# The converted Series is returned, not written back; assign it to keep it.
pd.to_numeric(df['Column Name'])
# Convert Series datatype to numeric, changing non-numeric values to NaN.
pd.to_numeric(df['Column Name'], errors='coerce')
# Change data type of DataFrame column.
df.column_name = df.column_name.astype(np.int64)
# ------------------------------------------------------
# Exploring and Filtering Data.
# Get a report of all duplicate records in DataFrame, based on specific columns.
# keep=False flags every member of a duplicate group, not only the repeats.
dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
# List unique values in DataFrame column.
df['Column Name'].unique()
# For each unique value in a DataFrame column, get a frequency count.
df['Column Name'].value_counts()
# Grab DataFrame rows when column = a specific value.
# Note: this rebinds `df` to the filtered rows.
df = df.loc[df.column == 'somevalue']
# Grab DataFrame rows when column value is present in a list.
# Build a small two-column frame from the dict's (key, value) pairs.
test_data = {'hi': 'yo', 'bye': 'later'}
df = pd.DataFrame(list(test_data.items()), columns=['col1', 'col2'])
valuelist = ['yo', 'heya']
# isin() yields a boolean mask; indexing with it keeps the matching rows.
df[df.col2.isin(valuelist)]
# Grab DataFrame rows where column value is not present in a list.
# Build a small two-column frame from the dict's (key, value) pairs.
test_data = {'h1': 'yo', 'bye': 'later'}
df = pd.DataFrame(list(test_data.items()), columns=['col1', 'col2'])
valuelist = ['yo', 'later']
# ~ inverts the boolean mask from isin(), keeping the non-matching rows.
df[~df.col2.isin(valuelist)]
# Select from DataFrame using criteria from multiple columns (use | instead of & to do an OR).
# Each comparison needs its own parentheses because & and | bind tighter than > and ==.
newdf = df[(df['column_one'] > 2004) & (df['column_two'] == 9)]
# Loop through rows in a DataFrame.
# iterrows() yields (index, row) pairs, where each row is a Series.
for index, row in df.iterrows():
    print(index, row['some column'])
# Much faster way to loop through DataFrame rows if you can work with tuples.
for row in df.itertuples():
    print(row)
# Get top n for each group of columns in a sorted DataFrame.
# head(5) keeps the first five rows of each group in their existing order,
# so sort `df` beforehand to make those rows the "top" ones.
top5 = df.groupby(['groupingcol1', 'groupingcol2']).head(5)
# Grab DataFrame rows where specific column is null/notnull.
# Use .notnull() in place of .isnull() to select the non-null rows instead.
newdf = df[df['column'].isnull()]
# Get quick count of rows in a DataFrame.
len(df.index)
# Get length of data in DataFrame column.
df.column_name.str.len()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment