Last active
April 3, 2021 18:17
-
-
Save spepechen/ae1a57ee72252a3a0a8d to your computer and use it in GitHub Desktop.
Handy pandas snippets! 🐼 means the snippet is for pandas 0.24.2; the others are still written for older versions. I'm planning to update them all soon. ----- Dec 2019
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################## BASIC ################################################
# 🐼 clean the strings in the header
# Step 1: lower-case every column name and trim leading/trailing whitespace.
df.columns = [x.lower().strip() for x in df.columns]
# Step 2: remove every remaining space character (' ') inside the names;
# other whitespace such as tabs is left untouched by this literal pattern.
df.columns = df.columns.str.replace(' ', '')
# check whether there is any missing value anywhere in df (evaluates to a single boolean)
df.isnull().values.any()
# get column-slices
# NOTE: the `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0;
# use `.iloc` for position-based and `.loc` for label-based selection.
df.iloc[:, :2]  # first two columns, by position
df.iloc[:, [0, 3]]  # first and fourth columns, by position
df.loc[:, 'Column_name':]  # from 'Column_name' through the last column, by label
# header as a list (two equivalent spellings)
list(df.columns.values)
df.columns.tolist()
# unique values in a column
df['col_name'].unique()
# series to dataframe, with the single column labelled 'count'
df = pd.DataFrame(that_series, columns=['count'])
# unique values and their counts in a column
df['col_name'].value_counts()
# plot the first 10 value counts as a bar chart
# (value_counts sorts by descending count by default, so these are the top 10)
df['col_name'].value_counts()[:10].plot(kind='bar', title="This is a title.")
# drop col by col name
# `axis` must be passed by keyword: positional use was deprecated and is rejected by pandas 2.0+.
df = df.drop('column_name', axis=1)
# drop col by index
# Note: zero indexed. This returns a new frame; df itself is not modified unless reassigned.
df.drop(df.columns[[0, 1, 3]], axis=1)
# drop columns based on a list (drop_col)
# Only names that are actually columns of df are passed on, so absent names do not raise.
df.drop([col for col in drop_col if col in df], axis=1, inplace=True)
# reorder col: index with the column names in the desired order
new_order = ['column1', 'column4', 'column2', 'column3']
df = df[new_order]
# rename col names
# Replace the whole header at once; the list needs exactly one name per column.
df.columns = ['Name1', 'Name2', 'Name3']
# Rename selected columns only, via an {old_name: new_name} mapping.
df = df.rename(columns={'two': 'new_name'})
# output file without index
df.to_csv('example.csv', index=False)
# tsv file (tab-separated; the index is still written here because index=False is not passed)
df.to_csv('example.tsv', sep='\t')
# use a list of values to select rows, and put selected rows in a new df
ItemsYouWant = ['a', 'b', 'c']
# Pass the list itself to isin(); wrapping it in another list would test against
# a single list-valued element instead of 'a', 'b' and 'c'.
new_df = df[df['old_column'].isin(ItemsYouWant)]
# drop NaN based on one col: keep only the rows where 'Things' is not null
df = df[df['Things'].notnull()]
# drop duplicates: rows are compared on the 'Things' column only
df = df.drop_duplicates('Things')
# split dictionary into columns
# Example frame whose 'b' column holds one dict per row.
df = pd.DataFrame({'a': [1, 2, 3], 'b': [{'c': 1}, {'d': 3}, {'c': 5, 'd': 6}]})
# Each dict becomes a row of a new frame with one column per key ('c', 'd');
# keys missing from a given row's dict come out as NaN.
df['b'].apply(pd.Series)
# create dict with two columns: 'code' values become the keys, 'country' values the values
# (if a code occurs more than once, the last pairing wins, as with any dict built from pairs)
a_dict = dict(zip(df['code'], df['country']))
################################## Time Date ################################################
# 🐼 turn str to datetime format (the strings are parsed as year-month-day, e.g. '2019-12-31')
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
################################## Formatting ################################################
# Raise the display limits so wide/long frames are shown in full instead of being
# truncated with an ellipsis.
pd.options.display.max_columns = 2000  # show up to 2000 columns
pd.options.display.max_rows = 2000  # show up to 2000 rows
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment