Last active
July 10, 2020 16:10
-
-
Save stanlee321/e06296ea48f65e67732ce78f612fc27b to your computer and use it in GitHub Desktop.
PANDAS TRICKS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split each "name" string on single spaces and expand the pieces into
# separate columns.
# With expand=True, str.split returns a DataFrame (one column per piece).
# That frame cannot be stored in the single "name" column when there is
# more than one piece, so the pieces are joined back onto df as
# name_0, name_1, ... and the original "name" column is kept.
name_parts = df["name"].str.split(" ", expand=True).add_prefix("name_")
df = df.join(name_parts)
# Group the rows by the combination of the "Team" and "Position" columns.
gkk = df.groupby(by=['Team', 'Position'])
# Outer-merge the two frames on their shared "Ministerio" column, replace
# the NaNs produced for unmatched rows with 0, and give the two work-count
# columns their display names.
df_values = (
    pd.merge(df_com_count, max_work_com, on="Ministerio", how='outer')
    .fillna(0)
    .rename(
        columns={
            "TotalWork_COM": "Acciones: Comunicacion",
            "TotalWork": "Acciones: Matriz de Acciones",
        }
    )
)
# Print a summary of the merged frame (columns, dtypes, non-null counts).
df_values.info()
| # Seaborn bar plot with rotated x-axis tick labels | |
def plot_bar(df_col, title = 'Call-center, top 10 coductas'):
    """Plot the ten most frequent values of ``df_col`` as a bar chart.

    Relies on ``plt`` (matplotlib.pyplot) and ``sns`` (seaborn) being
    imported by the surrounding file.

    Args:
        df_col: Object exposing ``value_counts()`` -- presumably a pandas
            Series (a single DataFrame column); confirm against callers.
        title: Text shown as the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # value_counts() sorts by frequency (descending) by default, so the
    # first ten entries are the ten most common values. head() replaces the
    # original tuple-style index ``[:10,]``.
    top_counts = df_col.value_counts().head(10)

    plt.figure(figsize=(10, 5))
    # x and y are passed by keyword: recent seaborn releases no longer
    # accept them positionally.
    axes = sns.barplot(x=top_counts.index, y=top_counts.values, alpha=0.8)
    # Rotate the category labels so long names do not overlap.
    plt.setp(axes.get_xticklabels(), rotation=90)
    plt.title(title)
    plt.ylabel('Number of Calls', fontsize=12)
    plt.xlabel('Tipo de Conducta', fontsize=12)
    plt.show()
def create_timestamp_indexes(self, df, column="", format=""):
    """Index ``df`` by timestamps parsed from one of its columns.

    The frame is modified in place: a ``time`` column is added and then
    made the index. The same frame is returned for convenience.

    Args:
        self: Unused; kept so existing call sites keep working.
        df: DataFrame to re-index.
        column: Name of the column holding the date/time values.
        format: Optional ``strptime``-style format string. When empty (the
            default) or ``None``, pandas infers the format.

    Returns:
        ``df`` itself, now indexed by the parsed ``time`` values.

    Raises:
        KeyError: If ``column`` is not a column of ``df``.
    """
    # An empty string is not a usable strptime pattern, so treat it as
    # "no format given" and let pandas infer the format instead.
    df['time'] = pd.to_datetime(df[column], format=format or None)
    df.set_index('time', inplace=True)
    return df
# Write several DataFrames to separate worksheets of one Excel workbook,
# using XlsxWriter as the engine.
# The context manager saves and closes the workbook when the block exits;
# the explicit ExcelWriter.save() call is not available in current pandas.
with pd.ExcelWriter('pandas_multiple.xlsx', engine='xlsxwriter') as writer:
    # One worksheet per DataFrame.
    df1.to_excel(writer, sheet_name='Sheet1')
    df2.to_excel(writer, sheet_name='Sheet2')
    df3.to_excel(writer, sheet_name='Sheet3')
# Group by "Ministerio" and take the maximum "Tareas" value of each group
# (a maximum, not a count), sorted ascending. reset_index turns the result
# into a frame with the columns "Ministerio" and "TotalWork".
# The built-in .max() reduction replaces agg(np.max), which newer pandas
# versions warn about.
max_work = (
    remote_df.groupby("Ministerio")["Tareas"]
    .max()
    .sort_values(ascending=True)
    .reset_index(name="TotalWork")
)
# Regex: extract the value that follows "?id=" in a URL-like string.
import re

# Match everything after a literal "?id=" up to, but not including, the
# next "&". The previous class [^&comment_id] excluded each of those
# characters individually, so any value containing e.g. "c", "o" or "d"
# was cut short.
pattern = re.compile(r'(?<=\?id=)[^&]*')

# `text` is the string to search; it must be defined before this runs.
match = pattern.search(text)
if match:
    name = match.group(0)
    print(name)
| ############################# | |
# Add a 0/1 "topN00" column flagging rows whose commenter also appears in
# the "commenter" column of df_comments_topN00_c. assign() returns a new
# frame; df_comments itself is left unchanged.
df_comments_top_N00 = df_comments.assign(
    topN00=lambda frame: frame["commenter"]
    .isin(df_comments_topN00_c["commenter"])
    .astype(int)
)
| ############################### | |
# Parse the raw comment timestamps into datetime objects.
from dateutil import parser

# dateutil parses each value of "comment_timestamp" individually.
df_comments["timestamp"] = df_comments["comment_timestamp"].apply(parser.parse)

# Quick sanity check: frame dimensions, then the first rows.
print(df_comments.shape)
df_comments.head()
# Horizontal bar chart of the number of posts per scraped page.
df_post_counts = df_posts_raw["real_page_name"].value_counts()
ax = df_post_counts.plot.barh(figsize=(13, 8))

# Write each count just past the end of its bar, nudged down slightly so
# the text lines up with the bar.
for bar_pos, post_count in enumerate(df_post_counts):
    ax.text(
        post_count + 1,
        bar_pos - 0.15,
        str(post_count),
        color='blue',
        fontweight='bold',
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment