Skip to content

Instantly share code, notes, and snippets.

@stanlee321
Last active July 10, 2020 16:10
Show Gist options
  • Save stanlee321/e06296ea48f65e67732ce78f612fc27b to your computer and use it in GitHub Desktop.
Save stanlee321/e06296ea48f65e67732ce78f612fc27b to your computer and use it in GitHub Desktop.
PANDAS TRICKS
# Split a string column on spaces and expand the pieces into new columns
# named name_0, name_1, ... With expand=True the result is a DataFrame (one
# column per piece), so it is joined onto df instead of being assigned to the
# single 'name' column, which fails as soon as any value contains a space.
df = df.join(df['name'].str.split(" ", expand=True).add_prefix('name_'))
# Group the rows by the combination of two key columns; aggregate or iterate
# over the resulting GroupBy object afterwards.
gkk = df.groupby(by=["Team", "Position"])
# Outer-merge two DataFrames on their shared "Ministerio" column, replace the
# missing values produced by the outer join with 0, and give the TotalWork
# columns readable labels.
# (pd.concat(..., axis=1) would align rows on the index rather than on the
# "Ministerio" key, which is why merge is used here.)
df_values = (
    df_com_count
    .merge(max_work_com, on="Ministerio", how="outer")
    .fillna(0)
    .rename(
        columns={
            "TotalWork_COM": "Acciones: Comunicacion",
            "TotalWork": "Acciones: Matriz de Acciones",
        }
    )
)
# Print a summary of the merged frame (columns, dtypes, non-null counts).
df_values.info()
# Seaborn bar plot of the most frequent values, with rotated x tick labels
def plot_bar(
    df_col,
    title='Call-center, top 10 coductas',
    top_n=10,
    xlabel='Tipo de Conducta',
    ylabel='Number of Calls',
):
    """Show a bar chart of the most frequent values in ``df_col``.

    Relies on ``plt`` (matplotlib.pyplot) and ``sns`` (seaborn) being
    available in the enclosing module.

    Args:
        df_col: pandas Series (for example a DataFrame column) whose values
            are counted.
        title: Title drawn above the chart.
        top_n: How many of the most frequent values to plot.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # value_counts() orders by frequency (descending), so head() keeps the
    # top entries; this replaces the legacy one-element tuple slice `[:10,]`.
    top_counts = df_col.value_counts().head(top_n)

    plt.figure(figsize=(10, 5))
    # x and y are passed by keyword: seaborn >= 0.12 only accepts `data`
    # positionally.
    bar_axes = sns.barplot(x=top_counts.index, y=top_counts.values, alpha=0.8)
    # Rotate the category labels so long names do not overlap.
    plt.setp(bar_axes.get_xticklabels(), rotation=90)
    plt.title(title)
    plt.ylabel(ylabel, fontsize=12)
    plt.xlabel(xlabel, fontsize=12)
    plt.show()
def create_timestamp_indexes(self, df, column="", format=""):
    """Index ``df`` by timestamps parsed from one of its columns.

    The parsed values are stored in a new ``'time'`` column, which then
    becomes the index. ``df`` is modified in place and is also returned;
    the source column is kept.

    Args:
        df: DataFrame to re-index.
        column: Name of the column holding the date/time values.
        format: ``strftime``-style format forwarded to ``pd.to_datetime``.
            An empty string (the default) lets pandas infer the format.

    Returns:
        The same ``df`` object, now indexed by ``'time'``.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    # An empty format string cannot match any non-empty value, so treat it
    # as "no format given" and let pandas infer the layout.
    df['time'] = pd.to_datetime(df[column], format=format or None)
    df.set_index('time', inplace=True)
    return df
# Write several DataFrames to separate sheets of one Excel workbook.
# The ExcelWriter (XlsxWriter engine) is used as a context manager so the
# workbook is saved and closed on exit, even if one of the writes raises.
# ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('pandas_multiple.xlsx', engine='xlsxwriter') as writer:
    # Each DataFrame goes to its own worksheet.
    df1.to_excel(writer, sheet_name='Sheet1')
    df2.to_excel(writer, sheet_name='Sheet2')
    df3.to_excel(writer, sheet_name='Sheet3')
# Group by "Ministerio" and take the largest "Tareas" value of each group,
# sorted ascending; reset_index turns the result into a DataFrame whose value
# column is named "TotalWork".
max_work = (
    remote_df.groupby("Ministerio")["Tareas"]
    .max()
    .sort_values(ascending=True)
    .reset_index(name="TotalWork")
)
# Regex: extract the value of the "id" query parameter from a URL-like string
import re

# Match everything after "?id=" up to (not including) the next "&", i.e. the
# text that precedes "&comment_id=...". A negated class written as
# [^&comment_id] excludes each of those characters individually, so it would
# cut off any id containing one of the letters c, o, m, e, n, t, i, d or "_".
pattern = re.compile(r'(?<=\?id=)[^&]*')

# `text` must already hold the string to search; print the id if one is found.
match = pattern.search(text)
if match:
    name = match.group(0)
    print(name)
# Add a 0/1 flag column "topN00": 1 when the row's commenter also appears in
# the "commenter" column of df_comments_topN00_c, otherwise 0.
df_comments_top_N00 = df_comments.assign(
    topN00=lambda frame: frame["commenter"]
    .isin(df_comments_topN00_c["commenter"])
    .astype(int)
)
# Parse the raw comment timestamp strings into datetime values
from dateutil import parser

# dateutil parses each value individually, so no explicit format is needed.
df_comments["timestamp"] = df_comments["comment_timestamp"].apply(parser.parse)
print(df_comments.shape)
# Preview of the first rows (only displayed in an interactive session).
df_comments.head()
# Horizontal bar chart of the number of posts per scraped page, with every
# bar annotated with its count.
df_post_counts = df_posts_raw["real_page_name"].value_counts()
ax = df_post_counts.plot.barh(figsize=(13, 8))
for bar_pos, n_posts in enumerate(df_post_counts):
    # Place the label just past the end of the bar, offset by -0.15 on the
    # y axis relative to the bar position.
    ax.text(
        n_posts + 1,
        bar_pos - 0.15,
        str(n_posts),
        color='blue',
        fontweight='bold',
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment