Skip to content

Instantly share code, notes, and snippets.

View dipanjanS's full-sized avatar
:octocat:

Dipanjan (DJ) Sarkar dipanjanS

:octocat:
View GitHub Profile
# Point plot of the `count` column against `day`, drawn from
# errors_by_date_sorted_pd_df.
c = sns.catplot(
    data=errors_by_date_sorted_pd_df,
    x='day',
    y='count',
    kind='point',
    height=5,
    aspect=1.5,
)
# Count the rows of not_found_df per day of month (derived from the `time`
# column) and order the result by day.
errors_by_date_sorted_df = (
    not_found_df
    .groupBy(F.dayofmonth('time').alias('day'))
    .count()
    .orderBy("day")
)

# Materialise the aggregate as a pandas DataFrame. The trailing bare name is
# presumably there so the frame is rendered when run as a notebook cell.
errors_by_date_sorted_pd_df = errors_by_date_sorted_df.toPandas()
errors_by_date_sorted_pd_df
# The 20 hosts with the most rows in not_found_df, highest count first.
hosts_404_count_df = (
    not_found_df
    .groupBy("host")
    .count()
    .orderBy("count", ascending=False)
    .limit(20)
)

# Print the table without truncating long cell values.
hosts_404_count_df.show(truncate=False)
# The 20 endpoints with the most rows in not_found_df, highest count first.
endpoints_404_count_df = (
    not_found_df
    .groupBy("endpoint")
    .count()
    .orderBy("count", ascending=False)
    .limit(20)
)

# Print the table without truncating long cell values.
endpoints_404_count_df.show(truncate=False)
# Keep only the log rows whose status is 404 and cache the result.
not_found_df = logs_df.where(logs_df["status"] == 404).cache()

# Report how many such rows there are.
print('Total 404 responses: {}'.format(not_found_df.count()))
# Point plot of the `avg_reqs` column against `day`, drawn from
# avg_daily_reqests_per_host_df.
c = sns.catplot(
    data=avg_daily_reqests_per_host_df,
    x='day',
    y='avg_reqs',
    kind='point',
    height=5,
    aspect=1.5,
)
# Count the rows of host_day_distinct_df per day and expose the count under
# the column name `total_hosts`.
daily_hosts_df = (
    host_day_distinct_df
    .groupBy('day')
    .count()
    .select(
        col("day"),
        col("count").alias("total_hosts"),
    )
)
# Start of a per-day aggregation over logs_df: project the day of month from
# the `time` column as `day`, then group by it.
# NOTE(review): this snippet is cut off after groupBy("day") -- the aggregation
# step and the closing parenthesis are missing here. Restore the remainder from
# the original notebook before running.
total_daily_reqests_df = (logs_df
.select(F.dayofmonth("time")
.alias("day"))
.groupBy("day")
# Point plot of the `count` column against `day`, drawn from daily_hosts_df.
c = sns.catplot(
    data=daily_hosts_df,
    x='day',
    y='count',
    kind='point',
    height=5,
    aspect=1.5,
)
# Save the current row-display limit in def_mr (presumably so it can be
# restored later), then cap the number of displayed rows at 10.
#
# The fully qualified key 'display.max_rows' is used on purpose: the short
# pattern 'max_rows' matches more than one option on newer pandas releases
# (e.g. 'display.max_rows' and 'styler.render.max_rows'), which makes
# get_option/set_option raise OptionError.
def_mr = pd.get_option('display.max_rows')
pd.set_option('display.max_rows', 10)
# Count the rows of host_day_distinct_df per day, order by day, and collect
# the result straight into a pandas DataFrame.
daily_hosts_df = (
    host_day_distinct_df
    .groupBy('day')
    .count()
    .orderBy("day")
    .toPandas()
)

# Bare name: presumably here so the frame is rendered as notebook cell output.
daily_hosts_df
# Remove fully duplicated rows from host_day_df.
host_day_distinct_df = host_day_df.distinct()

# Preview the first five rows without truncating cell values.
host_day_distinct_df.show(5, truncate=False)