Last active
February 2, 2024 22:58
-
-
Save eliasdabbas/9505564080b69fec7198d220c2cacab5 to your computer and use it in GitHub Desktop.
Create a heatmap of SERPs, using a table with columns: "keyword", "rank", and "domain"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.graph_objects as go | |
import pandas as pd | |
def serp_heatmap(df, num_domains=10, select_domain=None): | |
df = df.rename(columns={'domain': 'displayLink', | |
'searchTerms': 'keyword'}) | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_df = df[df['displayLink'].isin(top_domains) & df['displayLink'].ne('')] | |
top_df_counts_means = (top_df | |
.groupby('displayLink', as_index=False) | |
.agg({'rank': ['count', 'mean']})) | |
top_df_counts_means.columns = ['displayLink', 'rank_count', 'rank_mean'] | |
top_df = (pd.merge(top_df, top_df_counts_means) | |
.sort_values(['rank_count', 'rank_mean'], | |
ascending=[False, True])) | |
rank_counts = (top_df | |
.groupby(['displayLink', 'rank']) | |
.agg({'rank': ['count']}) | |
.reset_index()) | |
rank_counts.columns = ['displayLink', 'rank', 'count'] | |
summary = (df | |
.groupby(['displayLink'], as_index=False) | |
.agg({'rank': ['count', 'mean']}) | |
.sort_values(('rank', 'count'), ascending=False) | |
.assign(coverage=lambda df: (df[('rank', 'count')] | |
.div(df[('rank', 'count')] | |
.sum())))) | |
summary.columns = ['displayLink', 'count', 'avg_rank', 'coverage'] | |
summary['displayLink'] = summary['displayLink'].str.replace('www.', '', regex=True) | |
summary['avg_rank'] = summary['avg_rank'].round(1) | |
summary['coverage'] = (summary['coverage'].mul(100) | |
.round(1).astype(str).add('%')) | |
# num_queries might need to be set manually if you have the same query | |
# repeated across countries, languages, etc | |
num_queries = df['keyword'].nunique() | |
fig = go.Figure() | |
fig.add_scatter(x=top_df['displayLink'].str.replace('www\.', '', regex=True), | |
y=top_df['rank'], mode='markers', | |
marker={'size': 30, 'opacity': 1/rank_counts['count'].max()}) | |
fig.add_scatter(x=rank_counts['displayLink'].str.replace('www\.', '', regex=True), | |
y=rank_counts['rank'], mode='text', | |
text=rank_counts['count']) | |
for domain in rank_counts['displayLink'].unique(): | |
rank_counts_subset = rank_counts[rank_counts['displayLink'] == domain] | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[0], mode='text', | |
marker={'size': 50}, | |
text=str(rank_counts_subset['count'].sum())) | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[-1], mode='text', | |
text=format(rank_counts_subset['count'].sum() / num_queries, '.1%')) | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[-2], mode='text', | |
marker={'size': 50}, | |
text=str(round(rank_counts_subset['rank'] | |
.mul(rank_counts_subset['count']) | |
.sum() / rank_counts_subset['count'] | |
.sum(), 2))) | |
minrank, maxrank = int(min(top_df['rank'].unique())), int(max(top_df['rank'].unique())) | |
fig.layout.yaxis.tickvals = [-2, -1, 0] + list(range(minrank, maxrank+1)) | |
fig.layout.yaxis.ticktext = ['Avg. Pos.', 'Coverage', 'Total<br>appearances'] + list(range(minrank, maxrank+1)) | |
fig.layout.height = max([600, 100 + ((maxrank - minrank) * 50)]) | |
fig.layout.yaxis.title = 'SERP Rank (number of appearances)' | |
fig.layout.showlegend = False | |
fig.layout.paper_bgcolor = '#eeeeee' | |
fig.layout.plot_bgcolor = '#eeeeee' | |
fig.layout.autosize = False | |
fig.layout.margin.r = 2 | |
fig.layout.margin.l = 120 | |
fig.layout.margin.pad = 0 | |
fig.layout.hovermode = False | |
fig.layout.yaxis.autorange = 'reversed' | |
fig.layout.yaxis.zeroline = False | |
fig.layout.width = 1100 | |
return fig |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Raw data: https://bit.ly/3K9NS9B
Input table (sample rows):
Output chart: