Created
March 6, 2020 07:58
-
-
Save PandaWhoCodes/58a5f10dcd923492e609338408875986 to your computer and use it in GitHub Desktop.
convert ubersuggest keywords into a gap analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import pandas as pd | |
import os | |
def get_file_names(path=os.getcwd()): | |
return glob.glob(path + "/ubersuggest_*.csv") | |
def get_file(filename): | |
return pd.read_csv(filename) | |
def strip_to_domain(filename): | |
filename = filename.split("\\")[-1] | |
filename = filename[12:] | |
filename = filename[:-4] | |
return filename | |
def get_all_keywords(filenames): | |
keywords = [] | |
for filename in filenames: | |
data = get_file(filename) | |
keywords.extend(data["Keyword"]) | |
return list(set(keywords)) | |
def get_all_data(filenames): | |
domains = [] | |
data = {} | |
for name in filenames: | |
domain = strip_to_domain(name) | |
file = get_file(name) | |
data[domain] = file | |
domains.append(domain) | |
return domains, data | |
def combine_keywords(keywords, domains, data): | |
rows = [] | |
for keyword in keywords: | |
keyword_row = [keyword] | |
extra = [] | |
for domain in domains: | |
df = data[domain] | |
row = df.loc[df["Keyword"] == keyword] | |
if len(row): | |
keyword_row.append(row.iloc[0]["Position"]) | |
extra = [row.iloc[0]["Volume"], row.iloc[0]["EST.Visits"], row.iloc[0]["SEO Difficulty"]] | |
else: | |
keyword_row.append("-") | |
keyword_row.extend(extra) | |
rows.append(keyword_row) | |
return rows | |
def filter_rows(rows): | |
final_rows = [] | |
row_nums = [] | |
for index, row in enumerate(rows): | |
if row.count("-") <= 2: | |
row_nums.append(index) | |
for row_num in row_nums: | |
final_rows.append(rows[row_num]) | |
for index, row in enumerate(rows): | |
if index in row_nums: | |
continue | |
else: | |
final_rows.append(rows[index]) | |
return final_rows | |
additional_columns = ["Volume", "EST.Visits", "SEO Difficulty"] | |
if __name__ == '__main__': | |
names = get_file_names() | |
keywords = get_all_keywords(names) | |
domains, data = get_all_data(names) | |
top_columns = ["Keyword"] + domains + additional_columns | |
rows = combine_keywords(keywords, domains, data) | |
rows = filter_rows(rows) | |
final_df = pd.DataFrame(rows, columns=top_columns) | |
final_df.to_csv("Keyword_Gap_analysis.csv", index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment