Last active
February 18, 2023 19:43
-
-
Save domantasg/29fddc0052db8247a5db4038b9d577e6 to your computer and use it in GitHub Desktop.
Delete WordPress posts with less than 10 impressions in Google Search Console
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import base64 | |
from tqdm import tqdm | |
import pandas as pd | |
from datetime import datetime, timedelta | |
import searchconsole | |
# Site root URL. It is concatenated directly with the REST path below and is
# also used as the Search Console property key further down in this script.
domain = ""  # Must have trailing slash!
if domain and not domain.endswith("/"):
    # Enforce the trailing slash instead of silently building a broken URL
    # such as "https://example.comwp-json/wp/v2".
    domain += "/"

# WordPress Logins
url = domain + "wp-json/wp/v2"
user = ""
password = ""
# HTTP Basic auth: base64("user:password") goes into the Authorization header.
credentials = user + ':' + password
token = base64.b64encode(credentials.encode())
header = {'Authorization': 'Basic ' + token.decode('utf-8')}
def page_numbers(start=1):
    """Yield consecutive page numbers forever.

    Args:
        start: First page number to yield. Defaults to 1, which is what the
            fetch loop in this script relies on.

    Yields:
        ``start``, ``start + 1``, ``start + 2``, ... without end; the caller
        is responsible for breaking out of the iteration.
    """
    num = start
    while True:
        yield num
        num += 1
### Get post URL, ID, Date and add to dataframe
# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating a one-row frame per post copies the whole frame each time.
post_rows = []
try:
    for page in tqdm(page_numbers()):
        # Fetch the next page of posts (up to 100 per request)
        posts_page = requests.get(
            url + "/posts", params={"page": page, "per_page": 100}
        ).json()
        # An error response is a JSON object rather than a list of posts, so
        # it has to be checked BEFORE iterating over the payload.
        if isinstance(posts_page, dict):
            if posts_page.get("code") != "rest_post_invalid_page_number":
                # Anything other than "past the last page" is unexpected.
                print(
                    "Stopped fetching posts on page " + str(page) + ": "
                    + str(posts_page.get("message", posts_page))
                )
            break
        # An empty page means there is nothing more to read; without this
        # check a site that keeps answering with empty lists would loop forever.
        if not posts_page:
            break
        for post in posts_page:
            post_rows.append([post["id"], post["link"], post["date"]])
except (requests.RequestException, ValueError) as err:
    # Best effort, as before: keep the posts collected so far, but say why
    # the listing stopped instead of swallowing the error silently.
    print("Stopped fetching posts: " + str(err))
df = pd.DataFrame(post_rows, columns=['ID', 'URL', 'Date'])
## Get clicks and impressions for GSC
# Authenticate with the local OAuth client config / stored credentials files.
account = searchconsole.authenticate(client_config='client_secret.json', credentials='credentials.json')
webproperty = account[domain]
# Per-page report for the 30 days ending today.
report = webproperty.query.range('today', days=-30).dimension('page').get()
# Build the frame in one go; appending one-row frames with pd.concat is
# quadratic and leaves the numeric columns with object dtype.
df_gsc = pd.DataFrame(
    [[row.page, row.clicks, row.impressions] for row in report.rows],
    columns=['URL', 'Clicks', "Impressions"],
)
# Left-join the Search Console numbers onto the WordPress posts by URL.
merged = df.set_index('URL').join(df_gsc.set_index('URL'), how="left")
# Posts that do not appear in the report have no impressions value; count them as 0.
merged['Impressions'] = merged['Impressions'].fillna(0)
# Select only posts published between 180 and 30 days ago
# The cut-offs are compared as strings against the stored post dates, so they
# must use the same layout (assumed ISO 8601 "YYYY-MM-DDTHH:MM:SS" as returned
# by the WordPress REST API). Note %M is minutes; %m is the month.
date_format = '%Y-%m-%dT%H:%M:%S'
now = datetime.today()
today = now.strftime(date_format)
minus_30_days = (now - timedelta(days=30)).strftime(date_format)
minus_180_days = (now - timedelta(days=180)).strftime(date_format)
df2 = merged.loc[(merged["Date"].between(minus_180_days, minus_30_days)) & (merged['Impressions'] < 10)]
print(df2.to_markdown())
posts_to_delete = len(df2)
## Delete posts
# Announce how many posts were selected and wait for the operator to press
# Enter before anything is removed.
print(f"{posts_to_delete} posts will be deleted!")
input("Press Enter to continue...")
print("okayed")
# Running total of successful deletions, updated by delete_post().
deleted_posts = 0
def delete_post():
    """Send a DELETE request for every post in ``df2`` and count the successes.

    Reads the module-level ``df2``, ``url`` and ``header`` and increments the
    module-level ``deleted_posts`` once per HTTP 200 response. Any other
    status is reported on stdout so failed deletions are not lost silently.

    NOTE(review): no ``force`` parameter is sent, so WordPress presumably
    moves the posts to the trash rather than erasing them - confirm.
    """
    global deleted_posts
    for _, row in df2.iterrows():
        post_id = str(row["ID"])
        response = requests.delete(url + "/posts/" + post_id, headers=header)
        if response.status_code == 200:
            deleted_posts += 1
        else:
            print(
                "Could not delete post " + post_id
                + " (HTTP " + str(response.status_code) + ")"
            )
# Run the deletion pass, then report how many requests succeeded.
delete_post()
print(f"Posts deleted: {deleted_posts}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment