If a WordPress site doesn't have XML sitemaps, or you can't find them, there are a number of ways to figure out how many tags and posts that site has. The easiest way is to use the REST API to get the number. Note that both scripts below are fairly similar and could probably be simplified. Also: please don't run this against sites too aggressively. Not all sites handle requests for 100 posts from their REST API endpoints very well.
Last active
January 30, 2024 12:00
-
-
Save jdevalk/e24cac18f301496e27de437c48732c53 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Count the posts of a WordPress site by paging through its REST API.
#
# Usage:    <script> <wordpress_site_url>
# Requires: curl, jq

# Check if URL parameter is provided. A usage error is a diagnostic, so it is
# written to stderr rather than stdout.
if [ -z "$1" ]; then
  echo "Usage: $0 <wordpress_site_url>" >&2
  exit 1
fi

# Fail early, with a clear message, when a tool the script relies on is
# missing, instead of failing halfway through the first request.
for tool in curl jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: required command '$tool' not found" >&2
    exit 1
  fi
done

# Remove trailing slash from URL if present, so that appending "/wp-json/..."
# does not produce a double slash.
SITE_URL="${1%/}"

# Number of posts requested per API call.
PER_PAGE=100
# Page currently being requested (1-based).
PAGE=1
# Running total of posts counted so far.
TOTAL_POSTS=0
#######################################
# Fetch one page of posts from the WordPress REST API and print how many
# posts that page contains.
# Globals:
#   SITE_URL (read) - site base URL, without a trailing slash
#   PER_PAGE (read) - page size requested from the API
#   PAGE     (read) - page number to request
# Outputs:
#   stdout - the post count for the page, and nothing else
#   stderr - a diagnostic when the request fails
# Exits:
#   With status 1 on failure. When the function is invoked through $(...),
#   `exit` only terminates the command-substitution subshell, so the caller
#   must check the substitution's exit status.
#######################################
get_posts_count() {
  local url body http_code count error_message

  url="${SITE_URL}/wp-json/wp/v2/posts?per_page=${PER_PAGE}&page=${PAGE}"

  # Use a private temporary file instead of a fixed name in the current
  # directory; it is removed on every path out of the function.
  if ! body=$(mktemp); then
    echo "Error: could not create a temporary file" >&2
    exit 1
  fi

  # The body goes to the temp file, so curl's stdout is only the status code.
  http_code=$(curl -s -w "%{http_code}" -o "$body" "$url")

  # WordPress answers HTTP 400 with code "rest_post_invalid_page_number" when
  # the requested page lies beyond the last one. That happens whenever the
  # number of posts is an exact multiple of PER_PAGE, and simply means there
  # are no more posts.
  if [ "$http_code" = "400" ] \
    && [ "$(jq -r '.code? // empty' "$body" 2>/dev/null)" = "rest_post_invalid_page_number" ]; then
    rm -f -- "$body"
    echo 0
    return 0
  fi

  # Check HTTP status code
  if [ "$http_code" != "200" ]; then
    rm -f -- "$body"
    echo "Error: HTTP status code $http_code on page $PAGE" >&2
    exit 1
  fi

  # Check for errors in response body
  if grep -q '"status":"error"' "$body"; then
    error_message=$(jq -r '.error_description' "$body")
    rm -f -- "$body"
    echo "Error: $error_message" >&2
    exit 1
  fi

  # The response is expected to be a JSON array of posts; its length is the
  # number of posts on this page.
  if ! count=$(jq length "$body") || [ -z "$count" ]; then
    rm -f -- "$body"
    echo "Error: could not parse the response for page $PAGE" >&2
    exit 1
  fi

  rm -f -- "$body"
  echo "$count"
}
# Main loop to paginate through all posts. Requests page after page until a
# page comes back with fewer than PER_PAGE posts, which marks the last page.
while : ; do
  # get_posts_count runs inside a command substitution, so an `exit` in it
  # only ends that subshell. Check the status here and stop the whole script.
  if ! POSTS_COUNT=$(get_posts_count); then
    # If the helper wrote its diagnostic to stdout, it was captured into
    # POSTS_COUNT instead of being shown; relay it to stderr.
    if [ -n "$POSTS_COUNT" ]; then
      printf '%s\n' "$POSTS_COUNT" >&2
    fi
    exit 1
  fi

  # Refuse anything that is not a plain non-negative integer before it reaches
  # the arithmetic and the numeric comparison below.
  case "$POSTS_COUNT" in
    '' | *[!0-9]*)
      echo "Error: unexpected post count '$POSTS_COUNT' on page $PAGE" >&2
      exit 1
      ;;
  esac

  # Update total post count
  TOTAL_POSTS=$((TOTAL_POSTS + POSTS_COUNT))

  # Output for each request
  echo "Page: $PAGE, Posts on this page: $POSTS_COUNT - Current total: $TOTAL_POSTS"

  # Stop if the number of posts on this page is less than the max per page
  if [ "$POSTS_COUNT" -lt "$PER_PAGE" ]; then
    break
  fi

  PAGE=$((PAGE + 1))
done

echo "Total number of posts: $TOTAL_POSTS"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment