Skip to content

Instantly share code, notes, and snippets.

@robertdevore
Last active June 19, 2025 15:40
Show Gist options
  • Save robertdevore/65485581187976af112f781fbfa6a3ac to your computer and use it in GitHub Desktop.
Bulk URL Status Checker for Google Sheets

Bulk URL Status Checker for Google Sheets (Python Version)

Efficiently checks the HTTP status of URLs listed in a Google Sheet and writes the results back in batches. Designed to handle large datasets without timeouts, using Python, gspread, and requests.

Install dependencies

pip install gspread google-auth requests

Set up your Google Sheets API credentials (OAuth2 service account or user credentials):

import gspread
import requests
import time
import json
from google.auth.transport.requests import Request  # NOTE(review): appears unused in this script — confirm before removing
from google.oauth2.service_account import Credentials
# --- Configuration ---
SPREADSHEET_NAME = "Redirections"  # Google Sheets document title
SHEET_NAME = "Redirections"        # worksheet (tab) name within the document
BATCH_SIZE = 100                   # rows processed per pass, to avoid timeouts
PROGRESS_FILE = "progress.json" # Can also be replaced with another sheet column
# --- Auth & connect to Google Sheet ---
# Requires a service-account key file ("credentials.json") that has been
# granted access to the spreadsheet above.
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
creds = Credentials.from_service_account_file("credentials.json", scopes=SCOPES)
client = gspread.authorize(creds)
sheet = client.open(SPREADSHEET_NAME).worksheet(SHEET_NAME)
def load_progress(path=None):
    """Return the 1-based sheet row to resume processing from.

    Args:
        path: Progress-file path; defaults to the module-level
            ``PROGRESS_FILE`` when ``None``.

    Returns:
        The saved ``startRow`` value, or 2 (the first data row, below
        the header) when the file is missing, unreadable, or corrupt.
    """
    if path is None:
        path = PROGRESS_FILE
    try:
        with open(path, "r") as f:
            return json.load(f).get("startRow", 2)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers
        # json.JSONDecodeError from a corrupt or truncated file, which
        # previously crashed the script instead of restarting cleanly.
        return 2
def save_progress(row, path=None):
    """Persist the next row to process so an interrupted run can resume.

    Args:
        row: 1-based sheet row the next batch should start from.
        path: Progress-file path; defaults to the module-level
            ``PROGRESS_FILE`` when ``None``.
    """
    if path is None:
        path = PROGRESS_FILE
    with open(path, "w") as f:
        json.dump({"startRow": row}, f)
def check_url_status(url):
    """Fetch *url* and return a human-readable status string.

    Returns:
        "404 Not Found" for a 404 response, "Status Code: <n>" for any
        other response, or "Error: <message>" when the request fails
        (timeout, DNS failure, bad URL, ...). Never raises, so one bad
        URL cannot abort a batch.
    """
    try:
        # stream=True stops requests from downloading the full response
        # body — only the status line and headers are needed here. The
        # `with` block closes the connection promptly.
        with requests.get(url, timeout=5, stream=True) as response:
            if response.status_code == 404:
                return "404 Not Found"
            return f"Status Code: {response.status_code}"
    except Exception as e:
        # Deliberately broad: any failure is reported in the sheet
        # rather than crashing the run.
        return f"Error: {str(e)}"
def check_urls():
    """Check every URL in column A in batches, writing results to column B.

    Resumes from the row stored by ``load_progress``, processes up to
    ``BATCH_SIZE`` rows per pass, writes the batch back in a single
    ``sheet.update`` call, and sleeps between batches to stay under API
    rate limits. When all rows are done, progress is reset to row 2 so
    the next full run starts fresh.

    Implemented iteratively: the original recursed once per batch, which
    would hit Python's recursion limit (~1000 frames) on very large
    sheets (~100k URLs at BATCH_SIZE=100).
    """
    while True:
        # Re-fetch column A each pass (matches the original per-call
        # behavior; picks up edits made while the run is in progress).
        urls = sheet.col_values(1)[1:]  # Skip the header row (row 1).
        total_urls = len(urls)
        start_row = load_progress()
        if start_row > total_urls + 1:
            print("All URLs processed.")
            return
        end_row = min(start_row + BATCH_SIZE - 1, total_urls + 1)
        print(f"Processing rows {start_row} to {end_row}...")
        # urls[0] corresponds to sheet row 2, hence the -2 offset below.
        updates = [
            [check_url_status(urls[i])]
            for i in range(start_row - 2, end_row - 1)
        ]
        # Write the whole batch to column B in one API call.
        sheet.update(f"B{start_row}:B{end_row}", updates)
        save_progress(end_row + 1)
        if end_row >= total_urls + 1:
            print("Processing complete.")
            save_progress(2)  # Reset for the next full run.
            return
        print("Sleeping before next batch...")
        time.sleep(3)
# Run the batch checker only when executed as a script, so importing
# this module for reuse does not trigger a full crawl.
if __name__ == "__main__":
    check_urls()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment