|
import gspread |
|
import requests |
|
import time |
|
import json |
|
from google.auth.transport.requests import Request |
|
from google.oauth2.service_account import Credentials |
|
|
|
# Config |
|
SPREADSHEET_NAME = "Redirections" |
|
SHEET_NAME = "Redirections" |
|
BATCH_SIZE = 100 |
|
PROGRESS_FILE = "progress.json" # Can also be replaced with another sheet column |
|
|
|
# Auth & connect to Google Sheet |
|
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"] |
|
creds = Credentials.from_service_account_file("credentials.json", scopes=SCOPES) |
|
client = gspread.authorize(creds) |
|
sheet = client.open(SPREADSHEET_NAME).worksheet(SHEET_NAME) |
|
|
|
def load_progress(): |
|
try: |
|
with open(PROGRESS_FILE, "r") as f: |
|
return json.load(f).get("startRow", 2) |
|
except FileNotFoundError: |
|
return 2 |
|
|
|
def save_progress(row): |
|
with open(PROGRESS_FILE, "w") as f: |
|
json.dump({"startRow": row}, f) |
|
|
|
def check_url_status(url): |
|
try: |
|
response = requests.get(url, timeout=5) |
|
if response.status_code == 404: |
|
return "404 Not Found" |
|
else: |
|
return f"Status Code: {response.status_code}" |
|
except Exception as e: |
|
return f"Error: {str(e)}" |
|
|
|
def check_urls(): |
|
urls = sheet.col_values(1)[1:] # Skip header row |
|
start_row = load_progress() |
|
total_urls = len(urls) |
|
|
|
if start_row > total_urls + 1: |
|
print("All URLs processed.") |
|
return |
|
|
|
end_row = min(start_row + BATCH_SIZE - 1, total_urls + 1) |
|
|
|
print(f"Processing rows {start_row} to {end_row}...") |
|
updates = [] |
|
|
|
for i in range(start_row - 2, end_row - 1): |
|
url = urls[i] |
|
result = check_url_status(url) |
|
updates.append([result]) |
|
|
|
# Update column B |
|
sheet.update(f"B{start_row}:B{end_row}", updates) |
|
save_progress(end_row + 1) |
|
|
|
if end_row < total_urls + 1: |
|
print("Sleeping before next batch...") |
|
time.sleep(3) |
|
check_urls() # Recursive call for next batch |
|
else: |
|
print("Processing complete.") |
|
save_progress(2) # Reset for next full run |
|
|
|
# Run it |
|
check_urls() |