Skip to content

Instantly share code, notes, and snippets.

@tbbooher
Created May 27, 2025 01:19
Show Gist options
  • Save tbbooher/f773f74cfac3693f8e9979c2124ac63c to your computer and use it in GitHub Desktop.
Save tbbooher/f773f74cfac3693f8e9979c2124ac63c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import json
import time
from datetime import datetime, timezone

import requests
# ——— CONFIG ———
# Craigslist subdomains to query; the main loop issues one search per entry.
cities = [
    "dallas",
    "austin",
    "houston",
    "sanantonio",
    "denver",
    "seattle",
    "sfbay",
    "portland",
    "boise",
    "phoenix",
    "chicago",
    "boulder",
    "reno",
    "saltlakecity",
    "minneapolis",
    "charlotte",
    "atlanta",
    "losangeles",
    "newyork",
    "philadelphia",
]

# User-Agent header value sent with every search request.
UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/113.0.0.0 Safari/537.36"
)

# JSON Lines file that scraped records are appended to.
OUTPUT_FILE = "raw_data.jsonl"
def fetch_city(city, *, query="mountain bike", min_price=150, timeout=10):
    """Download the ``/search/bia`` results page for one Craigslist city.

    The search is restricted to listings with an image and sorted by
    listing date. The keyword-only parameters default to the values that
    were previously hard-coded, so ``fetch_city(city)`` behaves as before.

    Args:
        city: Craigslist subdomain, e.g. ``"austin"``.
        query: Search text sent as the ``query`` parameter.
        min_price: Value sent as the ``min_price`` parameter.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection failures and timeouts.
    """
    search_params = {
        "has_image": 1,
        "min_price": min_price,
        "query": query,
        "sort": "listing_date",
    }
    resp = requests.get(
        f"https://{city}.craigslist.org/search/bia",
        params=search_params,
        headers={"User-Agent": UA},
        timeout=timeout,
    )
    # Turn 4xx/5xx answers into exceptions instead of returning an error page.
    resp.raise_for_status()
    return resp.text
def _scrape_city(city, out):
    """Fetch one city's search page and append its listings to *out*.

    Every element of ``itemListElement`` in the page's
    ``ld_searchpage_results`` JSON-LD block is written as one JSON line of
    the form ``{"city": ..., "scraped_at": ..., "item": ...}``.

    A failed request, a missing JSON-LD block, or unparsable JSON is
    reported on stdout and skips the city instead of aborting the run.

    Args:
        city: Craigslist subdomain, passed through to ``fetch_city``.
        out: Writable text file object receiving the JSON Lines records.

    Returns:
        The number of records written (0 when the city was skipped).
    """
    print(f"[{city}] downloading…")
    try:
        html = fetch_city(city)
    except requests.RequestException as e:
        # A timeout or HTTP error for one city must not lose the others.
        print(f" ❌ request failed for {city}: {e}")
        return 0

    # pull out the JSON-LD block
    start = html.find('id="ld_searchpage_results"')
    if start == -1:
        print(f" ⚠️ no JSON-LD for {city}")
        return 0

    # assume the <script> content is valid JSON: take everything from the
    # first "{" after the marker up to the closing tag
    json_start = html.find("{", start)
    json_end = html.find("</script>", json_start)
    block = html[json_start:json_end].strip()
    try:
        data = json.loads(block)
    except json.JSONDecodeError as e:
        print(f" ❌ JSON parse error in {city}: {e}")
        return 0

    # output number of records found
    records = data.get("itemListElement", [])
    print(f" ℹ️ found {len(records)} records in {city}")

    # Same naive-UTC ISO string that datetime.utcnow() produced, without
    # calling the deprecated API; dropping tzinfo keeps the file format stable.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    for elt in records:
        rec = {
            "city": city,
            "scraped_at": timestamp,
            "item": elt.get("item", {}),
        }
        out.write(json.dumps(rec) + "\n")
    print(f" → wrote {len(records)} listings")
    return len(records)


with open(OUTPUT_FILE, "a", encoding="utf-8") as out:
    for city in cities:
        _scrape_city(city, out)
        # Pause after every city, including skipped ones, so failures do
        # not turn into back-to-back requests.
        time.sleep(2)
print("Done downloading raw data.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment