Created
March 4, 2023 15:13
-
-
Save pawanjay176/c93e82d2e31688e19c62ec67270974d1 to your computer and use it in GitHub Desktop.
Scrapes apartment listings on craigslist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
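# Usage: python3 apartment.py <RADIUS-FROM-YALETOWN>
# NOTE: the radius argument is not read anywhere in this file yet; the search
# radius currently comes from the RADIUS constant below.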
import requests | |
# Path of the text file that stores already-seen listing URLs, one per line.
DB_FILE = "db.txt"
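# Craigslist apparently does not keep the listing name at a fixed index within
# an item, so several candidate fields are probed for it.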
def get_name(a, b, c):
    """Return the payload of the first argument shaped like ``[6, value]``.

    The arguments are examined in the order ``a``, ``b``, ``c``. A match is a
    two-element list whose first element equals 6; the second element of the
    first match is returned. When nothing matches, the placeholder string
    "ohh noo" is returned instead.
    """
    for candidate in (a, b, c):
        if not isinstance(candidate, list):
            continue
        if len(candidate) == 2 and candidate[0] == 6:
            return candidate[1]
    return "ohh noo"
# Search endpoint; the query string is supplied separately through ``params``.
BASE_API_URL = "https://sapi.craigslist.org/web/v7/postings/search/full?"
# Prefix that every listing page URL built by get_listings() starts with.
BASE_LISTING_URL = "https://vancouver.craigslist.org/van/apa/d/"
# Upper price bound sent to the search as ``max_price``.
MAX_PRICE = 2500
# Value sent to the search as ``search_distance``.
RADIUS = 4
# Raw query string kept for reference; nothing else in this file reads it.
b = "batch=16-0-360-0-0&cc=US&lang=en&max_price=2500&postal=V6B5P2&searchPath=apa&search_distance=3.125"
# Query parameters passed to the search request.
params = dict(
    batch="16-0-360-0-0",
    cc="US",
    lang="en",
    max_price=str(MAX_PRICE),
    postal="V6B5P2",  # Yaletown
    searchPath="apa",
    search_distance=RADIUS,
)
def get_listings():
    """Fetch the current search results and return up to 20 listing URLs.

    Queries ``BASE_API_URL`` with the module-level ``params`` and builds one
    URL per returned item in the form
    ``BASE_LISTING_URL + name + "/" + <posting id> + ".html"``.

    Returns:
        list: The first 20 listing URLs, in the order the API returned them.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(BASE_API_URL, params=params, timeout=30)
    # Surface HTTP failures directly instead of as a confusing JSON/KeyError.
    r.raise_for_status()
    data = r.json()["data"]

    # This is the base of the unique id for each listing; item[0] is added to it.
    min_posting_id = data["decode"]["minPostingId"]

    listings = []
    for item in data["items"]:
        listing_uid = item[0] + min_posting_id
        # The name is looked for in three different positions of the item.
        name = get_name(item[6], item[7], item[5])
        try:
            listing_url = BASE_LISTING_URL + name + "/" + str(listing_uid) + ".html"
        except TypeError as e:
            # The name was not a string; report it and skip this item.
            print(e)
            print(name)
        else:
            listings.append(listing_url)
    return listings[:20]
def send_on_channel(listings_to_send):
    """Post each listing as its own message to the Telegram channel.

    Args:
        listings_to_send: Iterable of listing URL strings; one ``sendMessage``
            request is issued per element.

    Raises:
        requests.RequestException: If a request cannot be completed (for
            example on timeout). A completed request with an HTTP error
            status is only reported on stdout and does not stop the loop.
    """
    # NOTE(review): this credential is committed in source; it should be
    # rotated and loaded from configuration instead.
    bot_token = "6255577679:AAHhEW-FREcAnjv65qvKkayO33CfS0Qp4U0"
    channel_id = "-1001566731041"
    url = "https://api.telegram.org/bot" + bot_token + "/sendMessage"
    for listing in listings_to_send:
        # Pass values via ``params`` so they are URL-encoded; concatenating the
        # text into the query string breaks on characters such as '&' or '#'.
        response = requests.get(
            url,
            params={"chat_id": channel_id, "text": listing},
            timeout=30,
        )
        if not response.ok:
            print("sendMessage failed for %s: HTTP %s" % (listing, response.status_code))
def persist(listings):
    """Overwrite ``DB_FILE`` with the given listings, one per line.

    Args:
        listings: Iterable of strings; each is written followed by a newline.
    """
    with open(DB_FILE, "w") as db:
        for entry in listings:
            db.write(entry + '\n')
def load():
    """Return the lines of ``DB_FILE`` with surrounding whitespace stripped."""
    with open(DB_FILE, "r") as db:
        return [raw.strip() for raw in db]
def main():
    """Fetch listings, record them in ``DB_FILE`` and send the unseen ones.

    On the first run (no ``DB_FILE`` yet) every fetched listing is stored and
    sent. On later runs only listings missing from the stored file are sent,
    and the file is rewritten with the union of stored and fetched listings.
    """
    import os

    # First run: nothing stored yet, so everything fetched counts as new.
    if not os.path.isfile(DB_FILE):
        first_batch = get_listings()
        persist(first_batch)
        send_on_channel(first_batch)
        return

    known = load()
    fetched = set(get_listings())
    unseen = fetched.difference(known)
    everything = fetched.union(known)
    print(unseen)
    # The file is updated before the messages go out.
    persist(everything)
    send_on_channel(unseen)
# Run the scraper only when executed as a script, so that importing this
# module does not trigger network requests or file writes.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment