Carpetfizz · December 29, 2023 00:42
diff --git a/README.md b/README.md
diff --git a/output_file b/output_file
 TWO BEDROOM APARTMENT IN BERKELEY!
 http://sfbay.craigslist.org/eby/apa/5502409578.html
 Mon 21 Mar 08:14:23 PM
 $3195
 / 2br - 1000ft

 Great Remodeled Apt. in top location; few blocks to UCB
 http://sfbay.craigslist.org/eby/apa/5458020372.html
 Mon 21 Mar 04:53:00 PM
 $3300
 / 2br - 

 Walk to UC Berkeley from this lovely 2 bdr apt. available June 3
 http://sfbay.craigslist.org/eby/apa/5502053675.html
 Mon 21 Mar 03:30:12 PM
 $2400
 / 2br - 

 Lovely, sunny 3 bdrm, 2 bath apt. near UC Berkeley available June 3
 http://sfbay.craigslist.org/eby/apa/5502035413.html
 Mon 21 Mar 03:18:30 PM
 $3200
 / 3br - 

 Southside 2 bed/2bath Unit with On-Site Manager and POOL
 http://sfbay.craigslist.org/eby/apa/5501969486.html
 Mon 21 Mar 03:00:16 PM
 $2990
 / 2br - 

 Elegant 2-story cottage minutes from Cal and BART
 http://sfbay.craigslist.org/eby/apa/5479135846.html
 Mon 21 Mar 02:58:38 PM
 $2995
 / 2br - 

 ... about 100 more of these ...
diff --git a/requirements.txt b/requirements.txt
 lxml
 requests
diff --git a/scraper.py b/scraper.py
 from lxml import html
 from sys import argv
 import requests

 # Craigslist apartment search page that get_listings() downloads and scrapes
 # (within 1 of postal code 94720, max price 3300).
 # NOTE(review): the query string contains a second "?" ("bedrooms=2?format=rss").
 # Only the first "?" starts the query, so "format=rss" is not a separate
 # parameter here; it is part of the "bedrooms" value. Confirm what was
 # intended before changing it: get_listings() parses the response as HTML.
 WATCH_URL = "http://sfbay.craigslist.org/search/apa?search_distance=1&postal=94720&max_price=3300&bedrooms=2?format=rss"
 # Scheme and host that get_listings() prepends to each listing's href.
 BASE_URL = "http://sfbay.craigslist.org"

 class Listing:
     """One apartment listing scraped from a search results page.

     Attributes:
         _id: Value of the row's "data-pid" attribute.
         date: Posting date text.
         title: Listing headline.
         price: Price text.
         housing: Housing summary text.
         link: URL of the listing page.
     """

     def __init__(self, _id, date, title, price, housing, link):
         """Store the scraped fields unchanged."""
         self._id = _id
         self.link = link
         self.date = date
         self.title = title
         self.price = price
         self.housing = housing

     def __repr__(self):
         """Return "Listing(id, date, title, price, housing, link)"."""
         # str.format() is used instead of "+" concatenation so that a field
         # that is None (a missing attribute or an empty node) is rendered as
         # "None" instead of raising TypeError. Output for all-string fields
         # is unchanged.
         return "Listing({}, {}, {}, {}, {}, {})".format(
             self._id, self.date, self.title,
             self.price, self.housing, self.link)

 def get_listings(watch_url, base_url):
     """Download a search results page and parse it into Listing objects.

     Args:
         watch_url: URL of the search results page to download.
         base_url: Prefix joined to each listing's href to form its link.

     Returns:
         A list of Listing objects, one per parsed row, in document order.

     Raises:
         IndexError: If the results container, or any child looked up by
             position below, is not present in the page.
         TypeError: If a row's first child has no "href" attribute.
     """
     # Use the arguments rather than the module-level WATCH_URL / BASE_URL so
     # the function honours whatever page the caller asks for.
     page = requests.get(watch_url)
     tree = html.fromstring(page.content)

     # Rows are children of the fourth <div> under the element whose id is
     # "searchform".
     listing_list = tree.xpath('//*[@id="searchform"]/div[4]')[0]
     # The first and last children are skipped (presumably non-listing
     # elements; confirm against the current page markup).
     listings_raw = listing_list[1:-1]
     listings = []

     for listing in listings_raw:
         # Every lookup here is positional, so it depends on the exact
         # structure of the row markup.
         _id = listing.get("data-pid")
         date = listing[1][1][0].get("title")
         title = listing[1][1][1][0].text
         price = listing[1][2][0].text
         housing = listing[1][2][1].text
         link = base_url + listing[0].get("href")

         listings.append(Listing(_id, date, title, price, housing, link))

     return listings

 def write_listings(of):
     """Scrape the watched search page and write every listing to `of`.

     Each listing is written as five lines (title, link, date, price,
     housing) followed by one empty line.

     Args:
         of: Object with a write() method that accepts text.
     """
     for entry in get_listings(WATCH_URL, BASE_URL):
         record = (entry.title, entry.link, entry.date,
                   entry.price, entry.housing)
         for field in record:
             of.write(field + '\n')
         # Empty line separates consecutive listings.
         of.write('\n')

 # Script entry: append the scraped listings to the file named by the first
 # command-line argument, or print them when no argument is given.
 try:
     # Only the argv lookup is guarded, so an IndexError raised while
     # scraping is not misreported as a missing argument.
     output_file = argv[1]
 except IndexError:
     print("Missing argument: output file name")
     print(get_listings(WATCH_URL, BASE_URL))
 else:
     # An empty-string argument is ignored, as before.
     if output_file:
         # "with" closes the file even if scraping or writing fails.
         with open(output_file, 'a') as of:
             write_listings(of)
	TWO BEDROOM APARTMENT IN BERKELEY!
	http://sfbay.craigslist.org/eby/apa/5502409578.html
	Mon 21 Mar 08:14:23 PM
	$3195
	/ 2br - 1000ft

	Great Remodeled Apt. in top location; few blocks to UCB
	http://sfbay.craigslist.org/eby/apa/5458020372.html
	Mon 21 Mar 04:53:00 PM
	$3300
	/ 2br -

	Walk to UC Berkeley from this lovely 2 bdr apt. available June 3
	http://sfbay.craigslist.org/eby/apa/5502053675.html
	Mon 21 Mar 03:30:12 PM
	$2400
	/ 2br -

	Lovely, sunny 3 bdrm, 2 bath apt. near UC Berkeley available June 3
	http://sfbay.craigslist.org/eby/apa/5502035413.html
	Mon 21 Mar 03:18:30 PM
	$3200
	/ 3br -

	Southside 2 bed/2bath Unit with On-Site Manager and POOL
	http://sfbay.craigslist.org/eby/apa/5501969486.html
	Mon 21 Mar 03:00:16 PM
	$2990
	/ 2br -

	Elegant 2-story cottage minutes from Cal and BART
	http://sfbay.craigslist.org/eby/apa/5479135846.html
	Mon 21 Mar 02:58:38 PM
	$2995
	/ 2br -

	... about 100 more of these ...
	from lxml import html
	from sys import argv
	import requests

	# Craigslist apartment search page that get_listings() downloads and scrapes
	# (within 1 of postal code 94720, max price 3300).
	# NOTE(review): the query string contains a second "?" ("bedrooms=2?format=rss").
	# Only the first "?" starts the query, so "format=rss" is not a separate
	# parameter here; it is part of the "bedrooms" value. Confirm what was
	# intended before changing it: get_listings() parses the response as HTML.
	WATCH_URL = "http://sfbay.craigslist.org/search/apa?search_distance=1&postal=94720&max_price=3300&bedrooms=2?format=rss"
	# Scheme and host that get_listings() prepends to each listing's href.
	BASE_URL = "http://sfbay.craigslist.org"

	class Listing:
	    """One apartment listing scraped from a search results page.

	    Attributes:
	        _id: Value of the row's "data-pid" attribute.
	        date: Posting date text.
	        title: Listing headline.
	        price: Price text.
	        housing: Housing summary text.
	        link: URL of the listing page.
	    """

	    def __init__(self, _id, date, title, price, housing, link):
	        """Store the scraped fields unchanged."""
	        self._id = _id
	        self.link = link
	        self.date = date
	        self.title = title
	        self.price = price
	        self.housing = housing

	    def __repr__(self):
	        """Return "Listing(id, date, title, price, housing, link)"."""
	        # str.format() is used instead of "+" concatenation so that a field
	        # that is None (a missing attribute or an empty node) is rendered as
	        # "None" instead of raising TypeError. Output for all-string fields
	        # is unchanged.
	        return "Listing({}, {}, {}, {}, {}, {})".format(
	            self._id, self.date, self.title,
	            self.price, self.housing, self.link)

	def get_listings(watch_url, base_url):
	    """Download a search results page and parse it into Listing objects.

	    Args:
	        watch_url: URL of the search results page to download.
	        base_url: Prefix joined to each listing's href to form its link.

	    Returns:
	        A list of Listing objects, one per parsed row, in document order.

	    Raises:
	        IndexError: If the results container, or any child looked up by
	            position below, is not present in the page.
	        TypeError: If a row's first child has no "href" attribute.
	    """
	    # Use the arguments rather than the module-level WATCH_URL / BASE_URL so
	    # the function honours whatever page the caller asks for.
	    page = requests.get(watch_url)
	    tree = html.fromstring(page.content)

	    # Rows are children of the fourth <div> under the element whose id is
	    # "searchform".
	    listing_list = tree.xpath('//*[@id="searchform"]/div[4]')[0]
	    # The first and last children are skipped (presumably non-listing
	    # elements; confirm against the current page markup).
	    listings_raw = listing_list[1:-1]
	    listings = []

	    for listing in listings_raw:
	        # Every lookup here is positional, so it depends on the exact
	        # structure of the row markup.
	        _id = listing.get("data-pid")
	        date = listing[1][1][0].get("title")
	        title = listing[1][1][1][0].text
	        price = listing[1][2][0].text
	        housing = listing[1][2][1].text
	        link = base_url + listing[0].get("href")

	        listings.append(Listing(_id, date, title, price, housing, link))

	    return listings

	def write_listings(of):
	    """Scrape the watched search page and write every listing to `of`.

	    Each listing is written as five lines (title, link, date, price,
	    housing) followed by one empty line.

	    Args:
	        of: Object with a write() method that accepts text.
	    """
	    for entry in get_listings(WATCH_URL, BASE_URL):
	        record = (entry.title, entry.link, entry.date,
	                  entry.price, entry.housing)
	        for field in record:
	            of.write(field + '\n')
	        # Empty line separates consecutive listings.
	        of.write('\n')

	# Script entry: append the scraped listings to the file named by the first
	# command-line argument, or print them when no argument is given.
	try:
	    # Only the argv lookup is guarded, so an IndexError raised while
	    # scraping is not misreported as a missing argument.
	    output_file = argv[1]
	except IndexError:
	    print("Missing argument: output file name")
	    print(get_listings(WATCH_URL, BASE_URL))
	else:
	    # An empty-string argument is ignored, as before.
	    if output_file:
	        # "with" closes the file even if scraping or writing fails.
	        with open(output_file, 'a') as of:
	            write_listings(of)