gunyarakun · February 28, 2015 05:45
diff --git a/craigslist.py b/craigslist.py
 #!/usr/bin/env python
 # encoding: utf-8
 import json
 import urllib
 import urllib2
 import codecs
 import time
 from bs4 import BeautifulSoup

 # Slack API token handed to SlackClient, which sends it with every API request.
 # NOTE(review): the value below looks like a placeholder -- substitute a real
 # token before running.
 token = 'xoxp-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxx'
 # Variant of the search URL without any query string, kept for reference:
 #crawl_url = 'http://sfbay.craigslist.org/search/cto'
 # Search results page fetched on every pass of the polling loop; the query
 # string carries the search filters (sort order, price bounds, keywords).
 crawl_url = 'http://sfbay.craigslist.org/search/cto?sort=priceasc&minAsk=3000&maxAsk=10000&auto_transmission=2&query=mini+cooper+convertible'
 # Username attached to the messages the bot posts.
 bot_name = 'craigs bot'
 # Name of the Slack channel that receives the notifications.
 channel_name = 'craigs-check'

 class SlackClient:
    """Minimal Slack Web API client that posts messages to channels by name.

    On construction the client calls ``channels.list`` once and keeps the
    result in ``self.channels`` keyed by channel name, so ``post_message``
    can translate a channel name into a channel id.
    """

    def __init__(self, token, bot_name='gunyabot'):
        """Store the credentials and load the channel list.

        Args:
            token: Slack API token sent with every request.
            bot_name: Username attached to messages posted by this client.

        Raises:
            ValueError: If the initial ``channels.list`` call is rejected.
        """
        self.token = token
        self.bot_name = bot_name
        self.channels = self.get_channels()

    @staticmethod
    def encoded_dict(in_dict):
        """Return a copy of ``in_dict`` whose text values are UTF-8 bytes.

        Text values are encoded to UTF-8. Byte-string values are kept as they
        are but must already be valid UTF-8. Every other value is copied
        through unchanged.

        Args:
            in_dict: Mapping of form-field names to values.

        Returns:
            A new dict; ``in_dict`` itself is not modified.

        Raises:
            UnicodeDecodeError: If a byte-string value is not valid UTF-8.
        """
        # ``unicode`` on Python 2, ``str`` on Python 3.
        text_type = type(u'')
        out_dict = {}
        for key, value in in_dict.items():
            if isinstance(value, text_type):
                value = value.encode('utf8')
            elif isinstance(value, bytes):
                # Validation only: the decoded result is discarded, but
                # invalid UTF-8 raises here instead of being sent out.
                value.decode('utf8')
            out_dict[key] = value
        return out_dict

    def request(self, request='?', post_data=None, domain='slack.com'):
        """Call one Slack API method and return the decoded JSON response.

        Args:
            request: API method name, appended to ``https://<domain>/api/``.
            post_data: Optional mapping of form fields. It is copied, so
                neither the caller's dict nor a shared default is mutated.
            domain: Host name of the API endpoint.

        Returns:
            The parsed JSON response object.

        Raises:
            ValueError: If the response's ``ok`` field is falsy. The message
                includes the ``error`` field of the response when present.
        """
        payload = dict(post_data) if post_data else {}
        payload['token'] = self.token
        enc_post_data = urllib.urlencode(self.encoded_dict(payload))
        url = 'https://{}/api/{}'.format(domain, request)
        response = urllib2.urlopen(url, enc_post_data)
        try:
            response_str = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        response_obj = json.loads(response_str)
        if not response_obj['ok']:
            raise ValueError('Slack API call {!r} failed: {!r}'.format(
                request, response_obj.get('error')))
        return response_obj

    def get_channels(self):
        """Return the ``channels.list`` entries as a dict keyed by name.

        Raises:
            ValueError: If the API call is rejected.
        """
        channels = self.request('channels.list')['channels']
        return {channel['name']: channel for channel in channels}

    def post_message(self, channel_name, text, attachments=None):
        """Post ``text`` to the channel called ``channel_name``.

        Args:
            channel_name: Channel name; must be a key of ``self.channels``.
            text: Message text.
            attachments: Optional list of attachment dicts. It is serialised
                to JSON; omitting it sends an empty list.

        Returns:
            The parsed JSON response of ``chat.postMessage``.

        Raises:
            KeyError: If ``channel_name`` is not in ``self.channels``.
            ValueError: If the API call is rejected.
        """
        if attachments is None:
            attachments = []
        post_data = {
            'channel': self.channels[channel_name]['id'],
            'text': text,
            'username': self.bot_name,
            'attachments': json.dumps(attachments),
        }
        return self.request('chat.postMessage', post_data)

 def parse_item_page(item_url):
    """Download a posting page and build a Slack attachment dict for it.

    Args:
        item_url: URL of the posting page to download.

    Returns:
        A dict with ``title`` and ``title_link`` (``item_url``). ``title`` is
        the text found after the first tag following the ``postingtitle``
        heading, joined with the text of the node after it. When neither can
        be found, ``title`` falls back to ``item_url``. ``image_url`` is
        added when the page's first ``<img>`` has a ``src``.
    """
    response = urllib.urlopen(item_url)
    try:
        html = codecs.getreader('utf-8')(response).read()
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    soup = BeautifulSoup(html)

    title_text = None
    posting_title_h2 = soup.find('h2', class_='postingtitle')
    if posting_title_h2 is not None:
        # Expected layout: <first tag> <title node> <price node>.
        first_tag = posting_title_h2.find_next(True)
        title = first_tag.next_sibling if first_tag is not None else None
        price = title.next_sibling if title is not None else None
        pieces = [node.string for node in (title, price)
                  if node is not None and node.string is not None]
        if pieces:
            title_text = u''.join(pieces)

    info = {
        # An unexpected page layout must not crash the caller; fall back to
        # the URL so the attachment still identifies the posting.
        'title': title_text if title_text is not None else item_url,
        'title_link': item_url,
    }
    photo_img = soup.find('img')
    if photo_img is not None:
        image_src = photo_img.get('src')
        if image_src:
            info['image_url'] = image_src
    return info

 # Poll the search page forever and announce postings not seen before.
 sc = SlackClient(token, bot_name)
 # The first pass only records what is already listed, so the channel is not
 # flooded with every existing posting on start-up.
 first_run = True
 checked_item_hrefs = set()
 while True:
    response = urllib.urlopen(crawl_url)
    try:
        html = codecs.getreader('utf-8')(response).read()
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    soup = BeautifulSoup(html)
    item_links = soup.find_all('a', class_='hdrlnk')
    for item_link in item_links:
        href = item_link['href']
        if href in checked_item_hrefs:
            # Skip rather than stop: the results are not guaranteed to list
            # new postings first (the configured search uses sort=priceasc),
            # so unseen postings may follow an already-seen one.
            continue
        if not first_run:
            item_url = 'http://sfbay.craigslist.org' + href
            item_info = parse_item_page(item_url)
            item_info['fallback'] = item_info['title']
            sc.post_message(channel_name, u'Found ' + item_url, [item_info])
        checked_item_hrefs.add(href)
    first_run = False
    # Wait between polls.
    time.sleep(30)
diff --git a/pip.sh b/pip.sh
 pip install beautifulsoup4
	#!/usr/bin/env python
	# encoding: utf-8
	import json
	import urllib
	import urllib2
	import codecs
	import time
	from bs4 import BeautifulSoup

	# Slack API token handed to SlackClient, which sends it with every API request.
	# NOTE(review): the value below looks like a placeholder -- substitute a real
	# token before running.
	token = 'xoxp-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxx'
	# Variant of the search URL without any query string, kept for reference:
	#crawl_url = 'http://sfbay.craigslist.org/search/cto'
	# Search results page fetched on every pass of the polling loop; the query
	# string carries the search filters (sort order, price bounds, keywords).
	crawl_url = 'http://sfbay.craigslist.org/search/cto?sort=priceasc&minAsk=3000&maxAsk=10000&auto_transmission=2&query=mini+cooper+convertible'
	# Username attached to the messages the bot posts.
	bot_name = 'craigs bot'
	# Name of the Slack channel that receives the notifications.
	channel_name = 'craigs-check'

	class SlackClient:
	    """Minimal Slack Web API client that posts messages to channels by name.

	    On construction the client calls ``channels.list`` once and keeps the
	    result in ``self.channels`` keyed by channel name, so ``post_message``
	    can translate a channel name into a channel id.
	    """

	    def __init__(self, token, bot_name='gunyabot'):
	        """Store the credentials and load the channel list.

	        Args:
	            token: Slack API token sent with every request.
	            bot_name: Username attached to messages posted by this client.

	        Raises:
	            ValueError: If the initial ``channels.list`` call is rejected.
	        """
	        self.token = token
	        self.bot_name = bot_name
	        self.channels = self.get_channels()

	    @staticmethod
	    def encoded_dict(in_dict):
	        """Return a copy of ``in_dict`` whose text values are UTF-8 bytes.

	        Text values are encoded to UTF-8. Byte-string values are kept as
	        they are but must already be valid UTF-8. Every other value is
	        copied through unchanged.

	        Args:
	            in_dict: Mapping of form-field names to values.

	        Returns:
	            A new dict; ``in_dict`` itself is not modified.

	        Raises:
	            UnicodeDecodeError: If a byte-string value is not valid UTF-8.
	        """
	        # ``unicode`` on Python 2, ``str`` on Python 3.
	        text_type = type(u'')
	        out_dict = {}
	        for key, value in in_dict.items():
	            if isinstance(value, text_type):
	                value = value.encode('utf8')
	            elif isinstance(value, bytes):
	                # Validation only: the decoded result is discarded, but
	                # invalid UTF-8 raises here instead of being sent out.
	                value.decode('utf8')
	            out_dict[key] = value
	        return out_dict

	    def request(self, request='?', post_data=None, domain='slack.com'):
	        """Call one Slack API method and return the decoded JSON response.

	        Args:
	            request: API method name, appended to ``https://<domain>/api/``.
	            post_data: Optional mapping of form fields. It is copied, so
	                neither the caller's dict nor a shared default is mutated.
	            domain: Host name of the API endpoint.

	        Returns:
	            The parsed JSON response object.

	        Raises:
	            ValueError: If the response's ``ok`` field is falsy. The
	                message includes the ``error`` field when present.
	        """
	        payload = dict(post_data) if post_data else {}
	        payload['token'] = self.token
	        enc_post_data = urllib.urlencode(self.encoded_dict(payload))
	        url = 'https://{}/api/{}'.format(domain, request)
	        response = urllib2.urlopen(url, enc_post_data)
	        try:
	            response_str = response.read()
	        finally:
	            # Release the connection even if reading fails.
	            response.close()
	        response_obj = json.loads(response_str)
	        if not response_obj['ok']:
	            raise ValueError('Slack API call {!r} failed: {!r}'.format(
	                request, response_obj.get('error')))
	        return response_obj

	    def get_channels(self):
	        """Return the ``channels.list`` entries as a dict keyed by name.

	        Raises:
	            ValueError: If the API call is rejected.
	        """
	        channels = self.request('channels.list')['channels']
	        return {channel['name']: channel for channel in channels}

	    def post_message(self, channel_name, text, attachments=None):
	        """Post ``text`` to the channel called ``channel_name``.

	        Args:
	            channel_name: Channel name; must be a key of ``self.channels``.
	            text: Message text.
	            attachments: Optional list of attachment dicts. It is
	                serialised to JSON; omitting it sends an empty list.

	        Returns:
	            The parsed JSON response of ``chat.postMessage``.

	        Raises:
	            KeyError: If ``channel_name`` is not in ``self.channels``.
	            ValueError: If the API call is rejected.
	        """
	        if attachments is None:
	            attachments = []
	        post_data = {
	            'channel': self.channels[channel_name]['id'],
	            'text': text,
	            'username': self.bot_name,
	            'attachments': json.dumps(attachments),
	        }
	        return self.request('chat.postMessage', post_data)

	def parse_item_page(item_url):
	    """Download a posting page and build a Slack attachment dict for it.

	    Args:
	        item_url: URL of the posting page to download.

	    Returns:
	        A dict with ``title`` and ``title_link`` (``item_url``). ``title``
	        is the text found after the first tag following the
	        ``postingtitle`` heading, joined with the text of the node after
	        it. When neither can be found, ``title`` falls back to
	        ``item_url``. ``image_url`` is added when the page's first
	        ``<img>`` has a ``src``.
	    """
	    response = urllib.urlopen(item_url)
	    try:
	        html = codecs.getreader('utf-8')(response).read()
	    finally:
	        # Release the connection even if reading or decoding fails.
	        response.close()
	    soup = BeautifulSoup(html)

	    title_text = None
	    posting_title_h2 = soup.find('h2', class_='postingtitle')
	    if posting_title_h2 is not None:
	        # Expected layout: <first tag> <title node> <price node>.
	        first_tag = posting_title_h2.find_next(True)
	        title = first_tag.next_sibling if first_tag is not None else None
	        price = title.next_sibling if title is not None else None
	        pieces = [node.string for node in (title, price)
	                  if node is not None and node.string is not None]
	        if pieces:
	            title_text = u''.join(pieces)

	    info = {
	        # An unexpected page layout must not crash the caller; fall back
	        # to the URL so the attachment still identifies the posting.
	        'title': title_text if title_text is not None else item_url,
	        'title_link': item_url,
	    }
	    photo_img = soup.find('img')
	    if photo_img is not None:
	        image_src = photo_img.get('src')
	        if image_src:
	            info['image_url'] = image_src
	    return info

	# Poll the search page forever and announce postings not seen before.
	sc = SlackClient(token, bot_name)
	# The first pass only records what is already listed, so the channel is not
	# flooded with every existing posting on start-up.
	first_run = True
	checked_item_hrefs = set()
	while True:
	    response = urllib.urlopen(crawl_url)
	    try:
	        html = codecs.getreader('utf-8')(response).read()
	    finally:
	        # Release the connection even if reading or decoding fails.
	        response.close()
	    soup = BeautifulSoup(html)
	    item_links = soup.find_all('a', class_='hdrlnk')
	    for item_link in item_links:
	        href = item_link['href']
	        if href in checked_item_hrefs:
	            # Skip rather than stop: the results are not guaranteed to
	            # list new postings first (the configured search uses
	            # sort=priceasc), so unseen postings may follow a seen one.
	            continue
	        if not first_run:
	            item_url = 'http://sfbay.craigslist.org' + href
	            item_info = parse_item_page(item_url)
	            item_info['fallback'] = item_info['title']
	            sc.post_message(channel_name, u'Found ' + item_url, [item_info])
	        checked_item_hrefs.add(href)
	    first_run = False
	    # Wait between polls.
	    time.sleep(30)