@retasretas
Last active February 12, 2017 11:20
Script that scrapes the Nikkan Sports site and notifies Slack of Ma-kun's (Masahiro Tanaka's) next scheduled start, running on ScraperWiki
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import scraperwiki
import requests
import re
from pyquery import PyQuery as pq  # third-party HTML parsing library

# urllib compatibility for both Python 3 and Python 2
try:
    from urllib.parse import urljoin
    from urllib.parse import urlencode
    import urllib.request as urlrequest
except ImportError:
    from urlparse import urljoin
    from urllib import urlencode
    import urllib2 as urlrequest
import json


class Slack():
    def __init__(self, url=""):
        self.url = url
        self.opener = urlrequest.build_opener(urlrequest.HTTPHandler())

    def notify(self, **kwargs):
        """
        Send message to slack API
        """
        return self.send(kwargs)

    def send(self, payload):
        """
        Send payload to slack API
        """
        payload_json = json.dumps(payload)
        data = urlencode({"payload": payload_json})
        req = urlrequest.Request(self.url)
        response = self.opener.open(req, data.encode('utf-8')).read()
        return response.decode('utf-8')


# Load the previously saved headline (empty on the first run, when no row exists yet)
try:
    pre_text = scraperwiki.sqlite.select("text from swdata where id=0 limit 1")[0]['text']
except Exception:
    pre_text = ''

# Fetch the Nikkan Sports news list for Masahiro Tanaka and parse it with PyQuery
html = requests.get("http://www.nikkansports.com/baseball/mlb/japanese/masahiro-tanaka/news/")
d = pq(html.content)

# Look for the first headline containing "次" ("next"), i.e. an article about the next start
for i in d('.newslist li a').items():
    pattern = u"次"
    matchOB = re.search(pattern, i.text())
    if matchOB and pre_text != i.text():
        print('---------Send Slack----------')
        # Save the headline so it is not posted again on the next run
        unique_keys = ['id']
        data = {'id': 0, 'text': i.text()}
        scraperwiki.sql.save(unique_keys, data)
        slack = Slack(url="the incoming-webhook URL of the channel you want to post to")
        slack.notify(text=i.text())
        break
    elif matchOB and pre_text == i.text():
        # Same headline as last time; nothing new to notify
        break
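
As a quick sanity check, the Slack class can be exercised on its own before wiring it into the scraper. This is a minimal sketch, assuming the webhook URL is supplied via an environment variable named SLACK_WEBHOOK_URL (a placeholder of mine, not something the original script reads):

import os

# Minimal standalone test of the Slack notifier defined above.
# SLACK_WEBHOOK_URL is an assumed environment variable name, not part of the original script.
webhook_url = os.environ.get("SLACK_WEBHOOK_URL", "")
if webhook_url:
    print(Slack(url=webhook_url).notify(text="test message from the Tanaka scraper"))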