Created
June 7, 2016 07:45
-
-
Save efazati/fe35d84ee9d1f760f4b5230ef29609a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import contextlib
import urllib
from datetime import datetime
from pprint import pprint

import requests
import telepot
from lxml import html
from pymongo import MongoClient

# --- Telegram settings -------------------------------------------------
# Both values are placeholders in this file and have to be filled in
# before the script can post anything.
token = ''
chat_id = '@...'

# --- Scrape target -----------------------------------------------------
url = "http://www.yjc.ir/fa/photo"

# --- MongoDB handles ---------------------------------------------------
# The "article" collection of the "telepy" database is what store_db()
# queries and inserts into.
client = MongoClient('mongodb://localhost:27017/')
db = client['telepy']
article_obj = db['article']

# NOTE(review): nothing visible in this file reads this global (the
# assignment inside data_gathering() only ever created a local) -- it looks
# like a debugging leftover; confirm before removing.
element = ''

def data_gathering():
    """Scrape the photo listing at ``url`` and return one dict per article.

    The page is downloaded with ``requests``, parsed with ``lxml.html`` and
    every element carrying the CSS class ``ax_faal`` is treated as one
    article tile.

    Returns:
        A list of dicts with the keys ``img``, ``title``, ``data`` (the
        scrape timestamp), ``source`` (always ``'yjc'``) and, when the tile
        contains an ``<a>`` element, ``url``.

    Raises:
        requests.exceptions.RequestException: if the download fails or does
            not answer within the timeout.
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    page = requests.get(url, timeout=30)
    tree = html.fromstring(page.content)

    result = []
    for article in tree.find_class('ax_faal'):
        images = article.cssselect('img')
        titles = article.cssselect('.title_txt1')
        if not images or not titles:
            # Both values are required downstream (row['img'], row['title']);
            # skip a malformed tile instead of aborting the whole scrape
            # with an IndexError.
            continue

        item = {}
        # Last attribute value of the first <img> -- presumably its src;
        # verify against the page markup.
        item['img'] = images[0].values()[-1]

        # NOTE(review): the indentation of the original was lost; only the
        # 'url' assignment is assumed to be conditional on a link being
        # present -- confirm.
        links = article.cssselect('a')
        if links:
            # Second attribute value of the first <a> -- presumably its href;
            # verify against the page markup.
            item['url'] = links[0].values()[1]

        item['title'] = titles[0].text
        item['data'] = datetime.now()
        item['source'] = 'yjc'
        result.append(item)
    return result

def submit_data(bot, row):
    """Post one scraped article to the Telegram chat if it is new.

    Args:
        bot: object exposing ``sendPhoto`` (a ``telepot.Bot`` in this file).
        row: article dict; ``row['img']`` and ``row['title']`` are read.

    Returns:
        Whatever ``bot.sendPhoto`` returns when the article was new, or
        ``None`` when ``store_db`` reported it as already known.
    """
    if not store_db(row):
        return None

    # NOTE(review): the row is stored before it is sent, so a failed upload
    # is not retried on the next run -- confirm this is intended.
    #
    # The Python 2 urlopen() result is not a context manager; closing()
    # makes sure the HTTP handle is released once the upload has finished
    # (or failed) instead of leaking it.
    with contextlib.closing(urllib.urlopen(row['img'])) as rawimg:
        print('submit img url %s' % row['img'])
        print(datetime.now())
        return bot.sendPhoto(
            chat_id,
            ('newsimage.jpg', rawimg),
            caption='%s - @axekhabar' % row['title'])

def store_db(row):
    """Insert ``row`` into the article collection unless it is already there.

    An article counts as already stored when a document with the same
    ``img`` value exists in ``article_obj``.

    Args:
        row: article dict; ``row['img']`` is used as the lookup key.

    Returns:
        ``True`` if the row was inserted, ``False`` if a matching document
        was found and nothing was written.
    """
    if article_obj.find_one({"img": row['img']}):
        return False
    # The inserted id is not needed by any caller, so it is not kept.
    article_obj.insert_one(row)
    return True

def submit_alldata(data):
    """Create a bot from ``token`` and hand every row of ``data`` to submit_data().

    Args:
        data: iterable of article dicts as produced by data_gathering().
    """
    bot = telepot.Bot(token)
    # The reply is not used; the call is kept so the same request is still
    # made before any row is processed.
    bot.getMe()
    for row in data:
        submit_data(bot, row)

# Script body: runs on import/execution exactly as before -- log the start
# time, scrape the listing page, then post whatever is new.
# A single-argument print() call emits the same "started <timestamp>" line
# as the former two-operand print statement and also parses on Python 3.
print('started %s' % datetime.now())
result = data_gathering()
submit_alldata(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment