ESWZY · September 28, 2021 21:06
diff --git a/telegram_news_example.py b/telegram_news_example.py
 # -*- coding: UTF-8 -*-
 import hashlib
 import json
 import os

 from sqlalchemy import create_engine
 from sqlalchemy.orm import Session
 from telegram_news.template import (
    InfoExtractor,
    NewsPostman,
    InfoExtractorJSON,
    NewsPostmanJSON
 )
 from telegram_news.utils import xml_to_json

 # Required settings, all taken from environment variables.
 # TOKEN: the bot token obtained from @BotFather.
 bot_token = os.environ.get("TOKEN")

 # CHANNEL: the channel to post to; add the bot to it as an administrator.
 channel = os.environ.get("CHANNEL")

 # DATABASE_URL: the database in which old messages are stored.
 DATABASE_URL = os.environ.get("DATABASE_URL")

 # Build the engine, open a connection on it and bind the session to that
 # connection.
 engine = create_engine(DATABASE_URL)
 db = Session(bind=engine.connect())

 # First news source: the Wikinews main page
 url = "https://en.wikinews.org/wiki/Main_Page"
 tag = "Wiki News"
 table_name = "wikinews"

 # Extractor holding the rules used to process this source's data
 ie = InfoExtractor()

 # Every element is picked with a CSS selector
 ie.set_list_selector("#MainPage_latest_news_text > ul > li")
 ie.set_title_selector("#firstHeading")
 ie.set_paragraph_selector("#mw-content-text > div > p:not(p:nth-child(1))")
 ie.set_time_selector("#mw-content-text > div > p:nth-child(1) > strong")
 ie.set_source_selector("span.sourceTemplate")

 # Limit the length of a post to 2000 (the upper bound is 4096)
 ie.max_post_length = 2000

 # Postman that manages sending: one list URL, one target channel
 np = NewsPostman(
     listURLs=[url],
     sendList=[channel],
     db=db,
     tag=tag,
 )
 np.set_bot_token(bot_token)
 np.set_extractor(ie)
 np.set_table_name(table_name)

 #############################################################

 # Second news source: cnBeta
 url_2 = "https://www.cnbeta.com/"
 tag_2 = "cnBeta"
 table_name_2 = "cnbetanews"

 ie_2 = InfoExtractor()
 ie_2.set_list_selector(".items-area > div > dl > dt > a")
 ie_2.set_title_selector("header > h1")

 # A single selector may name several targets at once (comma-separated)
 ie_2.set_paragraph_selector(
     "div.cnbeta-article-body > div.article-summary > p, "  # summary paragraphs
     "div.cnbeta-article-body > div.article-content > p"  # content paragraphs
 )
 ie_2.set_time_selector("header > div > span:nth-child(1)")
 ie_2.set_source_selector("header > div > span.source")

 # Choose the images to display; with an image the maximum post length
 # drops to 1024
 ie_2.set_image_selector(
     "div.cnbeta-article-body > div.article-summary > p img, "  # images in the summary
     "div.cnbeta-article-body > div.article-content > p img"  # images in the content
 )
 ie_2.max_post_length = 1000

 np_2 = NewsPostman(listURLs=[url_2], sendList=[channel], db=db, tag=tag_2)
 np_2.set_extractor(ie_2)
 np_2.set_table_name(table_name_2)

 #############################################################

 # Third news source: an SCMP feed (the URL path points at an RSS feed)
 url_3 = "https://www.scmp.com/rss/91/feed"
 tag_3 = "SCMP"
 table_name_3 = "scmpnews"

 # JSON variant of the info extractor; it is configured with routers rather
 # than CSS selectors in the statements that follow
 ie_3 = InfoExtractorJSON()

 # Pre-processing hook: turn the XML string into a JSON string
 def list_pre_process(text):
     """Return the XML string *text* converted to a JSON string.

     The output of ``xml_to_json`` is decoded with ``json.loads`` and then
     serialised again with ``json.dumps``.
     """
     parsed = json.loads(xml_to_json(text))
     return json.dumps(parsed)

 # Register the pre-processing function for the list document
 ie_3.set_list_pre_process_policy(list_pre_process)

 # Every field is addressed by a list of keys
 ie_3.set_list_router(["rss", "channel", "item"])
 ie_3.set_link_router(["link"])
 ie_3.set_title_router(["title"])
 ie_3.set_paragraphs_router(["description"])
 ie_3.set_time_router(["pubDate"])
 ie_3.set_source_router(["author"])
 ie_3.set_image_router(["media:thumbnail", "@url"])

 # Custom ID for a news item, derived from its link
 def id_policy(link):
     """Return the hexadecimal MD5 digest of *link* encoded as UTF-8."""
     digest = hashlib.md5()
     digest.update(link.encode("utf-8"))
     return digest.hexdigest()

 # Use the custom ID function for news items
 ie_3.set_id_policy(id_policy)

 np_3 = NewsPostmanJSON(
     listURLs=[url_3],
     sendList=[channel],
     db=db,
     tag=tag_3,
 )
 np_3.set_extractor(ie_3)
 np_3.set_table_name(table_name_3)

 # When run as a script, call poll() on each postman in turn
 if __name__ == "__main__":
     for postman in (np, np_2, np_3):
         postman.poll()
	# -*- coding: UTF-8 -*-
	import hashlib
	import json
	import os

	from sqlalchemy import create_engine
	from sqlalchemy.orm import Session
	from telegram_news.template import (
	InfoExtractor,
	NewsPostman,
	InfoExtractorJSON,
	NewsPostmanJSON
	)
	from telegram_news.utils import xml_to_json

	# Required settings, all taken from environment variables.
	# TOKEN: the bot token obtained from @BotFather.
	bot_token = os.environ.get("TOKEN")

	# CHANNEL: the channel to post to; add the bot to it as an administrator.
	channel = os.environ.get("CHANNEL")

	# DATABASE_URL: the database in which old messages are stored.
	DATABASE_URL = os.environ.get("DATABASE_URL")

	# Build the engine, open a connection on it and bind the session to that
	# connection.
	engine = create_engine(DATABASE_URL)
	db = Session(bind=engine.connect())

	# First news source: the Wikinews main page
	url = "https://en.wikinews.org/wiki/Main_Page"
	tag = "Wiki News"
	table_name = "wikinews"

	# Extractor holding the rules used to process this source's data
	ie = InfoExtractor()

	# Every element is picked with a CSS selector
	ie.set_list_selector("#MainPage_latest_news_text > ul > li")
	ie.set_title_selector("#firstHeading")
	ie.set_paragraph_selector("#mw-content-text > div > p:not(p:nth-child(1))")
	ie.set_time_selector("#mw-content-text > div > p:nth-child(1) > strong")
	ie.set_source_selector("span.sourceTemplate")

	# Limit the length of a post to 2000 (the upper bound is 4096)
	ie.max_post_length = 2000

	# Postman that manages sending: one list URL, one target channel
	np = NewsPostman(
	    listURLs=[url],
	    sendList=[channel],
	    db=db,
	    tag=tag,
	)
	np.set_bot_token(bot_token)
	np.set_extractor(ie)
	np.set_table_name(table_name)

	#############################################################

	# Second news source: cnBeta
	url_2 = "https://www.cnbeta.com/"
	tag_2 = "cnBeta"
	table_name_2 = "cnbetanews"

	ie_2 = InfoExtractor()
	ie_2.set_list_selector(".items-area > div > dl > dt > a")
	ie_2.set_title_selector("header > h1")

	# A single selector may name several targets at once (comma-separated)
	ie_2.set_paragraph_selector(
	    "div.cnbeta-article-body > div.article-summary > p, "  # summary paragraphs
	    "div.cnbeta-article-body > div.article-content > p"  # content paragraphs
	)
	ie_2.set_time_selector("header > div > span:nth-child(1)")
	ie_2.set_source_selector("header > div > span.source")

	# Choose the images to display; with an image the maximum post length
	# drops to 1024
	ie_2.set_image_selector(
	    "div.cnbeta-article-body > div.article-summary > p img, "  # images in the summary
	    "div.cnbeta-article-body > div.article-content > p img"  # images in the content
	)
	ie_2.max_post_length = 1000

	np_2 = NewsPostman(listURLs=[url_2], sendList=[channel], db=db, tag=tag_2)
	np_2.set_extractor(ie_2)
	np_2.set_table_name(table_name_2)

	#############################################################

	# Third news source: an SCMP feed (the URL path points at an RSS feed)
	url_3 = "https://www.scmp.com/rss/91/feed"
	tag_3 = "SCMP"
	table_name_3 = "scmpnews"

	# JSON variant of the info extractor; it is configured with routers rather
	# than CSS selectors in the statements that follow
	ie_3 = InfoExtractorJSON()

	# Pre-processing hook: turn the XML string into a JSON string
	def list_pre_process(text):
	    """Return the XML string *text* converted to a JSON string.

	    The output of ``xml_to_json`` is decoded with ``json.loads`` and then
	    serialised again with ``json.dumps``.
	    """
	    # The body must be indented under ``def``; the flattened original
	    # raised IndentationError.
	    text = json.loads(xml_to_json(text))
	    return json.dumps(text)

	# Register the pre-processing function for the list document
	ie_3.set_list_pre_process_policy(list_pre_process)

	# Every field is addressed by a list of keys
	ie_3.set_list_router(["rss", "channel", "item"])
	ie_3.set_link_router(["link"])
	ie_3.set_title_router(["title"])
	ie_3.set_paragraphs_router(["description"])
	ie_3.set_time_router(["pubDate"])
	ie_3.set_source_router(["author"])
	ie_3.set_image_router(["media:thumbnail", "@url"])

	# Custom ID for a news item, derived from its link
	def id_policy(link):
	    """Return the hexadecimal MD5 digest of *link* encoded as UTF-8."""
	    # The return statement must be indented under ``def``; the flattened
	    # original raised IndentationError.
	    return hashlib.md5(link.encode("utf-8")).hexdigest()

	# Use the custom ID function for news items
	ie_3.set_id_policy(id_policy)

	np_3 = NewsPostmanJSON(
	    listURLs=[url_3],
	    sendList=[channel],
	    db=db,
	    tag=tag_3,
	)
	np_3.set_extractor(ie_3)
	np_3.set_table_name(table_name_3)

	# When run as a script, call poll() on each postman in turn.
	# The calls must be indented under the ``if``; the flattened original
	# raised IndentationError.
	if __name__ == '__main__':
	    np.poll()
	    np_2.poll()
	    np_3.poll()
No results found