Last active
September 1, 2024 07:27
-
-
Save Waester/59df0fb9c35e461e342cf83221193fc6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feeds to poll. Every entry needs a 'url'; the optional 'regex' is matched
# against each entry title with re.match to decide what gets downloaded.
RSS = (
    dict(url='https://example.com'),
    dict(url='https://example.com', regex='^.+720p.*$'),
)

# Passed as the `proxies` argument of every HTTP request the script makes.
PROXIES = dict(
    http='http://example.com:8080',
    https='https://example.com:8443',
)

# Directory that downloaded .torrent files are written to.
TORRENT_DIR = 'torrents'

# When True, nothing is written to disk and transmission-remote is not called.
DRYRUN = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import feedparser | |
import os | |
import re | |
import requests | |
import json | |
import config | |
import asyncio | |
from time import mktime | |
from datetime import datetime | |
from urllib.parse import urlparse | |
class FeedHandler():
    """Fetch RSS feeds and download the torrents or magnet links they list.

    Each handler owns one ``requests.Session``, which ``parse_and_download``
    closes once the handler's feeds have been processed. Per-feed progress
    is kept in the module-level ``timestamps`` mapping, shaped as
    ``{feed url: {regex: unix timestamp}}``.
    """

    def __init__(self):
        self.session = requests.Session()

    async def get_torrent(self, entry):
        """Download the torrent file that ``entry['link']`` points to.

        The file is stored in ``config.TORRENT_DIR`` under the last path
        segment of the link. Nothing is written when ``config.DRYRUN`` is set.

        Returns:
            True on success (or on a dry run), False when the request or the
            write failed.
        """
        filename = entry['link'].split('/')[-1]
        try:
            torrent = await asyncio.to_thread(
                self.session.get, entry['link'], timeout=5, proxies=config.PROXIES)
            # An HTTP error status is a failure: never save an error page as a torrent.
            torrent.raise_for_status()
        except Exception as e:
            print(f"Request failed: {entry['title']}\n {e}")
            return False
        if config.DRYRUN:
            return True
        try:
            os.makedirs(config.TORRENT_DIR, exist_ok=True)
            with open(f'{config.TORRENT_DIR}/{filename}', 'wb') as _file:
                _file.write(torrent.content)
        except OSError as e:
            # Report like a failed request so the feed is retried on the next run.
            print(f"Write failed: {entry['title']}\n {e}")
            return False
        return True

    async def add_magnet(self, url):
        """Hand a magnet link to ``transmission-remote --add``.

        Returns:
            True when the command exits with status 0 (or on a dry run),
            False when it exits non-zero or cannot be started.
        """
        if config.DRYRUN:
            return True
        try:
            proc = await asyncio.create_subprocess_exec(
                'transmission-remote', '--add', url, stdout=asyncio.subprocess.DEVNULL)
        except OSError as e:
            # Raised when the executable cannot be launched at all.
            print(f"Command failed: transmission-remote\n {e}")
            return False
        return await proc.wait() == 0

    def get_timestamp(self, feed):
        """Return the stored timestamp for this feed's (url, regex) pair.

        Falls back to 2000-01-01 (local time) when either the URL or the
        regex has no stored value, so a new regex on an already known URL
        starts from scratch instead of raising ``KeyError``.
        """
        default = int(datetime(2000, 1, 1).timestamp())
        return timestamps.get(feed['url'], {}).get(feed.get('regex', ''), default)

    def set_timestamp(self, entries, feed):
        """Record the newest publication time in ``entries`` for this feed.

        The maximum is used rather than the first entry, so the result does
        not depend on the order in which the feed lists its items.
        """
        if not entries:
            return
        # NOTE(review): mktime reads the struct as local time; this is the same
        # conversion used in download(), so comparisons stay consistent.
        newest = max(int(mktime(entry['published_parsed'])) for entry in entries)
        timestamps.setdefault(feed['url'], {})[feed.get('regex', '')] = newest

    async def download(self, entries, feed):
        """Download every entry newer than the stored timestamp whose title matches.

        Links starting with ``http`` are fetched as torrent files; anything
        else is passed to ``add_magnet``. The feed's timestamp is advanced
        only when every attempted download succeeded, so a failed one is
        retried on the next run.
        """
        # A feed without a regex matches every title ('' matches anything).
        feed.setdefault('regex', '')
        pattern = re.compile(feed['regex'])
        timestamp = self.get_timestamp(feed)
        all_succeeded = True
        for entry in entries:
            entry_date = int(mktime(entry['published_parsed']))
            if entry_date <= timestamp or not pattern.match(entry['title']):
                continue
            print(f"Downloading: {entry['title']}")
            if entry['link'].startswith('http'):
                ok = await self.get_torrent(entry)
            else:
                ok = await self.add_magnet(entry['link'])
            all_succeeded = all_succeeded and ok
        if all_succeeded:
            self.set_timestamp(entries, feed)

    async def parse(self, feed):
        """Fetch and parse ``feed['url']``.

        Returns:
            The list of parsed entries, or None when the request failed or
            feedparser flagged the feed as malformed (``bozo``).
        """
        try:
            response = await asyncio.to_thread(
                self.session.get, feed['url'], timeout=5, proxies=config.PROXIES)
            response.raise_for_status()
        except Exception as e:
            print(f"Request failed: {feed['url']}\n {e}")
            return None
        parsed_feed = feedparser.parse(response.text)
        if parsed_feed['bozo'] != 0:
            print(f"Bad feed: {feed['url']}")
            return None
        return parsed_feed['entries']

    async def parse_and_download(self, feeds):
        """Process ``feeds`` one after another, then close the session.

        The session is closed even when a feed raises, so the connection
        pool is not leaked.
        """
        try:
            for feed in feeds:
                entries = await self.parse(feed)
                if entries:
                    await self.download(entries, feed)
        finally:
            self.session.close()
async def main():
    """Group the configured feeds by hostname and process the groups concurrently.

    Feeds that share a host are handled sequentially by a single FeedHandler
    (and therefore a single session); different hosts run side by side.
    """
    feeds_by_host = {}
    for feed in config.RSS:
        feeds_by_host.setdefault(urlparse(feed['url']).hostname, []).append(feed)
    await asyncio.gather(
        *(FeedHandler().parse_and_download(group) for group in feeds_by_host.values())
    )
# Load the per-feed progress written by the previous run. FeedHandler reads and
# updates this module-level mapping ({feed url: {regex: unix timestamp}}).
try:
    with open('.timestamps', 'r', encoding='utf-8') as _file:
        timestamps = json.load(_file)
except FileNotFoundError:
    # First run: no progress has been recorded yet.
    timestamps = dict()

try:
    asyncio.run(main())
finally:
    # Persist whatever progress was made even if a handler raised, so feeds
    # that already completed are not downloaded again on the next run.
    with open('.timestamps', 'w', encoding='utf-8') as _file:
        json.dump(timestamps, _file, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment