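A small asyncio RSS downloader for use with Transmission: it polls the feeds listed in config.RSS, saves new .torrent files into TORRENT_DIR, hands magnet links to transmission-remote, and remembers the newest entry it has handled per feed in a .timestamps file. An example config.py: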
RSS = (
    # 'url' is required; 'regex' optionally filters entry titles (via re.match).
    {'url': 'https://example.com'},
    {'url': 'https://example.com', 'regex': '^.+720p.*$'},
)

# Passed straight to requests; use an empty dict if no proxy is needed.
PROXIES = {
    'http': 'http://example.com:8080',
    'https': 'https://example.com:8443',
}

# Directory where downloaded .torrent files are written.
TORRENT_DIR = 'torrents'

# When True, fetch feeds but write no files and add nothing to Transmission.
DRYRUN = False
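The downloader itself follows. It keeps per-feed state in a .timestamps JSON file in the working directory, keyed by feed URL and then by regex filter, so only entries newer than the last successful run are fetched again. Purely as an illustration (the URL and timestamp values below are placeholders), that state file ends up shaped roughly like this:

{
    "https://example.com": {
        "": 1725174000,
        "^.+720p.*$": 1725174000
    }
}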
#!/usr/bin/env python3
import feedparser
import os
import re
import requests
import json
import config
import asyncio
from time import mktime
from datetime import datetime
from urllib.parse import urlparse
class FeedHandler:
    """Fetches one or more feeds over a shared requests session."""

    def __init__(self):
        self.session = requests.Session()

    async def get_torrent(self, entry):
        # Download a .torrent file into config.TORRENT_DIR.
        filename = entry['link'].split('/')[-1]
        os.makedirs(config.TORRENT_DIR, exist_ok=True)
        try:
            torrent = await asyncio.to_thread(self.session.get, entry['link'],
                                              timeout=5, proxies=config.PROXIES)
        except Exception as e:
            print(f"Request failed: {entry['title']}\n {e}")
            return False
        if not config.DRYRUN:
            with open(f'{config.TORRENT_DIR}/{filename}', 'wb') as _file:
                _file.write(torrent.content)
        return True

    async def add_magnet(self, url):
        # Hand a magnet link to a running Transmission daemon.
        if not config.DRYRUN:
            proc = await asyncio.create_subprocess_exec(
                'transmission-remote', '--add', url,
                stdout=asyncio.subprocess.DEVNULL)
            await proc.wait()
            if proc.returncode != 0:
                return False
        return True

    def get_timestamp(self, feed):
        # Timestamp of the newest entry handled for this url/regex pair;
        # fall back to 2000-01-01 for feeds (or filters) not seen before.
        timestamp = int(datetime(2000, 1, 1).timestamp())
        if feed['url'] in timestamps:
            timestamp = timestamps[feed['url']].get(feed['regex'], timestamp)
        return timestamp

    def set_timestamp(self, entries, feed):
        # Remember the newest entry (feeds list newest first).
        timestamp = int(mktime(entries[0]['published_parsed']))
        if feed['url'] not in timestamps:
            timestamps[feed['url']] = dict()
        timestamps[feed['url']][feed['regex']] = timestamp

    async def download(self, entries, feed):
        if 'regex' not in feed:
            feed['regex'] = ''
        timestamp = self.get_timestamp(feed)
        update_timestamp = True
        for entry in entries:
            entry_date = int(mktime(entry['published_parsed']))
            if re.match(feed['regex'], entry['title']) and entry_date > timestamp:
                print(f"Downloading: {entry['title']}")
                if entry['link'].startswith('http'):
                    if not await self.get_torrent(entry):
                        update_timestamp = False
                else:
                    if not await self.add_magnet(entry['link']):
                        update_timestamp = False
        # Only advance the stored timestamp if every new entry was handled.
        if update_timestamp:
            self.set_timestamp(entries, feed)

    async def parse(self, feed):
        try:
            response = await asyncio.to_thread(self.session.get, feed['url'],
                                               timeout=5, proxies=config.PROXIES)
        except Exception as e:
            print(f"Request failed: {feed['url']}\n {e}")
            return
        parsed_feed = feedparser.parse(response.text)
        if parsed_feed['bozo'] != 0:
            print(f"Bad feed: {feed['url']}")
            return
        return parsed_feed['entries']

    async def parse_and_download(self, feeds):
        for feed in feeds:
            entries = await self.parse(feed)
            if entries:
                await self.download(entries, feed)
        self.session.close()


async def main():
    # Group feeds by host so each host gets its own session, fetched concurrently.
    coros = list()
    hosts = dict()
    for feed in config.RSS:
        hostname = urlparse(feed['url']).hostname
        if hostname not in hosts:
            hosts[hostname] = list()
        hosts[hostname].append(feed)
    for host in hosts:
        handler = FeedHandler()
        coros.append(handler.parse_and_download(hosts[host]))
    await asyncio.gather(*coros)


# Load persisted per-feed timestamps, run, then write them back.
try:
    with open('.timestamps', 'r') as _file:
        timestamps = json.load(_file)
except FileNotFoundError:
    timestamps = dict()

asyncio.run(main())

with open('.timestamps', 'w') as _file:
    json.dump(timestamps, _file, indent=4)
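Requirements are just feedparser and requests on Python 3.9+ (asyncio.to_thread), plus transmission-remote on PATH if any feed hands out magnet links. To sanity-check a title filter before putting it in config.RSS, a quick standalone sketch (the feed URL and pattern here are placeholders, matching the re.match semantics the script uses) is:

import re
import feedparser

feed = feedparser.parse('https://example.com')  # placeholder feed URL
pattern = r'^.+720p.*$'                         # candidate filter for config.RSS
for entry in feed['entries']:
    if re.match(pattern, entry['title']):
        print(entry['title'], '->', entry['link'])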