Skip to content

Instantly share code, notes, and snippets.

@shahidcodes
Created August 1, 2024 10:34
Show Gist options
  • Save shahidcodes/ceeb9a717c8006499d0142c8c06b44a3 to your computer and use it in GitHub Desktop.
Save shahidcodes/ceeb9a717c8006499d0142c8c06b44a3 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
#VERSION: 1.2
#AUTHORS: Joost Bremmer ([email protected])
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
try:
from HTMLParser import HTMLParser
except ModuleNotFoundError:
from html.parser import HTMLParser
# import qBT modules
try:
from novaprinter import prettyPrinter
from helpers import retrieve_url
except ModuleNotFoundError:
pass
class nyaasi(object):
"""Class used by qBittorrent to search for torrents."""
url = 'https://nya.iss.one'
name = 'Nyaa.si'
# Whether to use magnet links or download torrent files ###################
#
# Set to 'True' to use magnet links, or 'False' to use torrent files
use_magent_links = True
#
###########################################################################
# defines which search categories are supported by this search engine
# and their corresponding id. Possible categories are:
# 'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures',
# 'books'
supported_categories = {
'all': '0_0',
'anime': '1_0',
'books': '3_0',
'music': '2_0',
'pictures': '5_0',
'software': '6_0',
'tv': '4_0',
'movies': '4_0'}
class NyaasiParser(HTMLParser):
"""Parses Nyaa.si browse page for search results and stores them."""
def __init__(self, res, url, use_magnet=True):
"""Construct a nyaasi html parser.
Parameters:
:param list res: a list to store the results in
:param str url: the base url of the search engine
:param str use_magnet: whether to link to magnet links or torrent
files
"""
try:
super().__init__()
except TypeError:
# See: http://stackoverflow.com/questions/9698614/
HTMLParser.__init__(self)
self.engine_url = url
self.use_magnet_links = use_magnet
self.results = res
self.curr = None
self.td_counter = -1
def handle_starttag(self, tag, attr):
"""Tell the parser what to do with which tags."""
if tag == 'a':
self.start_a(attr)
def handle_endtag(self, tag):
"""Handle the closing of table cells."""
if tag == 'td':
self.start_td()
def start_a(self, attr):
"""Handle the opening of anchor tags."""
params = dict(attr)
# get torrent name
if 'title' in params and 'class' not in params \
and params['href'].startswith('/view/'):
hit = {
'name': params['title'],
'desc_link': self.engine_url + params['href']}
if not self.curr:
hit['engine_url'] = self.engine_url
self.curr = hit
elif 'href' in params and self.curr:
# skip unrelated links
if not params['href'].startswith("magnet:?") and \
not params['href'].endswith(".torrent"):
return
# check whether to use torrent files or magnet links,
# then search for a matching download link, and move on
if not self.use_magnet_links and \
params['href'].endswith(".torrent"):
self.curr['link'] = self.engine_url + params['href']
self.td_counter += 1
elif params['href'].startswith("magnet:?") \
and self.use_magnet_links:
self.curr['link'] = params['href']
self.td_counter += 1
def start_td(self):
"""Handle the opening of a table cell tag."""
# Keep track of timers
if self.td_counter >= 0:
self.td_counter += 1
# Add the hit to the results,
# then reset the counters for the next result
if self.td_counter >= 5:
self.results.append(self.curr)
self.curr = None
self.td_counter = -1
def handle_data(self, data):
"""Extract data about the torrent."""
# These fields matter
if self.td_counter > 0 and self.td_counter <= 5:
# Catch the size
if self.td_counter == 1:
self.curr['size'] = data.strip()
# Catch the seeds
elif self.td_counter == 3:
try:
self.curr['seeds'] = int(data.strip())
except ValueError:
self.curr['seeds'] = -1
# Catch the leechers
elif self.td_counter == 4:
try:
self.curr['leech'] = int(data.strip())
except ValueError:
self.curr['leech'] = -1
# The rest is not supported by prettyPrinter
else:
pass
# DO NOT CHANGE the name and parameters of this function
# This function will be the one called by nova2.py
def search(self, what, cat='all'):
"""
Retreive and parse engine search results by category and query.
Parameters:
:param what: a string with the search tokens, already escaped
(e.g. "Ubuntu+Linux")
:param cat: the name of a search category, see supported_categories.
"""
url = str("{0}/?f=0&s=seeders&o=desc&c={1}&q={2}"
.format(self.url,
self.supported_categories.get(cat),
what))
hits = []
page = 1
parser = self.NyaasiParser(hits, self.url, self.use_magent_links)
while True:
res = retrieve_url(url + "&p={}".format(page))
parser.feed(res)
for each in hits:
prettyPrinter(each)
if len(hits) < 75:
break
del hits[:]
page += 1
parser.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment