Created
August 1, 2024 10:34
-
-
Save shahidcodes/ceeb9a717c8006499d0142c8c06b44a3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#VERSION: 1.2 | |
#AUTHORS: Joost Bremmer ([email protected]) | |
# | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <https://www.gnu.org/licenses/>. | |
try: | |
from HTMLParser import HTMLParser | |
except ModuleNotFoundError: | |
from html.parser import HTMLParser | |
# import qBT modules | |
try: | |
from novaprinter import prettyPrinter | |
from helpers import retrieve_url | |
except ModuleNotFoundError: | |
pass | |
class nyaasi(object):
    """Class used by qBittorrent to search for torrents."""

    url = 'https://nya.iss.one'
    name = 'Nyaa.si'

    # Whether to use magnet links or download torrent files ###################
    #
    # Set to 'True' to use magnet links, or 'False' to use torrent files
    # NOTE: the attribute name is misspelled ("magent"); it is kept unchanged
    # so that anything already setting it keeps working.
    use_magent_links = True
    #
    ###########################################################################

    # search() keeps requesting pages until one yields fewer hits than this.
    # NOTE(review): presumably the number of rows on a full result page --
    # confirm against the site.
    _results_per_page = 75

    # defines which search categories are supported by this search engine
    # and their corresponding id. Possible categories are:
    # 'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures',
    # 'books'
    supported_categories = {
        'all': '0_0',
        'anime': '1_0',
        'books': '3_0',
        'music': '2_0',
        'pictures': '5_0',
        'software': '6_0',
        'tv': '4_0',
        'movies': '4_0'}

    class NyaasiParser(HTMLParser):
        """Parses Nyaa.si browse page for search results and stores them.

        The parser is a small state machine: a title anchor (``/view/...``)
        opens a new hit, the matching download anchor arms ``td_counter``,
        and every table cell closed afterwards advances the counter so that
        ``handle_data`` knows which column the text belongs to.
        """

        def __init__(self, res, url, use_magnet=True):
            """Construct a nyaasi html parser.

            Parameters:
            :param list res: a list to store the results in
            :param str url: the base url of the search engine
            :param bool use_magnet: whether to link to magnet links or
                                    torrent files
            """
            try:
                super().__init__()
            except TypeError:
                # Zero-argument super() is not available on Python 2.
                # See: http://stackoverflow.com/questions/9698614/
                HTMLParser.__init__(self)

            self.engine_url = url
            self.use_magnet_links = use_magnet
            self.results = res
            # The hit currently being assembled, or None between rows.
            self.curr = None
            # -1 while no download link has been seen for the current hit;
            # otherwise the number of cells closed since that link.
            self.td_counter = -1

        def handle_starttag(self, tag, attr):
            """Dispatch opening tags; only anchors are of interest."""
            if tag == 'a':
                self.start_a(attr)

        def handle_endtag(self, tag):
            """Dispatch closing tags; only table cells are of interest."""
            if tag == 'td':
                self.start_td()

        def start_a(self, attr):
            """Handle the opening of anchor tags.

            :param list attr: the (name, value) attribute pairs of the tag
            """
            params = dict(attr)
            # A missing or valueless href is treated as an empty string so
            # that unrelated anchors can never raise here.
            href = params.get('href') or ''

            # get torrent name
            if 'title' in params and 'class' not in params \
                    and href.startswith('/view/'):
                # Only start a new hit when none is pending.
                if not self.curr:
                    self.curr = {
                        'name': params['title'],
                        'desc_link': self.engine_url + href,
                        'engine_url': self.engine_url}
                return

            if not href or not self.curr:
                return

            # check whether to use torrent files or magnet links,
            # then search for a matching download link, and move on;
            # every other link is unrelated and skipped
            if self.use_magnet_links:
                if href.startswith("magnet:?"):
                    self.curr['link'] = href
                    self.td_counter += 1
            elif href.endswith(".torrent"):
                self.curr['link'] = self.engine_url + href
                self.td_counter += 1

        def start_td(self):
            """Advance the column counter when a table cell is closed.

            Despite its name this is invoked from ``handle_endtag``, i.e. on
            ``</td>``.
            """
            # Keep track of the column, once the download link was found
            if self.td_counter >= 0:
                self.td_counter += 1

            # Add the hit to the results,
            # then reset the counters for the next result
            if self.td_counter >= 5:
                self.results.append(self.curr)
                self.curr = None
                self.td_counter = -1

        def handle_data(self, data):
            """Extract data about the torrent.

            Column 1 holds the size, column 3 the seeds and column 4 the
            leechers; the rest is not supported by prettyPrinter.

            :param str data: the text found between two tags
            """
            text = data.strip()

            # Catch the size
            if self.td_counter == 1:
                # Blank text must not overwrite a size that was already read.
                if text or 'size' not in self.curr:
                    self.curr['size'] = text
            # Catch the seeds
            elif self.td_counter == 3:
                self._store_count('seeds', text)
            # Catch the leechers
            elif self.td_counter == 4:
                self._store_count('leech', text)

        def _store_count(self, key, text):
            """Store ``text`` as an integer under ``key`` of the current hit.

            Unparsable text is stored as -1. Blank text is ignored when the
            key already has a value, so whitespace trailing the number inside
            a cell cannot clobber it.
            """
            if not text and key in self.curr:
                return
            try:
                self.curr[key] = int(text)
            except ValueError:
                self.curr[key] = -1

    # DO NOT CHANGE the name and parameters of this function
    # This function will be the one called by nova2.py
    def search(self, what, cat='all'):
        """
        Retrieve and parse engine search results by category and query.

        Every hit is handed to prettyPrinter; nothing is returned.

        Parameters:
        :param what: a string with the search tokens, already escaped
                     (e.g. "Ubuntu+Linux")
        :param cat: the name of a search category, see supported_categories.
                    Unknown names fall back to 'all'.
        """
        # Fall back to 'all' rather than formatting "c=None" into the query.
        category = self.supported_categories.get(
            cat, self.supported_categories['all'])
        url = "{0}/?f=0&s=seeders&o=desc&c={1}&q={2}".format(
            self.url, category, what)

        hits = []
        page = 1
        parser = self.NyaasiParser(hits, self.url, self.use_magent_links)
        while True:
            res = retrieve_url(url + "&p={}".format(page))
            parser.feed(res)
            for each in hits:
                prettyPrinter(each)

            # A short page means there is nothing left to fetch.
            if len(hits) < self._results_per_page:
                break
            # The parser appends to this same list, so empty it in place.
            del hits[:]
            page += 1

        parser.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment