Created
March 26, 2011 06:33
-
-
Save anonymous/888078 to your computer and use it in GitHub Desktop.
Downloads all the files on a Tokyo Toshokan search page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#Instructions: | |
#Download and install Python if you have not already. It comes installed by default on most, if not all, *nix systems and OS X.
#Click "raw" in the upper right part of this box. Save it as ttdownload.py in any folder that you want. | |
#Download http://www.crummy.com/software/BeautifulSoup/download/3.x/BeautifulSoup-3.0.8/BeautifulSoup.py and save it in the same folder as the previous script. | |
#Open up a terminal in the folder that you have downloaded everything to.
#Type "python ttdownload.py". | |
#Search for the files that you want on Tokyo Toshokan. | |
#Paste the url of the Tokyo Toshokan search page when asked. | |
#Type the folder name that you want all of the .torrent files saved to (if left blank, it will default to the date and time) | |
#Now all of the .torrent files are saved in the folder specified. | |
#Watch some animu. | |
import re | |
import os | |
import sys | |
import urlparse | |
import urllib | |
from datetime import datetime | |
from BeautifulSoup import BeautifulSoup | |
# | |
# Make some colors for a better experience in *nix
# | |
class bcolors:
    """Terminal escape sequences used to colour the script's output.

    The codes are class attributes, so they can be read directly
    (``bcolors.WARNING``) without creating an instance.
    """

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'

    def disable(self):
        """Replace every colour code on this instance with an empty string.

        Only the instance is modified; the class-level attributes keep
        their escape sequences.
        """
        for code_name in ("HEADER", "OKBLUE", "OKGREEN",
                          "WARNING", "FAIL", "ENDC"):
            setattr(self, code_name, '')
# | |
# Given a tokyo toshokan page, this function will get all the links, | |
# and pass the urls to getDownload(url) which will determine if they are valid | |
# then it returns all valid links in their .torrent form
# | |
def getTorrents(url):
    """Collect the downloadable torrents linked from a search page.

    The page at ``url`` is fetched and parsed with BeautifulSoup, every
    ``<a rel="nofollow" type="application/x-bittorrent">`` anchor is passed
    to ``getDownload``, and the links that resolve are returned.

    Args:
        url: address of the search page to read.

    Returns:
        dict mapping each resolved .torrent URL to the ASCII-encoded link
        text of the anchor it came from.

    Exits the process (``sys.exit``) with a "Malformed URL" message when the
    page cannot be fetched or parsed.
    """
    print("reading page...")
    try:
        gatherer = urllib.urlopen(url)
        try:
            html = gatherer.read()
        finally:
            # Release the connection even when the read itself fails.
            gatherer.close()
        soup = BeautifulSoup(html)
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts the script.
        sys.exit(bcolors.FAIL+"Malformed URL, quitting"+bcolors.ENDC)

    links = soup.findAll(name="a",attrs={"rel":"nofollow","type":"application/x-bittorrent"})
    torrents = {}
    for link in links:
        # link.string is None when the anchor wraps nested markup; fall back
        # to the concatenated text nodes instead of failing on None.encode.
        text = link.string
        if text is None:
            text = "".join(link.findAll(text=True))
        name = text.encode("ascii","ignore")

        try:
            torrent_url = getDownload(link["href"])
        except Exception:
            torrent_url = None

        # A missing result must not be stored: a None key would be counted
        # as a found torrent and then fail when it is downloaded.
        if torrent_url:
            torrents[torrent_url] = name
        else:
            print(bcolors.WARNING + "could not find a .torrent file for " + name + bcolors.ENDC)
    return torrents
# | |
# Given a link (offsite from tokyotosho) this function will | |
# try to locate the .torrent file | |
# | |
def getDownload(url):
    """Resolve a scraped link to the URL of its .torrent file.

    The cases are tried in this order:

    1. The link itself names a ``.torrent`` file: returned unchanged.
    2. The link contains "nyaa": returned unchanged when it already contains
       ``page=download``, otherwise ``torrentinfo`` is replaced with
       ``download``.
    3. Anything else: the URL is opened and accepted only when the response
       is served as ``application/x-bittorrent``.

    Args:
        url: link target taken from the search page; surrounding whitespace
            is stripped.

    Returns:
        The URL to download the .torrent file from.

    Raises:
        ValueError: the link was probed (case 3) and is not served as a
            torrent.
        Errors raised by ``urllib.urlopen`` while probing propagate as-is.
    """
    url = url.strip()

    # Linking directly to the .torrent file. The dot is escaped on purpose:
    # an unescaped "." also matches "=torrentinfo" and "nyaatorrents", which
    # would return info-page URLs before the nyaa rewrite below can run.
    if re.search(r"\.torrent\b", url):
        return url

    # nyaatorrents ("nyaa" already covers "nyaatorrents")
    if "nyaa" in url:
        if "page=download" in url:
            return url
        return url.replace("torrentinfo","download")

    # Sites that redirect: check the MIME type of what the link serves.
    response = urllib.urlopen(url)
    try:
        mime_type = response.info().gettype()
    finally:
        response.close()
    if "application/x-bittorrent" in mime_type:
        return url

    # Fail loudly instead of returning None, so callers do not treat None
    # as a valid download URL.
    raise ValueError("no .torrent file found at " + url)
# | |
# Given a .torrent file, this function downloads it to the right folder | |
# | |
def download(link,name,folder="torrents_from_py"):
    """Fetch ``link`` and save it as ``<folder>/<name>.torrent``.

    Args:
        link: URL of the .torrent file.
        name: base name for the saved file. It comes from scraped link text,
            so path separators in it are replaced with "_" to keep the file
            inside ``folder``.
        folder: directory to save into; created when it does not exist.

    Errors from fetching the URL or writing the file propagate to the caller.
    """
    if not os.path.exists(folder):
        os.makedirs(folder)

    u = urllib.urlopen(link)
    try:
        data = u.read()
    finally:
        # Close the connection even when the read fails.
        u.close()

    # Without this, a name such as "../x" or "a/b" would escape the target
    # folder or point into a directory that does not exist.
    safe_name = name.replace("/", "_").replace("\\", "_")
    target = os.path.join(folder, safe_name + ".torrent")
    with open(target, "wb") as f:
        f.write(data)
# | |
# The body of our program | |
# | |
# Script entry point: ask for a search-page URL, list the torrents found on
# it, ask for a destination folder and download every torrent into it.
# Each print call takes one pre-joined string so the output is the same
# whether print is a statement or a function.
page = ""
folder = ""
if __name__ == '__main__':
    # ask the user for the page that they want
    page = raw_input("What url: \n")
    print("Starting query...")
    # get the list of torrents
    torrents = getTorrents(page)
    print(bcolors.OKGREEN + str(len(torrents))+" torrents found" + bcolors.ENDC)
    if torrents:
        print("torrents found for the following items")
        print("="*50)
        # print the valid list
        for name in torrents.values():
            print(bcolors.HEADER + name + bcolors.ENDC)
        print("="*50)
        # get the folder that they want to save it in; the default is the
        # current date and time joined with underscores (no zero padding)
        now = datetime.now()
        pre = "_".join(str(part) for part in (now.year, now.month, now.day,
                                              now.hour, now.minute, now.second))
        folder = raw_input("Folder name: (leave blank to use "+pre+")\n")
        if folder.strip() == "":
            folder = pre
        # download all the files
        print("="*50)
        for key, value in torrents.items():
            # Reset the colour on the same line so a later failure or an
            # interrupt does not leave the terminal green.
            print(bcolors.OKGREEN + "downloading: " + value + bcolors.ENDC)
            try:
                download(key,value,folder)
            except Exception:
                # Exception rather than a bare except: one failed download is
                # reported and skipped, but Ctrl-C still stops the whole run.
                print(bcolors.WARNING + "Failed downloading " +value + bcolors.ENDC)
        print(bcolors.ENDC)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment