#!/usr/bin/env python
# Finds megavideo.com links on a web page, resolves them through the
# fedbac-tools.com "debrid" service and downloads the resulting stream.
import os, sys, re, cookielib, urllib2, random, logging
import datetime
import time
import socket
import urlparse
from BeautifulSoup import BeautifulSoup
from threading import Thread

# Regexes to extract the megavideo video id and the debrided stream URL
# from the player's flashvars.
MEGAREGEX = r'.*megavideo\.com.*v=([A-Za-z0-9]+).*'
DEBRIDREGEX = r'.*(http://.+key=[A-Za-z0-9-]+).*'

socket.setdefaulttimeout(300)

useragents = [
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.1) Gecko/20061205 Iceweasel/2.0.0.1 (Debian-2.0.0.1+dfsg-2) ",
]
COOKIEFILE = 'cookies.txt'

class WatcherThread(Thread):
    """Prints a progress bar with percentage and ETA while a download runs."""

    def __init__(self, size):
        Thread.__init__(self)
        self.size = size
        self.fetched = 1
        self.bstop = False
        self.start_time = time.time()

    def run(self):
        while not self.bstop:
            progress_bar = "=" * int((float(self.fetched) / float(self.size)) * 60)
            spaces = " " * (60 - len(progress_bar))
            percent = float(self.fetched) / float(self.size) * 100.0
            elapsed = time.time() - self.start_time
            v = float(self.fetched) / elapsed
            t = (self.size - self.fetched) / v
            eta = str(datetime.timedelta(seconds=int(t)))
            print "\r[" + progress_bar + ">" + spaces + "] %8sk/%8sk %f%% ETA %s " % (str(self.fetched / 1024), str(self.size / 1024), percent, eta),
            time.sleep(1)
            sys.stdout.flush()

    def stop(self):
        self.bstop = True

class BigFileDownloader(object):
    """Streams a URL to a local file in small chunks while a WatcherThread
    reports progress."""

    def __init__(self, url, file):
        self.packet_size = 128
        self.urlopener = urllib2.build_opener()
        self.url = url
        self.file = file

    def download(self):
        try:
            request = urllib2.Request(self.url)
            request.add_header("User-Agent", useragents[random.randint(0, len(useragents) - 1)])
            fout = open(self.file, "wb")
            datafd = self.urlopener.open(request)
            # Sizes are tracked in bits (hence the * 8), matching the watcher's counters.
            totalsize = int(datafd.headers.get("content-length")) * 8
            wt = WatcherThread(totalsize)
            wt.start()
            while True:
                data = datafd.read(self.packet_size)
                if not data:
                    break
                fout.write(data)
                wt.fetched += len(data) * 8
            wt.stop()
            wt.join()
            fout.close()
        except (KeyboardInterrupt, SystemExit):
            wt.stop()
            wt.join()
        except Exception, e:
            logging.critical("Cannot download %s" % str(e))

class MegaDownloadDebrid(object):
    def __init__(self, source_url, output_file):
        self.source_url = source_url
        self.output_file = output_file
        # Init cookie handling
        self.cj = cookielib.LWPCookieJar()
        self.cookiefile = COOKIEFILE
        self.cookie_opener = None
        self.__try_load_cookies()

    def __try_load_cookies(self):
        if os.path.isfile(self.cookiefile):
            try:
                self.cj.load(self.cookiefile)
            except:
                logging.critical('Invalid cookie file %s' % self.cookiefile)

    def __save_cookies(self):
        if self.cj is not None:
            for index, cookie in enumerate(self.cj):
                logging.info("Fetched cookie %s : %s" % (str(index), str(cookie)))
            self.cj.save(self.cookiefile)

    def __build_cookie_opener(self):
        self.cookie_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.cookie_opener)

    def __is_megavideo_link(self, link):
        regex = re.compile(MEGAREGEX)
        match = regex.match(link)
        if match and match.group(1):
            return match.group(1)
        return None

    def __get_debrid_link(self, megalink):
        megaid = self.__is_megavideo_link(megalink)
        if megaid:
            logging.info("Fetching url http://www.fedbac-tools.com/debrid/?v=%s" % megaid)
            request = urllib2.Request('http://www.fedbac-tools.com/debrid/?v=%s' % megaid)
            request.add_header("User-Agent", useragents[random.randint(0, len(useragents) - 1)])
            html = self.cookie_opener.open(request).read()
            self.__save_cookies()
            soup = BeautifulSoup(html)
            regex = re.compile(DEBRIDREGEX)
            # The debrided stream URL is carried in the flashvars of the embedded player.
            for l in soup.fetch('embed'):
                flashvar = l.get('flashvars')
                if not flashvar:
                    continue
                matches = regex.match(flashvar)
                if matches and matches.group(1) is not None:
                    return matches.group(1)
        return None

    def download(self):
        links = self.get_megavideo_links()
        if not links:
            logging.critical("Cannot find links")
            return
        if len(links) > 1:
            logging.error("More than one link found. Taking one of them")
        link = links.pop()
        delink = self.__get_debrid_link(link)
        if not delink:
            logging.critical("Cannot resolve debrided link for %s" % link)
            return
        logging.info("Fetching debrided video from %s" % delink)
        dl = BigFileDownloader(delink, self.output_file)
        dl.download()

    def get_megavideo_links(self):
        links = self.__search_links(self.source_url)
        megavideo_links = []
        for i in links:
            if self.__is_megavideo_link(i):
                megavideo_links.append(i)
        # Also follow each link found on the page and look for megavideo links
        # one level deeper.
        for l in links:
            try:
                rlinks = self.__search_links(l)
            except Exception, e:
                logging.warning("Cannot search %s: %s" % (l, str(e)))
                continue
            for i in rlinks:
                if self.__is_megavideo_link(i):
                    megavideo_links.append(i)
        if len(megavideo_links) < 1:
            logging.error("Cannot find megavideo links")
            return None
        megalinks = set(megavideo_links)
        for i in megalinks:
            logging.info('Found megavideo link %s' % i)
        return megalinks

    def __search_links(self, url):
        if not self.cookie_opener:
            self.__build_cookie_opener()
        request = urllib2.Request(url)
        request.add_header("User-Agent", useragents[random.randint(0, len(useragents) - 1)])
        html = self.cookie_opener.open(request).read()
        self.__save_cookies()
        soup = BeautifulSoup(html)
        links = []
        for l in soup.fetch('a'):
            link = l.get('href')
            if link:
                if link.startswith('http') or link.startswith('www'):
                    links.append(link)
                else:
                    # Resolve relative hrefs against the page URL.
                    links.append(urlparse.urljoin(url, link))
        return links

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    if len(sys.argv) != 3:
        print "Usage: %s <page-url> <output-file>" % sys.argv[0]
        sys.exit(1)
    dl = MegaDownloadDebrid(sys.argv[1], sys.argv[2])
    dl.download()
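
A minimal usage sketch, assuming the script above is saved as megadl.py (the gist itself does not name the file) and run under Python 2 with BeautifulSoup 3 installed; the page URL and output file name are placeholders:

    from megadl import MegaDownloadDebrid

    # Scan the page for megavideo links, resolve one through the debrid
    # service and save the stream to video.flv.
    dl = MegaDownloadDebrid("http://example.com/page-with-megavideo-links", "video.flv")
    dl.download()

Running the script directly is equivalent: python megadl.py <page-url> <output-file>.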