Skip to content

Instantly share code, notes, and snippets.

@dopuskh3
Created April 30, 2010 14:33
Show Gist options
  • Save dopuskh3/385280 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Python 2 script: downloads a megavideo stream found on a web page by
# resolving it through a "debrid" service (cookielib/urllib2 are py2-only).
import os, sys, re, cookielib, urllib2, random, logging
import datetime
import urllib
import time
import socket
from BeautifulSoup import BeautifulSoup
from threading import Thread
# Extracts the video id from a megavideo.com URL (capture group 1).
MEGAREGEX = '.*megavideo\.com.*v=([A-Za-z0-9]+).*'
# Extracts the direct download URL from an <embed> tag's flashvars.
# NOTE(review): the class [a-z0-9-A-Z] also matches a literal '-' — the
# middle dash sits between two ranges; presumably intentional, verify.
DEBRIDREGEX = '.*(http://.+key=[a-z0-9-A-Z]+).*'
# Abort any socket operation that stalls for 5 minutes.
socket.setdefaulttimeout(300)
# User-Agent strings sent instead of urllib2's default, picked at random.
useragents = [
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.1) Gecko/20061205 Iceweasel/2.0.0.1 (Debian-2.0.0.1+dfsg-2) ",
]
# Session cookies are persisted here between runs (LWP cookie-jar format).
COOKIEFILE = 'cookies.txt'
class WatcherThread(Thread):
    """Background thread that redraws a console progress bar once per second.

    The downloader thread updates ``self.fetched`` (bytes transferred so
    far); this thread only reads it. Call ``stop()`` to make ``run()``
    return after its current iteration.
    """

    def __init__(self, size):
        """size -- total number of bytes expected (0 if unknown)."""
        Thread.__init__(self)
        self.size = size
        # Start at 1, not 0, so the rate estimate in run() never sees a
        # zero numerator before the first chunk arrives.
        self.fetched = 1
        self.bstop = False
        self.start_time = time.time()

    def run(self):
        while not self.bstop:
            # Guard against size == 0 (e.g. missing Content-Length) so
            # the ratios below cannot raise ZeroDivisionError.
            total = self.size or 1
            done = float(self.fetched) / float(total)
            bar = "=" * int(done * 60)
            spaces = " " * (60 - len(bar))
            percent = done * 100.0
            elapsed = time.time() - self.start_time
            # Average rate since start; elapsed > 0 and fetched >= 1, so
            # the ETA division is always defined.
            rate = float(self.fetched) / elapsed
            eta = str(datetime.timedelta(seconds=(total - self.fetched) / rate))
            sys.stdout.write("\r[%s>%s] %8sk/%8sk %f%% ETA %s " % (
                bar, spaces, str(self.fetched // 1024), str(total // 1024),
                percent, eta))
            # Flush before sleeping (the original flushed after, leaving
            # the bar a second stale).
            sys.stdout.flush()
            time.sleep(1)

    def stop(self):
        """Ask run() to exit; takes effect within one second."""
        self.bstop = True
class BigFileDownloader(object):
    """Stream an HTTP URL into a local file while showing a progress bar."""

    def __init__(self, url, file):
        # 64 KiB read chunks; the original 128-byte chunks meant one
        # read() call per 128 bytes, which was needlessly slow.
        self.packet_size = 64 * 1024
        self.urlopener = urllib2.build_opener()
        self.url = url
        self.file = file

    def download(self):
        """Fetch self.url into self.file.

        Errors are logged (not raised), matching the original contract;
        Ctrl-C / SystemExit abort quietly. The watcher thread and the
        output file are always cleaned up (the original leaked both on
        failure paths, and hit a NameError referencing ``wt`` when the
        request failed before the watcher existed).
        """
        wt = None
        fout = None
        try:
            request = urllib2.Request(self.url)
            request.add_header("User-Agent", random.choice(useragents))
            datafd = self.urlopener.open(request)
            fout = open(self.file, "wb")
            # A missing Content-Length raises TypeError here and is
            # reported by the handler below, as in the original.
            totalsize = int(datafd.headers.get("content-length"))
            wt = WatcherThread(totalsize)
            wt.start()
            while True:
                data = datafd.read(self.packet_size)
                if not data:
                    break
                fout.write(data)
                # Count what was actually read: the final chunk is
                # usually shorter than packet_size. (The original also
                # multiplied both totals by 8, displaying kilobits under
                # a "k" label; plain bytes are counted now.)
                wt.fetched += len(data)
        except (KeyboardInterrupt, SystemExit):
            pass
        except Exception as e:
            logging.critical("Cannot download %s" % str(e))
        finally:
            if wt is not None:
                wt.stop()
                wt.join()
            if fout is not None:
                fout.close()
class MegaDownloadDebrid(object):
    """Find megavideo links on a page, resolve one via a debrid service,
    and download the resulting direct video URL.

    Workflow: fetch ``source_url``, scrape its <a href> links for
    megavideo URLs, resolve the first through fedbac-tools.com (which
    requires session cookies, persisted in COOKIEFILE), then hand the
    direct URL to BigFileDownloader.
    """

    def __init__(self, source_url, output_file):
        self.source_url = source_url
        self.output_file = output_file
        # Cookie machinery: the debrid service needs a session cookie.
        self.cj = cookielib.LWPCookieJar()
        self.cookiefile = COOKIEFILE
        self.cookie_opener = None
        self.__try_load_cookies()

    def __try_load_cookies(self):
        """Load a previously saved cookie jar; ignore a corrupt file."""
        if os.path.isfile(self.cookiefile):
            try:
                self.cj.load(self.cookiefile)
            except Exception:
                # Narrowed from the original bare except, which also
                # swallowed KeyboardInterrupt/SystemExit.
                logging.critical('Invalid cookie file %s' % self.cookiefile)

    def __save_cookies(self):
        """Log and persist every cookie currently in the jar."""
        if self.cj is not None:
            for index, cookie in enumerate(self.cj):
                logging.info("Fetched cookie %s : %s" % (str(index), str(cookie)))
            self.cj.save(self.cookiefile)

    def __build_cookie_opener(self):
        """Create (and install globally) a cookie-aware URL opener."""
        self.cookie_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.cookie_opener)

    def __is_megavideo_link(self, link):
        """Return the megavideo video id if link matches, else None."""
        match = re.match(MEGAREGEX, link)
        if match and match.group(1):
            return match.group(1)
        return None

    def __get_debrid_link(self, megalink):
        """Resolve a megavideo link into a direct download URL, or None."""
        megaid = self.__is_megavideo_link(megalink)
        if not megaid:
            return None
        url = 'http://www.fedbac-tools.com/debrid/?v=%s' % megaid
        logging.info("Fetching url %s" % url)
        request = urllib2.Request(url)
        request.add_header("User-Agent", random.choice(useragents))
        html = self.cookie_opener.open(request).read()
        self.__save_cookies()
        soup = BeautifulSoup(html)
        regex = re.compile(DEBRIDREGEX)
        # The direct link is embedded in the flashvars of an <embed> tag.
        for embed in soup.fetch('embed'):
            flashvars = embed.get('flashvars')
            if not flashvars:
                # The original crashed with TypeError on regex.match(None)
                # when an embed tag carried no flashvars attribute.
                continue
            matches = regex.match(flashvars)
            if matches and matches.group(1) is not None:
                return matches.group(1)
        return None

    def download(self):
        """End-to-end: scrape, debrid the first link, download it."""
        links = self.get_megavideo_links()
        if not links:
            logging.critical("Cannot find links")
            return
        if len(links) > 1:
            logging.error("More than one link found. Getting first")
        link = links.pop()
        delink = self.__get_debrid_link(link)
        if not delink:
            # The original passed None straight to BigFileDownloader.
            logging.critical("Cannot get debrid link for %s" % link)
            return
        logging.info("Fetching debrided video from %s" % delink)
        dl = BigFileDownloader(delink, self.output_file)
        dl.download()

    def get_megavideo_links(self):
        """Return the set of megavideo links found on source_url, or None.

        The original additionally re-fetched source_url once per link it
        had found and re-scanned the identical page; those results were
        deduplicated away by the set below, so the redundant fetches have
        been removed.
        """
        links = self.__search_links(self.source_url)
        megavideo_links = [l for l in links if self.__is_megavideo_link(l)]
        if not megavideo_links:
            logging.error("Cannot find megavideo links")
            return None
        megalinks = set(megavideo_links)
        for l in megalinks:
            logging.info('Found megavideo link %s' % l)
        return megalinks

    def __search_links(self, url):
        """Fetch url and return every <a href>, naively absolutized."""
        if not self.cookie_opener:
            self.__build_cookie_opener()
        request = urllib2.Request(url)
        request.add_header("User-Agent", random.choice(useragents))
        html = self.cookie_opener.open(request).read()
        self.__save_cookies()
        soup = BeautifulSoup(html)
        links = []
        for anchor in soup.fetch('a'):
            href = anchor.get('href')
            if not href:
                continue
            if href.startswith('http') or href.startswith('www'):
                links.append(href)
            else:
                # Crude relative-link resolution, kept as in the original.
                links.append(''.join([url, '/', href]))
        return links
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    # Validate the command line: the original raised a bare IndexError
    # when arguments were missing.
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s <source_url> <output_file>\n" % sys.argv[0])
        sys.exit(1)
    dl = MegaDownloadDebrid(sys.argv[1], sys.argv[2])
    dl.download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment