Created
November 9, 2010 08:50
-
-
Save mopemope/668874 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from werkzeug import secure_filename | |
import eventlet | |
from eventlet.green import urllib2 | |
from pyquery import PyQuery as pq | |
from urlparse import urlparse | |
import psyco | |
psyco.full() | |
# Listing-page URL templates to crawl. Each template contains a single
# '%s' placeholder for the page number, filled in by get_pagelist().
# All entries except the first are disabled; uncomment to include them.
search_urls = [
    'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=rd',
    #'http://www.empflix.com/browsecat.php?page=%s&chid=17',
    #'http://www.empflix.com/search.php?page=%s&what=Mondomuyou',
    #'http://www.empflix.com/search.php?page=%s&what=Mondo64',
    #'http://www.empflix.com/search.php?page=%s&what=trg',
    #'http://www.empflix.com/search.php?page=%s&what=smr',
    #'http://www.empflix.com/search.php?page=%s&what=tkyo',
    #'http://www.empflix.com/search.php?page=%s&what=manko',
    #'http://www.empflix.com/search.php?page=%s&what=omanko',
    #'http://www.empflix.com/search.php?page=%s&what=rhj',
    #'http://www.empflix.com/search.php?page=%s&what=Tokyo',
    #'http://www.empflix.com/search.php?page=%s&what=TokyoHot',
    #'http://www.empflix.com/search.php?page=%s&what=Tora',
    #'http://www.empflix.com/search.php?page=%s&what=Sky+Angel',
    #'http://www.empflix.com/search.php?page=%s&what=Santa+Gal',
    #'http://www.empflix.com/search.php?page=%s&what=Mugen',
    #'http://www.empflix.com/search.php?page=%s&what=XVN',
    #'http://www.empflix.com/search.php?page=%s&what=Asami',
    #'http://www.empflix.com/search.php?page=%s&what=haruka',
    #'http://www.empflix.com/search.php?page=%s&what=Asuka',
    #'http://www.empflix.com/search.php?page=%s&what=Maki',
    #'http://www.empflix.com/search.php?page=%s&what=Nao',
    #'http://www.empflix.com/search.php?page=%s&what=Yui',
    #'http://www.empflix.com/search.php?page=%s&what=Yuki',
    #'http://www.empflix.com/search.php?page=%s&what=Yuka',
    #'http://www.empflix.com/search.php?page=%s&what=Saki',
    #'http://www.empflix.com/search.php?page=%s&what=Rika',
    #'http://www.empflix.com/search.php?page=%s&what=Riko',
    #'http://www.empflix.com/search.php?page=%s&what=sara%%20Part2&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=pakopako',
    #'http://www.empflix.com/search.php?page=%s&what=pacopaco',
    #'http://www.empflix.com/search.php?page=%s&what=Miku',
    #'http://www.empflix.com/search.php?page=%s&what=0930',
    #'http://www.empflix.com/search.php?page=%s&what=h0930',
    #'http://www.empflix.com/search.php?page=%s&what=4610',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=JAV%%20Amateur&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=jav%%20creampie&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=Serina',
    #'http://www.empflix.com/search.php?page=%s&what=hikaru',
    #'http://www.empflix.com/search.php?page=%s&what=tsubaki',
    #'http://www.empflix.com/search.php?page=%s&what=mikado',
    #'http://www.empflix.com/search.php?page=%s&what=catwalk',
    #'http://www.empflix.com/search.php?page=%s&what=Samurai',
    #'http://www.empflix.com/search.php?page=%s&what=Jeans+Fetish',
    #'http://www.empflix.com/search.php?page=%s&what=red+hot+fetish',
    #'http://www.empflix.com/search.php?page=%s&what=pink+puncher',
    #'http://www.empflix.com/search.php?page=%s&what=nakadashi&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=okusama&sort=relevance',
    #'http://www.empflix.com/search.php?page=%s&what=tokyo&sort=relevance',
    #'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=mr',
    #'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Creampie',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Amateur&adv_category[]=Asian&adv_category[]=Creampie',
    #'http://www.empflix.com/advanced_search.php?page=%s&what=&sort=length&per_page=0&adv_category[]=Asian&adv_category[]=Mature',
]
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=japan%%20creampie&sort=relevance' | |
#empflix_cream_url = 'http://www.empflix.com/search.php?page=%s&what=pacopaco%%20sara%%20Part2&sort=relevance' | |
# Detail-page URLs queued for download (filled by read_detail_urls()).
detail_urls = []
# When True, the numeric id from the page query string is prefixed to
# each saved file name (see download()) to avoid name collisions.
id_mode = True
# Directory that finished .flv files are written into.
save_path = "/home/ma2/Public/empflix/"
# Green-thread pool: at most 2 concurrent downloads.
pool = eventlet.GreenPool(2)
import re
# Matches the flash-player bootstrap line embedded in a detail page:
#     so.addVariable('config', '<url-encoded config URL>');
# group(1) captures the config URL. Written as a raw string so escapes
# like \s and \w reach the regex engine verbatim instead of relying on
# Python passing unrecognized string escapes through unchanged.
download_re = re.compile(r"\s*so.addVariable\('config',\s*'([\w\d\.:/%=_-]*)'\);", re.M)
def get_pagelist(url, page=1):
    """Fetch one listing page and return the detail-page URLs on it.

    url  -- listing URL template containing one '%s' page placeholder
    page -- 1-based page number substituted into the template

    Returns a list of href strings, one per ``.thumb`` entry.
    """
    urls = []
    conn = urllib2.urlopen(url % page)
    try:
        html = conn.read()  # was bound to 'page', shadowing the parameter
    finally:
        conn.close()  # previously leaked
    doc = pq(html)
    for thumb in doc(".thumb"):
        # each thumbnail wraps an <a> pointing at the video detail page
        urls.append(pq(thumb.find("a")).attr.href)
    return urls
def _get_flv(page):
    """Extract an FLV download URL and file name from a detail page.

    Fallback path for pages without a download button: the page embeds
    ``so.addVariable('config', '<url>');``; that config XML contains a
    <file> element holding the real video URL, and the page <h2> holds
    the title used for the file name.

    Returns (download_url, file_name), or (None, None) when the config
    variable is absent so callers can skip the item gracefully.
    """
    match = download_re.search(page)
    if not match:
        # Previously fell through returning a bare None, which crashed
        # the tuple unpack in get_download_url with a TypeError.
        return None, None
    import urllib
    config_url = urllib.unquote(match.group(1))
    conn = urllib2.urlopen(config_url)
    try:
        config_xml = conn.read()
    finally:
        conn.close()  # previously leaked
    download_url = pq(config_xml)("file").text()
    # sanitise the page title for use as a filesystem name
    file_name = secure_filename(pq(page)("h2:first").text() + ".flv")
    return download_url, file_name
def get_download_url(url):
    """Resolve a detail-page URL to (url, download_url, file_name).

    Prefers the explicit ``.downloadButton`` link; falls back to
    scraping the flash-player config via _get_flv(). Either element of
    the result pair may be None when the fallback finds nothing.
    """
    conn = urllib2.urlopen(url)
    try:
        # read() replaces "".join(conn.readlines()) -- same bytes,
        # without building an intermediate line list
        page = conn.read()
    finally:
        conn.close()  # previously leaked
    d = pq(page)
    download_url = d(".downloadButton").attr.href
    if download_url:
        # file name is the last path segment of the download link
        file_name = urlparse(download_url).path.split("/")[-1]
    else:
        download_url, file_name = _get_flv(page)
    return url, download_url, file_name
def download_flv(url, down_url, file_name): | |
print "'%s' ---- Try Download ----" % url | |
out_path = path.join(save_path, file_name) | |
if not file_name: | |
print "'%s' ** Not Found Link ** " % url | |
return | |
partial = False | |
try: | |
conn = urllib2.urlopen(down_url) | |
length = conn.info()['Content-Length'] | |
length = int(length) | |
if length < 1024 * 1024 * 100 or length > 1024 * 1024 * 900: | |
print "*** '%s' is small! Skip!!!'%s' ***" % (url, length) | |
return | |
if path.exists(out_path): | |
size = path.getsize(out_path) | |
if size < length: | |
r = "bytes=%s-" % size | |
req = urllib2.Request(down_url, headers={"Range":r}) | |
conn = urllib2.urlopen(req) | |
print "'%s' == Resume!! '%s' ==" % (url, file_name) | |
print "'%s' == File '%s' Size: %d/%d'" % (url, file_name, size, length) | |
partial = True | |
else: | |
print "'%s' == Downloaded '%s' ==" % (url, file_name) | |
return | |
except: | |
import traceback | |
print traceback.format_exc() | |
pool.spawn_n(download, url) | |
return | |
if partial: | |
f = open(out_path, "rb+") | |
f.seek(0, 2) | |
else: | |
f = open(out_path, "wb") | |
print "'%s' == Start '%s' ==" % (url, file_name) | |
while True: | |
data = conn.read(1024 * 512 ) | |
if not data: | |
break | |
f.write(data) | |
#per = path.getsize(out_path) / float(length) * 100.0 | |
#print "'%s' == '%s' %d%% done. ==" % (url, file_name, per) | |
print "'%s' == Finish '%s' ==" % (url, file_name) | |
def download(url):
    """Resolve one detail-page URL and download its video.

    Skips premium-only pages, anchor-only ('#...') download links, and
    file names containing 'mosaic'. When id_mode is set, the numeric id
    from the page query string is prefixed to the file name.
    """
    if url.find("premium.empflix.com") >= 0:
        return  # premium pages need a login; nothing to fetch
    url, download_url, file_name = get_download_url(url)
    # Strip the leading 'id=' from the query string.
    # NOTE(review): assumes the query starts with 'id=' -- verify
    # against the detail URLs get_pagelist actually yields.
    video_id = urlparse(url).query[3:]  # renamed from 'id' (shadowed builtin)
    if id_mode:
        file_name = video_id + "_" + file_name
    if not download_url.startswith('#'):
        if file_name.lower().find('mosaic') == -1:
            download_flv(url, download_url, file_name)
# Shared work queue of detail-page URLs awaiting download.
q = []
def start(url, min_page=66, max_page=70):
    """Collect detail URLs for pages min_page..max_page of *url*, then
    spawn one green thread per URL on the shared pool."""
    for page_no in xrange(min_page, max_page + 1):
        q.extend(get_pagelist(url, page=page_no))
    # reverse + pop() drains the queue in original discovery order
    q.reverse()
    while q:
        pool.spawn_n(download, q.pop())
def read_detail_urls(file='empflix.txt'):
    """Append every non-blank line of *file* to detail_urls.

    file -- path to a text file with one detail-page URL per line.
            (Parameter name kept for compatibility although it shadows
            the builtin.)
    """
    # 'with' closes the handle (previously leaked); the unused line
    # counter 'i' was dropped.
    with open(file) as fp:
        for href in fp:
            href = href.strip()
            if href:
                detail_urls.append(href)
if __name__ == '__main__':
    #read_detail_urls()
    #detail_urls.reverse()
    #q.extend(detail_urls)
    # Crawl every enabled listing template, then block until all
    # spawned download greenthreads have finished.
    for url in search_urls:
        start(url=url)
    pool.waitall()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
i love