Skip to content

Instantly share code, notes, and snippets.

@ymotongpoo
Last active March 30, 2024 05:31
Show Gist options
  • Save ymotongpoo/816368 to your computer and use it in GitHub Desktop.
Save ymotongpoo/816368 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# expressing my strong respect for mopemope
import re
import urllib
from urlparse import urlunparse, urlparse
import gzip
import eventlet
from eventlet.green import urllib2
import cookielib
from pyquery import PyQuery as pq
from functools import partial
from werkzeug import secure_filename
import os.path
# Green-thread pool created at import time (not referenced by the code below).
pool = eventlet.GreenPool()
# Base URL of the search page; query strings and result links are appended to it.
# A plain ``u`` prefix is enough here (no backslashes), and unlike ``ur`` it
# is accepted by every Python version.
search_url = u"http://www.megaporn.com/video/"
# HTTP method used for the search request: "GET" or "POST" (case-insensitive).
method = "GET"
# Search keywords; one search request is issued per entry.
query = [u"creampie"]
# Encoding used to decode the search result page.
charset = "utf-8"
# All patterns below are raw strings so that regex escapes such as ``\s`` and
# ``\.`` reach the regex engine untouched instead of relying on Python leaving
# unknown string escapes alone.  They are compiled with re.VERBOSE, so only
# the backslash-escaped spaces are significant whitespace.

# URL of the page that holds the flv, e.g.
# http://cdn.empflix.com/empflv/xxxxxxxxxx
# Matches the ``flashvars.embed = "...";`` assignment and captures the quoted
# value as ``embed``.
target_url_ptn = re.compile(r"""
\s*flashvars\.embed\ =\ "
(?P<embed>\S+)
";
""", re.VERBOSE)
# Matches the ``flashvars.title = "...";`` assignment and captures the quoted
# value as ``title``.
file_title_ptn = re.compile(r"""
\s*flashvars\.title\ =\ "
(?P<title>\S+)
";
""", re.VERBOSE)
# Matches a commented-out ``<!--<file>http://....flv</file>-->`` element and
# captures the flv address as ``url``.
flv_url_ptn = re.compile(r"""
<!--<file>
(?P<url>http://\S+\.flv)
</file>-->
""", re.VERBOSE)
# Directory name intended for downloaded files.
download_dir = "./download/megaporn"
def build_opener():
    """Create the URL opener used for every request in this script.

    The opener is built with a cookie processor backed by a fresh
    ``CookieJar`` and with a redirect handler, and it sends fixed
    ``User-Agent`` and ``Connection`` headers.

    Returns:
        The configured ``urllib2`` opener.
    """
    handlers = (
        urllib2.HTTPCookieProcessor(cookielib.CookieJar()),
        urllib2.HTTPRedirectHandler(),
    )
    url_opener = urllib2.build_opener(*handlers)
    url_opener.addheaders = [
        ("User-Agent", "Mozilla/5.0 (compatible; python)"),
        ("Connection", "keep-alive"),
    ]
    return url_opener
def get_search_result(opener, query):
    """Run one search per keyword and collect the video page URLs found.

    Args:
        opener: object whose ``open(url[, data])`` returns a readable,
            closable response (for example the result of ``build_opener``).
        query: iterable of search keywords.

    Returns:
        List of view-page URLs.  Each one is the module-level ``search_url``
        with the ``href`` of a ``.vid_link_layer`` element appended.

    Raises:
        ValueError: if the module-level ``method`` is neither "GET" nor
            "POST" (case-insensitive).
    """
    view_urls = []
    http_method = method.upper()  # loop-invariant, so compute it once
    for q in query:
        form_dict = {u'c': u'videos',
                     u'setlang': u'jp',
                     # urlencode() calls str() on every value, which raises
                     # UnicodeEncodeError for non-ASCII unicode text; hand it
                     # already-encoded bytes instead.
                     u's': unicode(q).encode(charset)}
        params = urllib.urlencode(form_dict)
        print(params)
        if http_method == "GET":
            conn = opener.open(search_url + u'?' + params)
        elif http_method == "POST":
            conn = opener.open(search_url, params)
        else:
            raise ValueError(method)
        # Always release the connection, even if reading or decoding fails.
        try:
            page = conn.read().decode(charset)
        finally:
            conn.close()
        d = pq(page)
        for a in d(".vid_link_layer"):
            # The link comes back as a relative path such as "?v=xxxxxxx".
            view_url = pq(a).attr.href
            if not view_url:
                # Element without an href: nothing to append to search_url.
                continue
            print(view_url)
            view_urls.append(search_url + view_url)
    return view_urls
def _get_download_url(opener, view_url):
    """Fetch a video view page and extract its embed URL and a file name.

    Args:
        opener: object whose ``open(url)`` returns a readable, closable
            response.
        view_url: URL of the view page to fetch.

    Returns:
        Tuple ``(download_url, filename)``.  ``download_url`` is the
        URL-unquoted ``flashvars.embed`` value; ``filename`` is the
        URL-unquoted ``flashvars.title`` value passed through
        ``secure_filename`` with ``".flv"`` appended.  Either element is
        ``None`` when its pattern is not found in the page, instead of the
        function failing on an unassigned local variable.
    """
    conn = opener.open(view_url)
    # Always release the connection, even if the read fails.
    try:
        data = conn.read()
    finally:
        conn.close()

    # File name
    filename = None
    tmatch = file_title_ptn.search(data)
    if tmatch:
        title = urllib.unquote(tmatch.group('title'))
        filename = secure_filename(title) + ".flv"

    # Original intent: go to the next page and get the direct flv link.
    # NOTE(review): the embed page is not fetched here; the value returned is
    # the embed page URL itself.
    download_url = None
    ematch = target_url_ptn.search(data)
    if ematch:
        download_url = urllib.unquote(ematch.group('embed'))
    return download_url, filename
if __name__ == "__main__":
    # Search with the configured keywords, then resolve the first hit only.
    opener = build_opener()
    view_urls = get_search_result(opener, query)
    if not view_urls:
        # Indexing an empty result list would raise IndexError; exit with a
        # clear message (written to stderr, non-zero status) instead.
        raise SystemExit("no search results for %r" % (query,))
    print(_get_download_url(opener, view_urls[0]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment