YouTube Reverse Proxy
# Worker nginx configuration (these directives live inside a server {} block).
# Each location proxies a prefixed path back to the matching Google host:
#   /video/<host>/...  ->  <host>.googlevideo.com
#   /image/<host>/...  ->  <host>.ytimg.com
#   /photo/<host>/...  ->  <host>.ggpht.com
resolver 8.8.8.8;

location /video/ {
    if ($request_uri ~ "^/video/(.+?)/.+") {
        set $upstream_host $1.googlevideo.com;
        add_header Content-Disposition "attachment; filename=video.mp4;";
    }
    rewrite /video/.+?/(.+)$ /$1 break;
    proxy_buffering off;
    proxy_pass https://$upstream_host;
    proxy_set_header Host $upstream_host;
}

location /image/ {
    if ($request_uri ~ "^/image/(.+?)/.+") {
        set $upstream_host $1.ytimg.com;
    }
    rewrite /image/.+?/(.+)$ /$1 break;
    proxy_buffering off;
    proxy_pass http://$upstream_host;
    proxy_set_header Host $upstream_host;
}

location /photo/ {
    if ($request_uri ~ "^/photo/(.+?)/.+") {
        set $upstream_host $1.ggpht.com;
    }
    rewrite /photo/.+?/(.+)$ /$1 break;
    proxy_buffering off;
    proxy_pass http://$upstream_host;
    proxy_set_header Host $upstream_host;
}
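To see how these locations are used, here is a minimal sketch of the rewrite the Python front end (below) applies before redirecting a client to a worker. The worker address and the videoplayback query string are made up; the regex and the replacement function are the ones defined in the script.

    # Minimal sketch; worker host and query string are hypothetical.
    import re
    import functools

    RE_GOOGLEVIDEO = re.compile(r'/([a-zA-Z0-9-]+?)\.googlevideo\.com', re.IGNORECASE)

    def replace_googlevideo(worker, match):
        return '/%s/video/%s' % (worker[0], match.group(1))

    worker = ['203.0.113.10', True]  # [host, alive flag]
    stream_url = 'http://r4---sn-abc123.googlevideo.com/videoplayback?itag=22'
    print(RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), stream_url))
    # -> http://203.0.113.10/video/r4---sn-abc123/videoplayback?itag=22

The worker's "location /video/" block then reconstructs r4---sn-abc123.googlevideo.com from the first path segment, strips it with the rewrite, and proxies the request upstream.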
#!/usr/bin/env python
# Front-end proxy (Python 2): fetches youtube.com pages and rewrites them so
# that images, CSS and video streams are served through the nginx workers
# listed in WORKERS.
import logging
import httplib
import os
import subprocess
import socket
import datetime
import random
import signal
import urllib2
import urlparse
import urllib
import re
from gevent.wsgi import WSGIServer  # gevent.pywsgi.WSGIServer on newer gevent releases
import gevent.monkey
import gevent.pool
import gevent
import redis
import cgi
import functools
import Cookie

REDIS = redis.StrictRedis()
gevent.monkey.patch_all(subprocess=True)
proc_pool = gevent.pool.Pool(size=16)
LOGGER = logging.getLogger(__name__)
WORKERS = {
}  # you need to fill this: {server name: [[worker host, alive flag], ...]}
LISTEN_IP = ''
LISTEN_PORT = 3000
RE_YTIMG_CSS = re.compile(r'/s\.ytimg\.com(.*?\.css)', re.IGNORECASE)
RE_YTIMG_ESC = re.compile(r'\\/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
RE_YTIMG = re.compile(r'/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
RE_GGPHT = re.compile(r'https://([a-zA-Z0-9-]+?)\.ggpht\.com', re.IGNORECASE)
RE_GOOGLEVIDEO = re.compile(r'/([a-zA-Z0-9-]+?)\.googlevideo\.com', re.IGNORECASE)

def handle_request(environ, start_response):
    method = environ.get('REQUEST_METHOD')
    try:
        # wrap start_response so numeric codes become proper '200 OK' status lines
        lines = handle(environ, lambda status, headers: start_response(get_http_response(status), headers))
    except:
        path = environ.get('PATH_INFO', '').strip('/')
        LOGGER.exception('failed to handle request: %s %s' % (method, path))
        start_response('500 INTERNAL_SERVER_ERROR', [
            ('Content-Type', 'text/javascript'),
            ('Cache-Control', 'no-cache, no-store, must-revalidate'),
            ('Pragma', 'no-cache'),
            ('Expires', '0')])
        lines = ['Retry in 30 minutes']
    for line in lines:
        yield line


def get_http_response(code):
    # turn a numeric status code into a WSGI status line such as '200 OK'
    if code not in httplib.responses:
        return '%s Unknown' % code
    return '%s %s' % (code, httplib.responses[code])


def replace_ytimg_css(match):
    return '/your-reverse-proxy-ip/css/%s' % match.group(1)


def replace_ytimg_esc(match):
    return '\\/%s\\/image/%s' % (pick_worker()[0], match.group(1))


def replace_ytimg(match):
    return '/%s/image/%s' % (pick_worker()[0], match.group(1))


def replace_ggpht(match):
    return 'http://%s/photo/%s' % (pick_worker()[0], match.group(1))


def replace_googlevideo(worker, match):
    return '/%s/video/%s' % (worker[0], match.group(1))

def handle(environ, start_response):
    host = 'youtube.com'
    path = environ.get('PATH_INFO', '')
    if '/watch' == path:
        video_id = urlparse.parse_qs(environ['QUERY_STRING'])['v'][0]
        return handle_watch(video_id, environ, start_response)
    if '/watch_videos' == path:
        video_id = urlparse.parse_qs(environ['QUERY_STRING'])['video_ids'][0].split(',')[0]
        return handle_watch(video_id, environ, start_response)
    if path.startswith('/css/'):
        upstream_url = path.replace('/css/', '')
        upstream_url = 'http://s.ytimg.com/%s' % upstream_url
        return handle_css(upstream_url, environ, start_response)
    if path.startswith('/t/'):
        # /t/<domain>.js returns a tiny script that redirects the browser to http://<domain>
        domain = path.replace('/t/', '').replace('.js', '')
        words = 'window.location.href="http://%s";' % domain
        start_response(httplib.OK, [
            ('Content-Type', 'text/javascript'),
            ('Cache-Control', 'no-cache, no-store, must-revalidate'),
            ('Pragma', 'no-cache'),
            ('Expires', '0')])
        return [words]
    if path.startswith('//'):
        # protocol-relative links: bounce the browser straight to the real host
        start_response(httplib.FOUND, [
            ('Location', 'http:%s' % path)
        ])
        return []
    data = None
    if 'POST' == environ['REQUEST_METHOD']:
        if '/results' == path:
            post_body = cgi.FieldStorage(
                fp=environ['wsgi.input'],
                environ=environ,
                keep_blank_values=True)
            upstream_url = 'http://youtube.com/results?%s' % urllib.urlencode({'search_query': post_body['search_query'].value})
        else:
            # note: only the first line of the POST body is forwarded upstream
            data = environ['wsgi.input'].readline()
            upstream_url = 'http://%s%s' % (host, path)
    else:
        upstream_url = 'http://%s%s' % (host, path)
    if environ['QUERY_STRING']:
        upstream_url = '%s?%s' % (upstream_url, environ['QUERY_STRING'])
    LOGGER.info('upstream url: %s' % upstream_url)
    headers = {}
    if environ.get('HTTP_COOKIE'):
        LOGGER.info('cookie is: %s' % environ.get('HTTP_COOKIE'))
        headers['Cookie'] = environ.get('HTTP_COOKIE')
    try:
        response = urllib2.urlopen(urllib2.Request(upstream_url, data=data, headers=headers))
    except urllib2.HTTPError as e:
        start_response(e.code, [(k, v) for k, v in e.hdrs.items()])
        return [e.msg]
    except:
        raise
    headers = []
    for k, v in response.headers.items():
        if 'set-cookie' == k.lower():
            # keep cookies but drop the youtube.com domain restriction
            v = v.replace('domain=.youtube.com;', '')
        if 'x-frame' in k.lower():
            continue
        headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    # point static assets and video links at the workers
    body = RE_YTIMG_CSS.sub(replace_ytimg_css, body)
    body = RE_YTIMG_ESC.sub(replace_ytimg_esc, body)
    body = RE_YTIMG.sub(replace_ytimg, body)
    body = RE_GGPHT.sub(replace_ggpht, body)
    # body = body.replace('class="search-form', 'method="POST" class="search-form')
    # hide mastheads, banners and ad containers
    body = body.replace('class="video-masthead">', 'style="display: none;">')
    body = body.replace('class="branded-page-v2-top-row">', 'style="display: none;">')
    body = body.replace('style="z-index: 1">', 'style="display: none;">')
    body = body.replace('style="z-index: 1;">', 'style="display: none;">')
    body = body.replace('class="premium-yva-unexpanded"', 'style="display: none;"')
    # body = body.replace('id="masthead-search"', 'style="position: relative; padding: 0; margin-top: 3px; overflow: hidden;"')
    body = body.replace('ad.doubleclick.net', '127.0.0.1')
    body = body.replace('www.youtube.com', 'your-reverse-proxy-ip')
    return [body]

def handle_css(upstream_url, environ, start_response):
    response = urllib2.urlopen(urllib2.Request(upstream_url))
    headers = []
    for k, v in response.headers.items():
        if 'set-cookie' == k.lower():
            v = v.replace('domain=.youtube.com;', '')
        headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG.sub(replace_ytimg, body)
    return [body]

def handle_watch(video_id, environ, start_response):
    video_url = REDIS.get(video_id)
    if video_url:
        LOGGER.info('%s hit cache' % video_id)
    else:
        LOGGER.info('get url for movie: %s' % video_id)
        try:
            # resolve the direct stream url with youtube-dl in the subprocess pool
            video_url = proc_pool.spawn(get_url, video_id).get()
        except:
            LOGGER.exception('failed to get url')
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        if 'googlevideo.com' not in video_url:
            LOGGER.error('googlevideo.com not in url: %s' % video_url)
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        video_url = video_url.replace('https://', 'http://')
        history = set()
        success = False
        for i in range(3):
            # try up to three workers until one can reach the stream
            worker = pick_worker(history)
            try_url = RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), video_url)
            if is_url_correct(try_url):
                video_url = try_url
                success = True
                break
            # else:
            #     worker[1] = False
        if not success:
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        REDIS.set(video_id, video_url)
        REDIS.expire(video_id, 60 * 3)
    LOGGER.info('got url for movie: %s %s' % (video_id, video_url))
    start_response(httplib.FOUND, [
        ('Location', video_url),
        ('Content-Type', 'text/plain'),
        ('Cache-Control', 'max-age=180')
    ])
    return ['you can use this link to download the movie']

def get_url(video_id):
    if '/' in video_id:
        raise Exception('evil')  # crude sanity check on the video id
    return subprocess.check_output(
        'youtube-dl http://www.youtube.com/watch?v=%s -g'
        % video_id, shell=True).strip()

def serve_forever():
    try:
        server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request)
        LOGGER.info('serving HTTP on %s:%s...' % (LISTEN_IP, LISTEN_PORT))
    except:
        LOGGER.exception('failed to start HTTP server on %s:%s' % (LISTEN_IP, LISTEN_PORT))
        os._exit(1)
    server.serve_forever()


def pick_worker(history=()):
    # pick a random live worker; give up once every server name has been tried
    if len(history) >= len(WORKERS):
        raise Exception('no worker')
    server_name = random.choice(WORKERS.keys())
    worker = random.choice(WORKERS[server_name])
    if not worker[1]:
        return pick_worker(set(list(history) + [server_name]))
    return worker

def is_url_correct(url):
    class NoRedirectHandler(urllib2.HTTPRedirectHandler):
        # treat redirects as final responses so the raw status code can be inspected
        def http_error_302(self, req, fp, code, msg, headers):
            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
            infourl.status = code
            infourl.code = code
            return infourl
        http_error_300 = http_error_302
        http_error_301 = http_error_302
        http_error_303 = http_error_302
        http_error_307 = http_error_302

    try:
        opener = urllib2.build_opener(NoRedirectHandler())
        response = opener.open(url)
        response.close()
        if 200 == response.code:
            return True
        else:
            LOGGER.error('status code %s for url %s' % (response.code, url))
            return False
    except:
        LOGGER.exception('try url failed: %s' % url)
        return False

def refresh_workers():
    # periodically probe every worker and update its alive flag in place
    while True:
        for workers in WORKERS.values():
            for worker in workers:
                worker[1] = is_worker_alive(worker[0])
        LOGGER.info('%s refreshed workers' % datetime.datetime.now())
        gevent.sleep(60 * 60)


def is_worker_alive(worker_host):
    try:
        urllib2.urlopen('http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host, timeout=3).close()
        LOGGER.info('%s => OK' % worker_host)
        return True
    except:
        LOGGER.info('%s => FAILURE' % worker_host)
        return False


signal.signal(signal.SIGINT, lambda signum, frame: os._exit(0))
logging.basicConfig(level=logging.DEBUG)
gevent.spawn(refresh_workers)
serve_forever()
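The script leaves deployment details to the reader. From the way the code uses them, WORKERS maps an arbitrary server name to a list of mutable [host, alive] pairs (refresh_workers() flips the flag in place, pick_worker() reads it), redis must be reachable on localhost, youtube-dl must be on the PATH, and the 'your-reverse-proxy-ip' placeholders in the HTML rewriting need to be replaced with the front end's own address. Something like the following, with made-up addresses, would be a starting point:

    # Hypothetical configuration -- substitute your own worker hosts and bind address.
    WORKERS = {
        'worker-1': [['203.0.113.10', True]],
        'worker-2': [['203.0.113.11', True], ['203.0.113.12', True]],
    }
    LISTEN_IP = '0.0.0.0'   # bind on all interfaces
    LISTEN_PORT = 3000

Each worker host is expected to serve the nginx configuration above, which is why is_worker_alive() can probe http://<worker>/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg as a health check.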