-
-
Save gear11/8006132 to your computer and use it in GitHub Desktop.
""" | |
A simple proxy server. Usage:
http://hostname:port/p/(URL to be proxied, minus protocol)
For example:
http://localhost:8080/p/www.google.com
""" | |
from flask import Flask, render_template, request, abort, Response, redirect | |
import requests | |
import logging | |
# Flask application, named after the first dotted component of this module's name.
app = Flask(__name__.split('.')[0])

# Log at INFO level; all functions in this module log through LOG.
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger("main.py")

# Hosts that may be fetched through the proxy (exact match on the host part).
APPROVED_HOSTS = {"google.com", "www.google.com", "yahoo.com"}

# Chunk size, in bytes, used when streaming a proxied response body.
CHUNK_SIZE = 1024
@app.route('/<path:url>')
def root(url):
    """Handle any request that is not addressed to the ``/p/`` proxy route.

    If the request was referred by a previously proxied page, redirect it to
    the equivalent ``/p/<host>/<url>`` address so that server-relative and
    protocol-relative links inside proxied pages keep working. Otherwise
    render the ``hello.html`` template.

    Args:
        url: The request path captured by the route (without query string).

    Returns:
        A redirect response to the proxied URL, or the rendered template.
    """
    LOG.info("Root route, path: %s", url)

    proxy_ref = proxy_ref_info(request)
    if proxy_ref:
        # The raw query string is bytes on Python 3; decode it before
        # concatenating with text, otherwise "?" + bytes raises TypeError.
        query = request.query_string
        if isinstance(query, bytes):
            query = query.decode("utf-8", "replace")
        suffix = "?" + query if query else ""

        redirect_url = "/p/%s/%s%s" % (proxy_ref[0], url, suffix)
        LOG.info("Redirecting referred URL to: %s", redirect_url)
        return redirect(redirect_url)

    # Not referred from a proxied page: default behavior.
    return render_template('hello.html', name=url, request=request)
@app.route('/p/<path:url>')
def proxy(url):
    """Fetch the specified URL and stream it out to the client.

    If the request was referred by the proxy itself (e.g. this is an image
    fetch for a previously proxied HTML page), the original Referer is passed
    to the upstream server (see ``get_source_rsp``).

    Args:
        url: The URL to proxy, minus protocol (e.g. ``www.google.com/search``).

    Returns:
        A streaming ``Response`` carrying the upstream status code, the
        upstream headers that are safe to forward, and the upstream body.
    """
    r = get_source_rsp(url)
    LOG.info("Got %s response from %s", r.status_code, url)

    # Hop-by-hop headers describe the upstream connection only and must not
    # be forwarded; in particular, forwarding "Transfer-Encoding: chunked"
    # makes clients try to parse a body that is no longer chunk-framed.
    skipped = set([
        "connection", "keep-alive", "proxy-authenticate",
        "proxy-authorization", "te", "trailers", "transfer-encoding",
        "upgrade",
    ])
    # iter_content() hands out the *decoded* body, so when the upstream body
    # was compressed its Content-Encoding and Content-Length no longer
    # describe what is sent to the client.
    if "Content-Encoding" in r.headers:
        skipped.update(("content-encoding", "content-length"))

    # Read from the raw header collection so that repeated headers (such as
    # several Set-Cookie lines) stay separate instead of being merged.
    headers = [
        (name, value)
        for name, value in r.raw.headers.items()
        if name.lower() not in skipped
    ]

    # Forward the upstream status code instead of always answering 200.
    return Response(
        r.iter_content(CHUNK_SIZE),
        status=r.status_code,
        headers=headers,
    )
def get_source_rsp(url, timeout=30):
    """Fetch ``url`` from its origin server and return the streaming response.

    Args:
        url: The URL to fetch, minus protocol; ``http://`` is prepended.
        timeout: Seconds to wait for the upstream server to connect or to
            send data before giving up.

    Returns:
        The ``requests`` response object, opened with ``stream=True`` so the
        body can be relayed chunk by chunk.

    Raises:
        Aborts the current request with 403 if the host is not approved, and
        with 502 if the upstream request fails or times out.
    """
    url = 'http://%s' % url
    LOG.info("Fetching %s", url)

    # Ensure the URL is approved, else abort.
    if not is_approved(url):
        LOG.warning("URL is not approved: %s", url)
        abort(403)

    # Pass the original Referer for subsequent resource requests.
    proxy_ref = proxy_ref_info(request)
    if proxy_ref:
        headers = {"Referer": "http://%s/%s" % (proxy_ref[0], proxy_ref[1])}
    else:
        headers = {}

    # Fetch the URL and stream it back. Without a timeout a stalled upstream
    # server would block this worker indefinitely.
    LOG.info("Fetching with headers: %s, %s", url, headers)
    try:
        return requests.get(
            url,
            stream=True,
            params=request.args,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report upstream failures as a gateway error, not a server crash.
        LOG.warning("Upstream request failed for %s: %s", url, exc)
        abort(502)
def is_approved(url):
    """Return whether the host of ``url`` is in ``APPROVED_HOSTS``.

    Restricting fetches to an explicit host list prevents the server from
    becoming an open proxy.
    """
    _, host, _ = split_url(url)
    return host in APPROVED_HOSTS
def split_url(url):
    """Split the given URL into a tuple of (protocol, host, uri).

    The host keeps any port, and the uri is everything after the first ``/``
    following the host (without that slash), including any query string.

    Input without a ``://`` separator does not raise: the protocol is
    returned as an empty string and the rest is split as host and uri, with
    a leading ``//`` (protocol-relative form) removed first.

    Args:
        url: The URL string to split.

    Returns:
        A ``(protocol, host, uri)`` tuple of strings; missing parts are ``""``.
    """
    proto, sep, rest = url.partition("://")
    if not sep:
        # No scheme present: treat the whole input as "host/uri".
        proto, rest = "", url
        if rest.startswith("//"):
            rest = rest[2:]
    host, _, uri = rest.partition("/")
    return (proto, host, uri)
def proxy_ref_info(request):
    """Parse Referer info indicating the request is from a previously proxied page.

    For example, if:
        Referer: http://localhost:8080/p/google.com/search?q=foo
    then the result is:
        ("google.com", "search?q=foo")

    Args:
        request: The incoming request; only its ``Referer`` header is read.

    Returns:
        A ``(host, uri)`` tuple when the Referer path starts with the ``p``
        (or ``d``) prefix, otherwise ``None``. A missing or malformed Referer
        also yields ``None``.
    """
    ref = request.headers.get('referer')
    if not ref:
        return None

    # The Referer header is client-controlled; treat an unparsable value as
    # "not referred by the proxy" rather than failing the request.
    try:
        _, _, uri = split_url(ref)
    except ValueError:
        return None

    first, sep, rest = uri.partition("/")
    # Compare against the exact prefixes. A substring test against "pd"
    # would also accept an empty first segment and the segment "pd".
    # NOTE(review): no "/d/" route is visible in this file; the "d" prefix is
    # kept for compatibility and is presumably served elsewhere.
    if not sep or first not in ("p", "d"):
        return None

    host, _, path = rest.partition("/")
    LOG.info("Referred by proxy host, uri: %s, %s", host, path)
    return (host, path)
I'm working on a flask-based, bitcoin micro-payments proxy for a small server. This code helped.
Content-Encoding gzip is not considered.
This was neat and helpful! Thanks
Doesn't work for video files - (specifically, the seeking). Does someone know why?
Example: /p/cdn.selz.com/plyr/1.5/View_From_A_Blue_Moon_Trailer-HD.mp4
Don't use generate() in line 48, or else you get truncated data if the content is gzip-encoded.
Either use r.raw.data (and delete generate function) or iterate over r.raw.stream(decode_content=False) instead of r.iter_content()
I'm still getting "Illegal or missing hexadecimal sequence in chunked-encoding" :(
I tried both r.raw.data and r.raw.stream(...).
I can get the response content via r.text and even construct the Response, but somehow the client still fails with that error.
The response headers were:
Content-Type: application/vnd.orcid+xml; qs=5; charset=UTF-8'
Transfer-Encoding: chunked
Connection: keep-alive
Content-Encoding: gzip
Any ideas what might have gone wrong?
@gear11 do you have a preferred license for this? My preference is for MIT, but since I'm forking this wanted to pass that by you.
I used a similar approach and found that the requests library does weird things when there are multiple headers with the same name ("Set-Cookie", in my case). Changing this line:
return Response(generate(), headers = headers)
to:
return Response(generate(), headers = r.raw.headers.items())
fixed the problem.