-
-
Save wastee/94c580175250b86da5e2b5f66d5e8fc4 to your computer and use it in GitHub Desktop.
Simple Python proxy server based on Flask and Requests with support for GET and POST requests.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A simple proxy server, based on original by gear11: | |
https://gist.github.com/gear11/8006132 | |
Modified from original to support both GET and POST, status code passthrough, header and form data passthrough. | |
Usage: http://hostname:port/p/(URL to be proxied, minus protocol) | |
For example: http://localhost:5000/p/www.google.com | |
""" | |
import re | |
from urllib.parse import urlparse, urlunparse | |
from flask import Flask, render_template, request, abort, Response, redirect | |
import requests | |
import logging | |
app = Flask(__name__.split('.')[0]) | |
logging.basicConfig(level=logging.INFO) | |
APPROVED_HOSTS = set(["google.com", "www.google.com", "yahoo.com"]) | |
CHUNK_SIZE = 1024 | |
LOG = logging.getLogger("app.py") | |
@app.route('/<path:url>', methods=["GET", "POST"]) | |
def root(url): | |
# If referred from a proxy request, then redirect to a URL with the proxy prefix. | |
# This allows server-relative and protocol-relative URLs to work. | |
referer = request.headers.get('referer') | |
if not referer: | |
return Response("Relative URL sent without a a proxying request referal. Please specify a valid proxy host (/p/url)", 400) | |
proxy_ref = proxied_request_info(referer) | |
host = proxy_ref[0] | |
redirect_url = "/p/%s/%s%s" % (host, url, ("?" + request.query_string.decode('utf-8') if request.query_string else "")) | |
LOG.debug("Redirecting relative path to one under proxy: %s", redirect_url) | |
return redirect(redirect_url) | |
@app.route('/p/<path:url>', methods=["GET", "POST"]) | |
def proxy(url): | |
"""Fetches the specified URL and streams it out to the client. | |
If the request was referred by the proxy itself (e.g. this is an image fetch | |
for a previously proxied HTML page), then the original Referer is passed.""" | |
# Check if url to proxy has host only, and redirect with trailing slash | |
# (path component) to avoid breakage for downstream apps attempting base | |
# path detection | |
url_parts = urlparse('%s://%s' % (request.scheme, url)) | |
if url_parts.path == "": | |
parts = urlparse(request.url) | |
LOG.warning("Proxy request without a path was sent, redirecting assuming '/': %s -> %s/" % (url, url)) | |
return redirect(urlunparse(parts._replace(path=parts.path+'/'))) | |
LOG.debug("%s %s with headers: %s", request.method, url, request.headers) | |
r = make_request(url, request.method, dict(request.headers), request.form) | |
LOG.debug("Got %s response from %s",r.status_code, url) | |
headers = dict(r.raw.headers) | |
def generate(): | |
for chunk in r.raw.stream(decode_content=False): | |
yield chunk | |
out = Response(generate(), headers=headers) | |
out.status_code = r.status_code | |
return out | |
def make_request(url, method, headers={}, data=None): | |
url = 'http://%s' % url | |
# Ensure the URL is approved, else abort | |
if not is_approved(url): | |
LOG.warn("URL is not approved: %s", url) | |
abort(403) | |
# Pass original Referer for subsequent resource requests | |
referer = request.headers.get('referer') | |
if referer: | |
proxy_ref = proxied_request_info(referer) | |
headers.update({ "referer" : "http://%s/%s" % (proxy_ref[0], proxy_ref[1])}) | |
# Fetch the URL, and stream it back | |
LOG.debug("Sending %s %s with headers: %s and data %s", method, url, headers, data) | |
return requests.request(method, url, params=request.args, stream=True, headers=headers, allow_redirects=False, data=data) | |
def is_approved(url): | |
"""Indicates whether the given URL is allowed to be fetched. This | |
prevents the server from becoming an open proxy""" | |
parts = urlparse(url) | |
return parts.netloc in APPROVED_HOSTS | |
def proxied_request_info(proxy_url): | |
"""Returns information about the target (proxied) URL given a URL sent to | |
the proxy itself. For example, if given: | |
http://localhost:5000/p/google.com/search?q=foo | |
then the result is: | |
("google.com", "search?q=foo")""" | |
parts = urlparse(proxy_url) | |
if not parts.path: | |
return None | |
elif not parts.path.startswith('/p/'): | |
return None | |
matches = re.match('^/p/([^/]+)/?(.*)', parts.path) | |
proxied_host = matches.group(1) | |
proxied_path = matches.group(2) or '/' | |
proxied_tail = urlunparse(parts._replace(scheme="", netloc="", path=proxied_path)) | |
LOG.debug("Referred by proxy host, uri: %s, %s", proxied_host, proxied_tail) | |
return [proxied_host, proxied_tail] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment