Last active
June 27, 2018 11:28
-
-
Save shaond/f1d5d6250a0411675990 to your computer and use it in GitHub Desktop.
Node.js code to proxy an upstream webpage using cheerio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http'); | |
var express = require('express'); | |
var router = express.Router(); | |
/**
 * GET /?url=<target>
 *
 * Fetches the page named by the `url` query parameter, rewrites it with
 * proxy() and sends the rewritten HTML back with caching disabled.
 *
 * Responses:
 *   400 - the `url` query parameter is missing, empty or not a single string
 *   502 - the upstream page could not be fetched
 *   otherwise the rewritten HTML; failures while rewriting are forwarded to
 *   the Express error handler via next().
 *
 * NOTE(review): this endpoint fetches any URL the caller supplies, which is a
 * server-side request forgery risk if it is reachable by untrusted clients.
 * Consider restricting the allowed hosts before exposing it.
 */
router.get('/', function(req, res, next) {
  var request = require('request');
  var url = req.query.url;

  // Express yields undefined (no parameter) or an array/object (repeated or
  // nested parameter); only a non-empty string is usable as a target.
  if (typeof url !== 'string' || url === '') {
    res.status(400).send('Missing or invalid "url" query parameter');
    return;
  }

  // If our URL doesn't contain a protocol or
  // ending slash, add it here
  if (url.indexOf('://') === -1) {
    url = 'http://' + url;
    if (!url.match(/\/$/)) {
      url += '/';
    }
  }

  request.get(url, function(error, response, body) {
    if (error) {
      // Always answer the client; otherwise the request hangs until timeout.
      res.status(502).send('Unable to fetch upstream URL');
      return;
    }

    var proxied;
    try {
      proxied = proxy(body, url);
    } catch (err) {
      // A throw inside this async callback would otherwise be uncaught.
      next(err);
      return;
    }

    res.header('Cache-Control', 'no-cache, private, no-store, must-revalidate, max-stale=0, post-check=0, pre-check=0');
    res.send(proxied);
  });
});
/**
 * Rewrites an upstream HTML document so it can be served through this proxy.
 *
 * - Adds a <base> element pointing at the upstream URL.
 * - Resolves the href/src of <a>, <img>, <link> and <script> elements
 *   against the upstream URL, so relative references become absolute.
 * - When NODE_ENV is "production" or "development", points every http(s)
 *   anchor back at the proxy endpoint and appends the environment's script
 *   tag to <body>. For any other NODE_ENV value neither step is performed.
 *
 * @param {string} html - Markup of the upstream page.
 * @param {string} url - URL the markup was fetched from; used as the base
 *   for resolving relative references.
 * @returns {string} The rewritten document serialized as HTML.
 */
function proxy(html, url) {
  var cheerio = require('cheerio');
  var URL = require('url').URL;
  var $ = cheerio.load(html);

  // Resolve a reference against the upstream URL. Returns null when the
  // attribute is missing/empty or cannot be parsed, so callers leave the
  // element untouched instead of writing "undefined" into it.
  function resolve(reference) {
    if (!reference) {
      return null;
    }
    try {
      return new URL(reference, url).href;
    } catch (e) {
      return null;
    }
  }

  // Make `attribute` absolute on every element matched by `selector`.
  function absolutize(selector, attribute) {
    $(selector).each(function() {
      var resolved = resolve($(this).attr(attribute));
      if (resolved !== null) {
        $(this).attr(attribute, resolved);
      }
    });
  }

  // Set href through attr() so quotes in the URL are escaped rather than
  // concatenated into the markup.
  $('head').append($('<base>').attr('href', url));

  // URL resolution leaves absolute, protocol-relative, data:, mailto: and
  // javascript: references valid, so no scheme exclusions are needed here.
  absolutize('a[href]', 'href');
  absolutize('img[src]', 'src');
  absolutize('link[href]', 'href');
  absolutize('script[src]', 'src');

  var proxyPrefix = null;
  var mavenjs = null;
  if (process.env.NODE_ENV === 'production') {
    mavenjs = '<script id="prod"></script>\n';
    proxyPrefix = 'http://example.com/proxy?url=';
  } else if (process.env.NODE_ENV === 'development') {
    mavenjs = '<script id="dev"></script>\n';
    proxyPrefix = 'http://localhost:3000/proxy?url=';
  }

  if (proxyPrefix !== null) {
    $('a[href]').each(function() {
      var href = $(this).attr('href');
      // Only web links can be fetched by the proxy; leave mailto:,
      // javascript: and similar schemes as they are.
      if (/^https?:\/\//i.test(href)) {
        // Encode the target so its own query string and fragment survive
        // as part of the single `url` parameter.
        $(this).attr('href', proxyPrefix + encodeURIComponent(href));
      }
    });
    $('body').append(mavenjs);
  }

  return $.html();
}
module.exports = router; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code doesn't include the surrounding Express application code (app setup and mounting of this router). Use with caution, as it may not suit your requirements.