Skip to content

Instantly share code, notes, and snippets.

@shaond
Last active June 27, 2018 11:28
Show Gist options
  • Save shaond/f1d5d6250a0411675990 to your computer and use it in GitHub Desktop.
Save shaond/f1d5d6250a0411675990 to your computer and use it in GitHub Desktop.
Node.js code to proxy an upstream webpage using cheerio
var http = require('http');
var express = require('express');
var router = express.Router();
/**
 * GET /?url=<target>
 *
 * Fetches the upstream page named by the `url` query parameter, rewrites it
 * with `proxy()` and sends the rewritten HTML back with caching disabled.
 *
 * Responds 400 when `url` is missing or not a single string, and 502 when the
 * upstream request fails.
 *
 * NOTE(review): the target URL is taken verbatim from the query string, so
 * this endpoint will fetch any host the server can reach. Consider an
 * allow-list before exposing it publicly.
 */
router.get('/', function(req, res) {
  var request = require('request');
  var url = req.query.url;

  // A missing parameter is undefined and a repeated one is not a string;
  // either would make the string operations below throw.
  if (typeof url !== 'string' || url === '') {
    res.status(400).send('Missing required "url" query parameter');
    return;
  }

  // If our URL doesn't contain a protocol or
  // ending slash, add it here
  if (url.indexOf('://') === -1) {
    url = 'http://' + url;
    if (!url.match(/\/$/)) {
      url += '/';
    }
  }

  request.get(url, function(error, response, body) {
    if (error) {
      // Always answer the client; otherwise the request hangs until it
      // times out.
      res.status(502).send('Unable to fetch upstream URL');
      return;
    }

    var proxied = proxy(body, url);
    res.header('Cache-Control', 'no-cache, private, no-store, must-revalidate, max-stale=0, post-check=0, pre-check=0');
    res.send(proxied);
  });
});
/**
 * Resolve a (possibly relative) reference against a base URL.
 *
 * @param {string} ref - Attribute value taken from the upstream document.
 * @param {string} base - URL the document was fetched from.
 * @returns {string} The absolute URL, or `base + ref` when either value
 *     cannot be parsed as a URL.
 */
function resolveUrl(ref, base) {
  try {
    return new URL(ref, base).href;
  } catch (err) {
    // Unparseable input: fall back to plain concatenation rather than
    // failing the whole page.
    return base + ref;
  }
}

/**
 * Build a selector matching `tag` elements that carry `attr` with a value
 * that is not already absolute (http://, https:// or protocol-relative).
 *
 * @param {string} tag - Element name, e.g. 'img'.
 * @param {string} attr - Attribute name, e.g. 'src'.
 * @returns {string} CSS selector string.
 */
function relativeSelector(tag, attr) {
  return tag + '[' + attr + ']' +
    ':not([' + attr + '^="http://"])' +
    ':not([' + attr + '^="https://"])' +
    ':not([' + attr + '^="//"])';
}

/**
 * Rewrite an upstream HTML document so it can be served through the proxy.
 *
 * - Adds a <base> element pointing at the upstream URL.
 * - Makes relative a/img/link/script references absolute.
 * - When NODE_ENV is 'production' or 'development', routes http(s) links
 *   through the matching proxy endpoint and appends the environment's
 *   <script> tag to the body.
 *
 * @param {string} html - Upstream document markup.
 * @param {string} url - URL the document was fetched from.
 * @returns {string} The rewritten document markup.
 */
function proxy(html, url) {
  var cheerio = require('cheerio');
  var $ = cheerio.load(html);

  // Set href through attr() instead of string-building the tag, so quotes in
  // the caller-supplied URL cannot break out of the attribute.
  $('head').append($('<base />').attr('href', url)).append('\n');

  // Elements lacking the attribute are skipped (the [attr] part of the
  // selector); previously they were rewritten to "<url>undefined".
  // Relative img sources, including "public/..." ones, are covered by the
  // generic img rule, so the old special case that wrote a stray href
  // attribute onto <img> elements is gone.
  var targets = [
    [relativeSelector('a', 'href') + ':not([href^="javascript:"])', 'href'],
    [relativeSelector('img', 'src'), 'src'],
    [relativeSelector('link', 'href'), 'href'],
    [relativeSelector('script', 'src'), 'src']
  ];
  targets.forEach(function(target) {
    var attr = target[1];
    $(target[0]).each(function() {
      var el = $(this);
      el.attr(attr, resolveUrl(el.attr(attr), url));
    });
  });

  var environments = {
    production: {
      script: '<script id="prod"></script>\n',
      endpoint: 'http://example.com/proxy?url='
    },
    development: {
      script: '<script id="dev"></script>\n',
      endpoint: 'http://localhost:3000/proxy?url='
    }
  };
  var envName = process.env.NODE_ENV;

  // Own-property check so values such as "constructor" do not match.
  if (Object.prototype.hasOwnProperty.call(environments, envName)) {
    var env = environments[envName];

    $('a[href]').each(function() {
      var link = $(this);
      var destination = resolveUrl(link.attr('href'), url);

      // Only web links can be fetched by the proxy; javascript:, mailto:
      // and similar schemes are left untouched.
      if (/^https?:\/\//i.test(destination)) {
        // Encode so '&' and '#' in the destination stay inside the url
        // parameter instead of truncating it.
        link.attr('href', env.endpoint + encodeURIComponent(destination));
      }
    });

    $('body').append(env.script);
  }

  return $.html();
}
// Export the router; attaching it to an Express application happens outside
// this file.
module.exports = router;
@shaond
Copy link
Author

shaond commented Jul 23, 2014

This code doesn't include the routes or surrounding Express code. Use with caution as it may not suit your requirements.

@maxdignan
Copy link

This looks pretty great!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment