Skip to content

Instantly share code, notes, and snippets.

@DementedEarplug
Last active November 11, 2024 18:10
Show Gist options
  • Save DementedEarplug/a434e2dbcf6df09341fc309f5035f00f to your computer and use it in GitHub Desktop.
Save DementedEarplug/a434e2dbcf6df09341fc309f5035f00f to your computer and use it in GitHub Desktop.
Nginx + perl
FROM debian:bookworm-slim

# Install build dependencies and the Perl modules used by proxy_handler.pm.
#   - ca-certificates: required so wget (and LWP) can verify HTTPS peers.
#   - liburi-perl: provides URI and URI::Escape, which the handler imports
#     directly (liburi-escape-xs-perl only ships URI::Escape::XS).
RUN apt-get update && apt-get install -y \
    build-essential \
    ca-certificates \
    libpcre3 \
    libpcre3-dev \
    zlib1g \
    zlib1g-dev \
    libssl-dev \
    perl \
    libperl-dev \
    wget \
    libwww-perl \
    libhtml-tree-perl \
    liburi-perl \
    liburi-escape-xs-perl \
    cpanminus \
    && rm -rf /var/lib/apt/lists/*

# Download and compile NGINX with the embedded Perl module built as a
# dynamic module. The tarball is fetched over HTTPS, and the source tree is
# removed in the same layer so it does not end up in the final image.
ENV NGINX_VERSION=1.24.0
RUN cd /tmp && \
    wget https://nginx.org/download/nginx-${NGINX_VERSION}.tar.gz && \
    tar -zxf nginx-${NGINX_VERSION}.tar.gz && \
    cd nginx-${NGINX_VERSION} && \
    ./configure \
        --prefix=/etc/nginx \
        --sbin-path=/usr/sbin/nginx \
        --modules-path=/usr/lib/nginx/modules \
        --conf-path=/etc/nginx/nginx.conf \
        --error-log-path=/var/log/nginx/error.log \
        --http-log-path=/var/log/nginx/access.log \
        --pid-path=/var/run/nginx.pid \
        --lock-path=/var/run/nginx.lock \
        --with-http_perl_module=dynamic \
        --with-threads \
        --with-file-aio \
        --with-http_ssl_module && \
    make && \
    make install && \
    cd / && \
    rm -rf /tmp/nginx-${NGINX_VERSION} /tmp/nginx-${NGINX_VERSION}.tar.gz

# Directory that nginx.conf's perl_modules directive points at
RUN mkdir -p /etc/nginx/perl

# Copy configuration files
COPY nginx.conf /etc/nginx/nginx.conf
COPY proxy_handler.pm /etc/nginx/perl/

# The Perl module installed by nginx is named "nginx" (lower case); rewrite
# the import in case the handler was written with "use Nginx;".
RUN sed -i 's/use Nginx;/use nginx;/' /etc/nginx/perl/proxy_handler.pm

EXPOSE 3001

# Run nginx in the foreground so it stays the container's main process
CMD ["nginx", "-g", "daemon off;"]
# Debugging alternative: keep the container alive without starting nginx
# CMD ["tail", "-f", "/dev/null"]
# Load the embedded Perl module that the image builds as a dynamic module.
load_module /usr/lib/nginx/modules/ngx_http_perl_module.so;

# Main-context error log (outside the http block).
# NOTE(review): "debug" level only produces debug output when nginx was
# configured with --with-debug — confirm against the build flags.
error_log /var/log/nginx/error.log debug;

http {
    # Where Perl handler modules live, and which one to preload.
    perl_modules /etc/nginx/perl/;
    perl_require proxy_handler.pm;

    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    server {
        listen 3001;

        # CORS headers for every response. "always" makes nginx attach them
        # to error responses as well; without it they are only added to a
        # fixed set of success/redirect status codes.
        add_header 'Access-Control-Allow-Origin' '*' always;
        add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always;
        add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Accept' always;

        location /proxy {
            # CORS preflight. add_header directives are not inherited from
            # the server level once a block declares its own, so the CORS
            # headers are repeated here. A 204 response has no body, so no
            # Content-Type or Content-Length header is added (a 204 must not
            # carry Content-Length).
            if ($request_method = 'OPTIONS') {
                add_header 'Access-Control-Allow-Origin' '*';
                add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS';
                add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Accept';
                add_header 'Access-Control-Max-Age' 1728000;
                return 204;
            }

            # Everything else is served by the Perl proxy handler.
            perl proxy_handler::handle;
        }

        # Optional: a simple endpoint to confirm the Perl module is loaded
        location /test {
            perl '
                sub {
                    my $r = shift;
                    $r->send_http_header("text/plain");
                    return OK if $r->header_only;
                    $r->print("Perl module is working!\n");
                    return OK;
                }
            ';
        }
    }
}

events {
    worker_connections 1024;
}
package proxy_handler;
use strict;
use warnings;
use nginx;
use HTML::TreeBuilder;
use LWP::UserAgent;
use URI;
use URI::Escape;
# Upper bound, in seconds, for a single upstream fetch. The fetch is a
# blocking call, so without a limit a slow upstream would hold the request
# for LWP's much longer default timeout.
my $FETCH_TIMEOUT = 15;

# _proxy_path($raw, $base)
#
# Resolve the (possibly relative) reference $raw against $base and return
# the local path that fetches it through this handler, i.e.
# "/proxy?url=<escaped absolute URL>". Returns undef when the reference
# should be left untouched: empty values, same-document fragments, and
# anything that does not resolve to an http/https URL (mailto:,
# javascript:, data:, ...).
sub _proxy_path {
    my ($raw, $base) = @_;

    return if !defined $raw || $raw eq '';
    return if $raw =~ /\A\s*\#/;

    my $absolute = URI->new_abs($raw, $base);
    my $scheme   = $absolute->scheme;
    return if !defined $scheme || $scheme !~ /\A https? \z/xi;

    # as_string is already percent-encoded ASCII, so uri_escape cannot
    # croak on wide characters coming from the decoded page.
    return '/proxy?url=' . uri_escape($absolute->as_string);
}

# _rewrite_html($r, $html, $base)
#
# Parse $html, route stylesheets, images and anchors through the proxy,
# disable autoplay on embedded videos, inject a small client-side helper
# script, and return the serialized document.
#
#   $r    - nginx request object (used for the Host / X-Forwarded-Proto
#           request headers when building absolute anchor URLs)
#   $html - decoded HTML (character string)
#   $base - base URL of the fetched page, used to resolve relative links
sub _rewrite_html {
    my ($r, $html, $base) = @_;

    warn "[DEBUG] Processing HTML content";
    my $tree = HTML::TreeBuilder->new_from_content($html);
    warn "[DEBUG] Created HTML tree";

    # Stylesheet links: fetch them through the proxy.
    my $stylesheet_count = 0;
    for my $link ($tree->look_down(_tag => 'link', rel => 'stylesheet')) {
        my $href = $link->attr('href');
        my $path = _proxy_path($href, $base);
        $link->attr('href', $path) if defined $path;
        $stylesheet_count++;
    }
    warn "[DEBUG] Processed $stylesheet_count stylesheet links";

    # Images: fetch them through the proxy.
    my $image_count = 0;
    for my $img ($tree->look_down(_tag => 'img')) {
        my $src  = $img->attr('src');
        my $path = _proxy_path($src, $base);
        $img->attr('src', $path) if defined $path;
        $image_count++;
    }
    warn "[DEBUG] Processed $image_count images";

    # Anchors: rewrite to an absolute proxy URL on the host the client used.
    # The embedded Perl API exposes request headers via header_in(name).
    my $protocol = $r->header_in('X-Forwarded-Proto') || 'http';
    my $host     = $r->header_in('Host');
    my $origin   = $host ? "$protocol://$host" : '';

    my $anchor_count = 0;
    for my $anchor ($tree->look_down(_tag => 'a')) {
        my $href = $anchor->attr('href');
        my $path = _proxy_path($href, $base);
        next unless defined $path;

        $anchor->attr('href', $origin . $path);
        $anchor->attr('target', '_self');    # Force links to open in same window
        $anchor_count++;
    }
    warn "[DEBUG] Processed $anchor_count anchor tags";

    # Iframes: add autoplay=0 for the known video platforms, preserving any
    # query string that is already present.
    my $iframe_count = 0;
    for my $iframe ($tree->look_down(_tag => 'iframe')) {
        my $src = $iframe->attr('src');
        next unless $src && $src =~ /youtube\.com|vimeo\.com/;

        $iframe->attr('src', $src =~ /\?/ ? "$src&autoplay=0" : "$src?autoplay=0");
        $iframe_count++;
    }
    warn "[DEBUG] Processed $iframe_count video iframes";

    # Video tags: passing undef to attr() removes the attribute.
    my $video_count = 0;
    for my $video ($tree->look_down(_tag => 'video')) {
        $video->attr('autoplay',    undef);
        $video->attr('playsinline', undef);
        $video_count++;
    }
    warn "[DEBUG] Processed $video_count video tags";

    # Client-side helper: keep proxied links in the same window and pause
    # any video on load.
    warn "[DEBUG] Creating JavaScript injection";
    my $script = HTML::Element->new(
        'script',
        type => 'text/javascript'
    );
    $script->push_content(q{
// Intercept clicks on proxied links to handle them properly
document.addEventListener('click', (evt) => {
    const link = (evt.target && evt.target.closest) ? evt.target.closest('a[href]') : null;
    if (link && link.getAttribute('href').includes('/proxy?url=')) {
        evt.preventDefault();
        window.location.href = link.getAttribute('href');
    }
});
// Ensure all videos are paused when the page loads
document.addEventListener('DOMContentLoaded', function() {
    document.querySelectorAll('video').forEach(video => video.pause());
});
});
    warn "[DEBUG] Added client-side JavaScript";

    my $body = $tree->look_down(_tag => 'body');
    if ($body) {
        $body->push_content($script);
        warn "[DEBUG] Injected JavaScript into body";
    }
    else {
        warn "[DEBUG] Warning: Could not find body tag for JavaScript injection";
    }

    my $rewritten = $tree->as_HTML;
    warn "[DEBUG] Converted modified tree back to HTML, length: " . length($rewritten);

    # Clean up the tree to prevent memory leaks
    $tree->delete;
    warn "[DEBUG] Cleaned up HTML tree";

    return $rewritten;
}

# handle($r)
#
# nginx embedded-Perl content handler for "location /proxy".
#
# Reads the target from the "url" query argument, fetches it with LWP and
# relays the body to the client. HTML responses are rewritten first (see
# _rewrite_html) so that links and assets keep going through the proxy.
#
# Responses:
#   400 text/plain - "url" argument missing or not an http/https URL
#   500 text/plain - the upstream fetch failed
#   200            - upstream body, with the upstream content type
#
# Always returns OK to nginx; the HTTP status is set on the request object.
#
# SECURITY NOTE(review): this is an open proxy. Only the URL scheme is
# restricted here; requests to internal or loopback addresses are still
# possible and should be restricted at the network or configuration level.
sub handle {
    # Get the request object passed to this handler
    my $r = shift;
    warn "[DEBUG] Starting proxy handler";

    # Decode the URL parameter from the request
    my $raw_url = $r->variable("arg_url");
    my $url     = defined $raw_url ? uri_unescape($raw_url) : '';

    # Refuse anything that is not plain http/https (file://, ftp://, ...).
    if ($url !~ m{\A https?:// }xi) {
        warn "[DEBUG] Rejecting request: missing or unsupported url parameter";
        $r->status(400);
        $r->send_http_header("text/plain");
        $r->print("Missing or unsupported url parameter") unless $r->header_only;
        return OK;
    }
    warn "[DEBUG] Proxying request for URL: $url";

    # protocols_allowed also applies to redirects, so an upstream cannot
    # bounce the fetch to a file:// or other non-HTTP URL.
    my $ua = LWP::UserAgent->new(
        timeout           => $FETCH_TIMEOUT,
        protocols_allowed => [ 'http', 'https' ],
    );
    warn "[DEBUG] Created UserAgent";

    # Make the HTTP request to fetch the target URL
    warn "[DEBUG] Making request to target URL";
    my $response = $ua->get($url);
    warn "[DEBUG] Received response with status: " . $response->status_line;

    if (!$response->is_success) {
        warn "[DEBUG] Request failed with error: " . $response->status_line;
        # The status must be set before the headers are sent.
        $r->status(500);
        $r->send_http_header("text/plain");
        $r->print("Error fetching content") unless $r->header_only;
        warn "[DEBUG] Completed proxy handler";
        return OK;
    }

    # Fall back to a generic type when the upstream sent no Content-Type.
    my $content_type = $response->header('Content-Type') || 'application/octet-stream';
    warn "[DEBUG] Response content type: $content_type";

    my $content;
    if ($content_type =~ m{text/html}i) {
        # HTML is decoded to characters so it can be parsed and rewritten.
        $content = $response->decoded_content;
        $content = $response->content unless defined $content;
        warn "[DEBUG] Decoded response content length: " . length($content);

        $content = _rewrite_html($r, $content, $response->base);

        # The rewritten document is a character string: emit it as UTF-8
        # and say so, since the upstream charset no longer applies.
        utf8::encode($content);
        $content_type = 'text/html; charset=utf-8';
    }
    else {
        # Everything else is passed through as bytes: undo any
        # Content-Encoding but leave the charset alone so the bytes still
        # match the upstream Content-Type header.
        $content = $response->decoded_content(charset => 'none');
        $content = $response->content unless defined $content;
        warn "[DEBUG] Decoded response content length: " . length($content);
    }

    # Send the appropriate content type header
    warn "[DEBUG] Sending response headers with content type: $content_type";
    $r->send_http_header($content_type);

    # HEAD requests get headers only.
    if (!$r->header_only) {
        warn "[DEBUG] Sending content to client, length: " . length($content);
        $r->print($content);
    }

    warn "[DEBUG] Completed proxy handler";
    # Return OK status to nginx
    return OK;
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment