Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Created May 27, 2013 15:04
Show Gist options
  • Save hoehrmann/5657535 to your computer and use it in GitHub Desktop.
Save hoehrmann/5657535 to your computer and use it in GitHub Desktop.
Implementation of the parsing and resolution algorithms in RFC 3986. Originally http://lists.w3.org/Archives/Public/www-archive/2011Aug/0001.html as amended by http://lists.w3.org/Archives/Public/www-archive/2011Aug/0003.html
package RFC3986;
use strict;
use warnings;
sub parse {
$_[0] =~ m|^(([^:/?\#]+):)?(//([^/?\#]*))?
([^?\#]*)(\?([^\#]*))?(\#(.*))?$|x;
# $_[0] =~ m|^(([a-zA-Z0-9+.-]+):)?(//([^/?\#]*))?
# ([^?\#]*)(\?([^\#]*))?(\#(.*))?$|x;
return scheme => $2,
authority => $4,
path => $5,
query => $7,
fragment => $9;
}
sub merge {
my ($base, $ref, $base_has_authority) = @_;
return "/$ref" if $base eq "" and $base_has_authority;
return "$1$ref" if $base =~ m|^(.*?)([^/]*)$|s;
}
sub remove_dot_segments {
my $in = shift;
my $ou = "";
while (length $in) {
next if $in =~ s!^\.\.?/!!;
next if $in =~ s!(^/\.(/|$))!/!;
next if $in =~ s!^/\.\.(/|$)!/! and $ou =~ s!/?[^/]*$!!;
next if $in =~ s!^\.\.?$!!;
$in =~ s!^(/?[^/]*)!!;
$ou .= $1;
}
return $ou;
}
sub transform {
my %R = parse(shift);
my %Base = parse(shift);
my %T;
if (defined $R{scheme}) {
$T{scheme} = $R{scheme};
$T{authority} = $R{authority};
$T{path} = $R{path};
$T{query} = $R{query};
} else {
if (defined $R{authority}) {
$T{authority} = $R{authority};
$T{path} = remove_dot_segments($R{path});
$T{query} = $R{query};
} else {
if ($R{path} eq "") {
$T{path} = $Base{path};
if (defined $R{query}) {
$T{query} = $R{query};
} else {
$T{query} = $Base{query};
}
} else {
if ($R{path} =~ m|^/|) {
$T{path} = remove_dot_segments($R{path});
} else {
$T{path} = merge($Base{path}, $R{path},
defined $Base{authority}); # Bug in RFC 3986
$T{path} = remove_dot_segments($T{path});
}
$T{query} = $R{query};
}
$T{authority} = $Base{authority};
}
$T{scheme} = $Base{scheme};
}
$T{fragment} = $R{fragment};
return %T;
}
sub compose {
my %U = @_;
my $result = "";
$result .= $U{scheme} . ":" if defined $U{scheme};
$result .= "//" . $U{authority} if defined $U{authority};
$result .= $U{path};
$result .= "?" . $U{query} if defined $U{query};
$result .= "#" . $U{fragment} if defined $U{fragment};
return $result;
}
@k-payl
Copy link

k-payl commented May 27, 2013

what do it parse?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment