Created
December 9, 2017 15:57
-
-
Save rvrosm/7726d12ddd54a873e04a501dd65a2264 to your computer and use it in GitHub Desktop.
Bash / Awk script that converts GFWList into Privoxy action file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generate Privoxy action file from gfwlist.
#
# Arguments:
#   $1 = proxy, example: socks5://127.0.0.1:9050
#        (the awk program substitutes that value when $1 is empty)
#   $3 = verbose; when set to a non-empty, non-zero value the awk program
#        also lists the rules it could not convert on stderr
#
# The awk program lives in this very file: sed keeps only the lines that
# start with the "#<:>#" marker, strips the marker, and the result is fed
# to gawk through process substitution.

# Without pipefail the exit status is gawk's alone, so a failed download or
# base64 decode would still exit 0 after printing an empty action file.
set -o pipefail

# Alternative source list; the original assigned it and then immediately
# overwrote it, so it is kept here only as a commented-out switch.
# url='https://raw.githubusercontent.com/gfwlist/tinylist/master/tinylist.txt'
url='https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'

wget -qO- "$url" \
  | base64 -d \
  | gawk -f <(sed '/^#<:>#/!d;s///' "$0") -v proxy="$1" -v verbose="$3"
#<:># # Convert AutoProxy rules to Privoxy action file.
#<:># # Reference: http://www.privoxy.org/user-manual/actions-file.html#AF-PATTERNS
#<:># # Requires Gawk
#<:># #
#<:># # Variables expected from the command line (-v):
#<:># #   proxy   - scheme://host:port of the upstream proxy
#<:># #   verbose - when true, unconverted rules are reported in END
#<:># #
#<:># # Globals set up here:
#<:># #   rule[0] - action header for patterns that go through the proxy
#<:># #   rule[1] - action header for patterns that connect directly
#<:># #   set[0], set[1] - pattern sets (keys only) for the two headers
#<:># BEGIN {
#<:>#   # Splitting on "/" makes $1 the scheme ("|http:") of a URL rule and,
#<:>#   # once the scheme is stripped, the host part.
#<:>#   FS = "/"
#<:>#   if (!proxy)
#<:>#     proxy = "socks5://127.0.0.1:9050"
#<:>#   # a[1] = scheme, a[2] = host, a[3] = port
#<:>#   split(proxy, a, /:\/\/|:|\s+/)
#<:>#   if (a[1] == "http") {
#<:>#     # An HTTP parent proxy is written "forward host:port"; the trailing
#<:>#     # " ." (the http-parent slot) only belongs to the forward-socks* forms.
#<:>#     rule[0] = sprintf("{+forward-override{forward %s:%s}}", a[2], a[3])
#<:>#   } else {
#<:>#     rule[0] = sprintf("{+forward-override{forward-%s %s:%s .}}", a[1], a[2], a[3])
#<:>#   }
#<:>#   rule[1] = "{+forward-override{forward .}}"
#<:>#   # Touch and delete a dummy key so both entries exist as (empty) subarrays.
#<:>#   set[0][""]; delete set[0][""]
#<:>#   set[1][""]; delete set[1][""]
#<:>#   # Add custom patterns here
#<:>#   #set[0][".example.com"]
#<:>#   #set[0][".elpmaxe.com"]
#<:>#   #set[1]["direct.example.com"]
#<:># }
#<:>#
#<:># # Per-record rules. They run top to bottom for each input line; a rule
#<:># # either files the line into set[i] / unhandled and calls "next", or
#<:># # rewrites $0 so that a later rule picks it up.
#<:># # i selects the target set: 0 = via proxy, 1 = direct (exception rule).
#<:># { i = 0 }
#<:>#
#<:># /^[![]|^\s*$/ { next } # ignored: lines starting with "!" or "[", and blank lines
#<:>#
#<:># /^@@/ { # excluded
#<:>#   i = 1
#<:>#   sub(/^@@/, "")
#<:># }
#<:>#
#<:># /^([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\.([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}(:[0-9]+)?$/ { # IP address
#<:>#   set[i][$0]
#<:>#   next
#<:># }
#<:>#
#<:># # Special case for that long-ass google line:
#<:># # /^https?:\/\/([^\/]+\.)*google\.(ac|ad|...|vu|ws)\/.*/
#<:># index($0, "/^https?:\\/\\/([^\\/]+\\.)*google\\.(") == 1 {
#<:>#   $0 = substr($0, 34)   # drop the 33-character literal prefix
#<:>#   sub(/......$/, "")    # drop the 6-character tail ")\/.*/"
#<:>#   split($0, a, /\|/)    # one element per TLD alternative
#<:>#   for (j in a) {
#<:>#     set[i][sprintf(".google.%s", a[j])]
#<:>#   }
#<:>#   next
#<:># }
#<:>#
#<:># # Convert (rare) regexp patterns to domain patterns
#<:># # /^https?:\/\/[^\/]+blogspot\.(.*)/ => ||blogspot.*
#<:># /^\/\^https\?:\\\/\\\/\[\^\\\/\]\+[^/]+\/$/ {
#<:>#   $0 = substr($0, 20)   # drop the 19-character prefix "/^https?:\/\/[^\/]+"
#<:>#   sub(/\/$/, "")
#<:>#   gsub(/[()]/, "")
#<:>#   gsub(/\.\*/, "*")
#<:>#   gsub(/\.\+/, "?*")
#<:>#   gsub(/\\/, "")
#<:>#   sub(/^/, "||")        # falls through to the domain rule below
#<:># }
#<:>#
#<:># # Any other /regexp/ rule cannot be converted.
#<:># /^\/.*\/$/ {
#<:>#   unhandled["regexp"][$0]
#<:>#   next
#<:># }
#<:>#
#<:># # ||foo*.bar => .foo*.bar
#<:># /^\|\|/ { # domain
#<:>#   host = substr($0, 3)
#<:>#   set[i][sprintf(".%s", host)]
#<:>#   # Remove redundant pattern
#<:>#   delete set[i][sprintf("%s:80", host)]
#<:>#   delete set[i][sprintf(".%s:80", host)]
#<:>#   delete set[i][sprintf("%s:443", host)]
#<:>#   delete set[i][sprintf(".%s:443", host)]
#<:>#   next
#<:># }
#<:>#
#<:># # Fix up broken patterns
#<:># /^https?:\/\// {
#<:>#   sub(/^/, "|")
#<:># }
#<:>#
#<:># # Support patterns which are basically missing |http://.
#<:># # Don't try to fix broken patterns such as:
#<:># # .bbc.co.uk*chinese
#<:># # .bbc.co*zhongwen
#<:># # bbs.sina.com%2F
#<:># # q%3Dfreedom
#<:># /^([[:alnum:]._~-]|[!'*+,;&=])+(\/([[:alnum:]._~-]|%[[:xdigit:]]{2}|[!'*+,;&=]|[@:])*(\?([[:alnum:]._~-]|%[[:xdigit:]]{2}|[!'*+,;&=]|[@/?:])*)?(#([[:alnum:]._~-]|%[[:xdigit:]]{2}|[!'*+,;&=]|[@/?:])*)?)*$/ { # pattern
#<:>#   sub(/^/, "|http://")
#<:># }
#<:>#
#<:># # |http://foo.bar/*?q=x+y => foo.bar:80/.*\?q=x\+y
#<:># # |https://cdn*.foo.bar => cdn*.foo.bar:443
#<:># # |http://foo.bar:8080/x => foo.bar:8080/x
#<:># /^\|https?:\/\// { # start of URL
#<:>#   port = $1 == "|http:" ? 80 : 443
#<:>#   sub(/^\|https?:\/\//, "")
#<:>#   host = $1
#<:>#   $0 = substr($0, length(host) + 1) # path
#<:>#   gsub(/[].?+(|)[]/, "\\\\&")        # backslash-escape regexp metacharacters
#<:>#   gsub(/\*/, ".*")                  # AutoProxy wildcard => regexp
#<:>#   # A rule may already name a port; keep it rather than appending the
#<:>#   # scheme default (which would yield "host:8080:80").
#<:>#   name = host
#<:>#   if (sub(/:[0-9]+$/, "", name))
#<:>#     hostport = host
#<:>#   else
#<:>#     hostport = sprintf("%s:%s", host, port)
#<:>#   # Don't add redundant pattern
#<:>#   if (!(name in set[i]) &&
#<:>#       !(sprintf(".%s", name) in set[i]) &&
#<:>#       !(hostport in set[i]) &&
#<:>#       !(sprintf(".%s", hostport) in set[i]) &&
#<:>#       !(sprintf(".%s%s", hostport, $0) in set[i])) {
#<:>#     set[i][sprintf("%s%s", hostport, $0)]
#<:>#   }
#<:>#   next
#<:># }
#<:>#
#<:># # Whatever reaches this point matched none of the rules above.
#<:># {
#<:>#   unhandled["pattern"][$0]
#<:># }
#<:>#
#<:># # Print the action file: for each of the two sets, its action header
#<:># # followed by the patterns in sorted order. With verbose set, the rules
#<:># # that could not be converted are then listed on stderr, grouped by kind.
#<:># END {
#<:>#   for (i = 0; i < 2; ++i) {
#<:>#     print rule[i]
#<:>#     # asorti() replaces set[i] with its sorted keys under indices 1..n.
#<:>#     # Walk those indices explicitly: "for (j in ...)" visits elements in
#<:>#     # an unspecified order and would throw the sorting away.
#<:>#     n = asorti(set[i])
#<:>#     for (j = 1; j <= n; ++j) {
#<:>#       print set[i][j]
#<:>#     }
#<:>#   }
#<:>#   if (!verbose) {
#<:>#     exit
#<:>#   }
#<:>#   for (i in unhandled) {
#<:>#     if (!isarray(unhandled[i]))
#<:>#       continue
#<:>#     n = asorti(unhandled[i])
#<:>#     print "Warning: unhandled " i > "/dev/stderr"
#<:>#     for (j = 1; j <= n; ++j) {
#<:>#       print unhandled[i][j] > "/dev/stderr"
#<:>#     }
#<:>#   }
#<:># }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment