Skip to content

Instantly share code, notes, and snippets.

@rvrosm
Created December 9, 2017 15:57
Show Gist options
  • Save rvrosm/7726d12ddd54a873e04a501dd65a2264 to your computer and use it in GitHub Desktop.
Save rvrosm/7726d12ddd54a873e04a501dd65a2264 to your computer and use it in GitHub Desktop.
Bash / Awk script that converts GFWList into Privoxy action file
#!/bin/bash
#
# Generate a Privoxy action file from gfwlist and print it on stdout.
#
# Usage: <script> [proxy] [ignored] [verbose]
#   $1 = proxy, example: socks5://127.0.0.1:9050
#        (the embedded awk program falls back to that value when $1 is empty)
#   $2 = not used
#   $3 = verbose; when non-empty and non-zero the awk program reports the
#        rules it could not convert on stderr
#
# The awk program lives in this very file: every line that starts with the
# "#<:>#" marker is extracted by sed (marker stripped) and handed to gawk.
set -o pipefail

# Alternative, smaller list:
#   https://raw.githubusercontent.com/gfwlist/tinylist/master/tinylist.txt
url='https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'

# Download and decode first. Feeding gawk straight from the pipe would, on a
# failed download, still print both action headers with no patterns and exit
# with status 0 -- indistinguishable from success for the caller.
if ! rules=$(wget -qO- "$url" | base64 -d) || [[ -z "$rules" ]]; then
  printf '%s: failed to download or decode %s\n' "${0##*/}" "$url" >&2
  exit 1
fi

printf '%s\n' "$rules" |
  gawk -f <(sed '/^#<:>#/!d;s///' "$0") -v proxy="$1" -v verbose="$3"
#<:># # Convert AutoProxy rules to Privoxy action file.
#<:># # Reference: http://www.privoxy.org/user-manual/actions-file.html#AF-PATTERNS
#<:># # Requires Gawk
#<:># #
#<:># # Variables read (set with -v):
#<:># #   proxy    forwarding target written as scheme://host:port
#<:># #   verbose  tested in the END rule
#<:># # Globals prepared here:
#<:># #   rule[0] / rule[1]  action header printed before each pattern set
#<:># #   set[0]  / set[1]   pattern sets; index 0 goes through the proxy,
#<:># #                      index 1 is forwarded directly
#<:># BEGIN {
#<:>#     # Records are split on "/", so $1 of a URL rule is its scheme part.
#<:>#     FS = "/"
#<:>#     if (!proxy)
#<:>#         proxy = "socks5://127.0.0.1:9050"
#<:>#     # a[1] = scheme, a[2] = host, a[3] = port
#<:>#     split(proxy, a, /:\/\/|:|\s+/)
#<:>#     # Leave out the ":" when no port was given instead of emitting "host:".
#<:>#     target = (a[3] == "") ? a[2] : (a[2] ":" a[3])
#<:>#     if (a[1] == "http") {
#<:>#         # An HTTP parent proxy is written "forward host:port". Only the
#<:>#         # forward-socks* forms take a second argument, where "." stands
#<:>#         # for "no further HTTP proxy behind the SOCKS server".
#<:>#         rule[0] = sprintf("{+forward-override{forward %s}}", target)
#<:>#     } else {
#<:>#         rule[0] = sprintf("{+forward-override{forward-%s %s .}}", a[1], target)
#<:>#     }
#<:>#     rule[1] = "{+forward-override{forward .}}"
#<:>#     # Touch and delete a dummy key so both sets exist as (empty) arrays
#<:>#     # before any rule tests membership with "in" or sorts them.
#<:>#     set[0][""]; delete set[0][""]
#<:>#     set[1][""]; delete set[1][""]
#<:>#     # Add custom patterns here
#<:>#     #set[0][".example.com"]
#<:>#     #set[0][".elpmaxe.com"]
#<:>#     #set[1]["direct.example.com"]
#<:># }
#<:>#
#<:># # Every record starts out in set 0; only an "@@" prefix moves it to set 1.
#<:># { i = 0 }
#<:>#
#<:># # Nothing to convert: lines starting with "!" or "[", and blank lines.
#<:># /^!/ || /^\[/ || /^\s*$/ { next } # ignored
#<:>#
#<:># # "@@" marks an exception: remember set 1, strip the marker and let the
#<:># # remaining rules classify what is left of the line.
#<:># /^@@/ { # excluded
#<:>#     i = 1
#<:>#     $0 = substr($0, 3)
#<:># }
#<:>#
#<:># # Bare IPv4 address, optionally followed by ":port": stored verbatim.
#<:># BEGIN {
#<:>#     # One decimal octet, 0-255, without leading zeros.
#<:>#     ipv4_octet = "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"
#<:>#     ipv4_re = "^" ipv4_octet "(\\." ipv4_octet "){3}(:[0-9]+)?$"
#<:># }
#<:>#
#<:># $0 ~ ipv4_re { # IP address
#<:>#     set[i][$0]
#<:>#     next
#<:># }
#<:>#
#<:># # Special case for the one very long google rule, which lists every TLD
#<:># # inside a single alternation:
#<:># # /^https?:\/\/([^\/]+\.)*google\.(ac|ad|...|vu|ws)\/.*/
#<:># # Each alternative becomes its own ".google.<tld>" pattern.
#<:># BEGIN { google_head = "/^https?:\\/\\/([^\\/]+\\.)*google\\.(" }
#<:>#
#<:># substr($0, 1, length(google_head)) == google_head {
#<:>#     # Keep only the alternation body: drop the fixed head, then the six
#<:>#     # closing characters ")\/.*/".
#<:>#     tlds = substr($0, length(google_head) + 1)
#<:>#     sub(/......$/, "", tlds)
#<:>#     n = split(tlds, tld, /\|/)
#<:>#     for (k = 1; k <= n; ++k) {
#<:>#         set[i][".google." tld[k]]
#<:>#     }
#<:>#     next
#<:># }
#<:>#
#<:># # Convert (rare) regexp patterns to domain patterns
#<:># # /^https?:\/\/[^\/]+blogspot\.(.*)/ => ||blogspot.*
#<:># # The rewritten line is not finished here; it carries on to the later
#<:># # rules as an ordinary "||" domain rule.
#<:># /^\/\^https\?:\\\/\\\/\[\^\\\/\]\+[^/]+\/$/ {
#<:>#     # Everything after the fixed 19-character head "/^https?:\/\/[^\/]+".
#<:>#     tail = substr($0, 20)
#<:>#     sub(/\/$/, "", tail)        # closing regexp delimiter
#<:>#     gsub(/[()]/, "", tail)      # grouping has no wildcard equivalent
#<:>#     gsub(/\.\*/, "*", tail)     # ".*" -> "*"
#<:>#     gsub(/\.\+/, "?*", tail)    # ".+" -> "?*"
#<:>#     gsub(/\\/, "", tail)        # drop regexp escapes
#<:>#     $0 = "||" tail
#<:># }
#<:>#
#<:># # A line still wrapped in slashes is a regexp that was not converted:
#<:># # record it for the verbose report and stop processing it.
#<:># length($0) > 1 && substr($0, 1, 1) == "/" && substr($0, length($0)) == "/" {
#<:>#     unhandled["regexp"][$0]
#<:>#     next
#<:># }
#<:>#
#<:># # ||foo*.bar => .foo*.bar
#<:># substr($0, 1, 2) == "||" { # domain
#<:>#     domain = substr($0, 3)
#<:>#     set[i]["." domain]
#<:>#     # The whole-domain pattern just added makes the host-only entries for
#<:>#     # ports 80 and 443 of the same name redundant, with or without the
#<:>#     # leading dot.
#<:>#     split("80 443", std_port, " ")
#<:>#     for (p = 1; p <= 2; ++p) {
#<:>#         delete set[i][domain ":" std_port[p]]
#<:>#         delete set[i]["." domain ":" std_port[p]]
#<:>#     }
#<:>#     next
#<:># }
#<:>#
#<:># # Fix up broken patterns: a rule that begins with a bare scheme gets the
#<:># # leading "|" it is missing.
#<:># $0 ~ "^https?://" {
#<:>#     $0 = "|" $0
#<:># }
#<:>#
#<:># # Support patterns which are basically missing |http://.
#<:># # Don't try to fix broken patterns such as:
#<:># # .bbc.co.uk*chinese
#<:># # .bbc.co*zhongwen
#<:># # bbs.sina.com%2F
#<:># # q%3Dfreedom
#<:># BEGIN {
#<:>#     # Building blocks of the "looks like host[/path[?query][#fragment]]" test.
#<:>#     plain_ch   = "[[:alnum:]._~-]"
#<:>#     punct_ch   = "[!'*+,;&=]"
#<:>#     escaped_ch = "%[[:xdigit:]]{2}"
#<:>#     host_ch  = "(" plain_ch "|" punct_ch ")"
#<:>#     path_ch  = "(" plain_ch "|" escaped_ch "|" punct_ch "|[@:])"
#<:>#     query_ch = "(" plain_ch "|" escaped_ch "|" punct_ch "|[@/?:])"
#<:>#     bare_re  = "^" host_ch "+(/" path_ch "*(\\?" query_ch "*)?(#" query_ch "*)?)*$"
#<:># }
#<:>#
#<:># $0 ~ bare_re { # pattern
#<:>#     $0 = "|http://" $0
#<:># }
#<:>#
#<:># # |http://foo.bar/*?q=x+y => foo.bar:80/.*\?q=x\+y
#<:># # |https://cdn*.foo.bar => cdn*.foo.bar:443
#<:># # |http://foo.bar:8080/x => foo.bar:8080/x
#<:># /^\|https?:\/\// { # start of URL
#<:>#     # Fields are split on "/", so $1 is the scheme part here.
#<:>#     port = $1 == "|http:" ? 80 : 443
#<:>#     sub(/^\|https?:\/\//, "")
#<:>#     # sub() re-split the record: $1 is now everything before the first "/".
#<:>#     host = $1
#<:>#     $0 = substr($0, length(host) + 1) # path
#<:>#     # A port written in the rule itself replaces the scheme default;
#<:>#     # appending the default as well would yield "host:8080:80".
#<:>#     if (match(host, /:[0-9]+$/)) {
#<:>#         port = substr(host, RSTART + 1)
#<:>#         host = substr(host, 1, RSTART - 1)
#<:>#     }
#<:>#     # The path part of a Privoxy pattern is a regular expression: escape
#<:>#     # its metacharacters first, then turn the "*" wildcard into ".*".
#<:>#     gsub(/[].?+(|)[]/, "\\\\&")
#<:>#     gsub(/\*/, ".*")
#<:>#     # Don't add redundant pattern
#<:>#     hostport = host ":" port
#<:>#     if (!(host in set[i]) &&
#<:>#         !(("." host) in set[i]) &&
#<:>#         !(hostport in set[i]) &&
#<:>#         !(("." hostport) in set[i]) &&
#<:>#         !(("." hostport $0) in set[i])) {
#<:>#         set[i][hostport $0]
#<:>#     }
#<:>#     next
#<:># }
#<:>#
#<:># # Catch-all: a record that reaches this rule was not consumed by any rule
#<:># # before it; remember it for the verbose report.
#<:># {
#<:>#     unhandled["pattern"][$0]
#<:># }
#<:>#
#<:># # Print the action file: for each of the two sets its action header
#<:># # followed by its patterns in sorted order. When verbose is set, also list
#<:># # the rules that could not be converted on stderr.
#<:># END {
#<:>#     for (i = 0; i < 2; ++i) {
#<:>#         print rule[i]
#<:>#         # asorti() replaces set[i] with an array numbered 1..n whose values
#<:>#         # are the former keys in sorted order. It has to be walked by
#<:>#         # number: "for (j in ...)" visits elements in an unspecified order
#<:>#         # and would not reliably preserve the sort.
#<:>#         n = asorti(set[i])
#<:>#         for (j = 1; j <= n; ++j) {
#<:>#             print set[i][j]
#<:>#         }
#<:>#     }
#<:>#     if (!verbose) {
#<:>#         exit
#<:>#     }
#<:>#     for (i in unhandled) {
#<:>#         if (!isarray(unhandled[i]))
#<:>#             continue
#<:>#         n = asorti(unhandled[i])
#<:>#         print "Warning: unhandled " i > "/dev/stderr"
#<:>#         for (j = 1; j <= n; ++j) {
#<:>#             print unhandled[i][j] > "/dev/stderr"
#<:>#         }
#<:>#     }
#<:># }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment