Last active
March 28, 2020 14:01
-
-
Save xiconet/09df2bf913b0ea9056ae3a12c6cc4891 to your computer and use it in GitHub Desktop.
Cloudflare under-attack mode challenge solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package scraper | |
import ( | |
"bytes" | |
"errors" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"net/http/cookiejar" | |
"net/url" | |
"regexp" | |
//"strconv" | |
"strings" | |
"time" | |
"github.com/robertkrimen/otto" | |
) | |
// userAgent is the default User-Agent header value used by this package.
//
// Alternative (currently disabled) value:
//
//	Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36
const (
	userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.9) Gecko/20100101 Goanna/2.2 Firefox/38.9 PaleMoon/26.5.0"
)
// Transport wraps another http.RoundTripper and carries the cookie jar that
// is shared with the HTTP clients built around it.
//
// Field order matters: the type is constructed with a positional literal
// elsewhere in this file.
type Transport struct {
	// upstream is the round tripper that performs the actual HTTP exchange.
	upstream http.RoundTripper

	// cookies is the jar used by clients created from this transport.
	cookies http.CookieJar
}
func NewClient() (c *http.Client, err error) { | |
scraper_transport, err := NewTransport(http.DefaultTransport) | |
if err != nil { | |
return | |
} | |
c = &http.Client{ | |
Transport: scraper_transport, | |
Jar: scraper_transport.cookies, | |
} | |
return | |
} | |
func NewTransport(upstream http.RoundTripper) (*Transport, error) { | |
jar, err := cookiejar.New(nil) | |
if err != nil { | |
return nil, err | |
} | |
return &Transport{upstream, jar}, nil | |
} | |
func (t Transport) RoundTrip(r *http.Request) (*http.Response, error) { | |
if r.Header.Get("User-Agent") == "" { | |
r.Header.Set("User-Agent", userAgent) | |
} | |
if r.Header.Get("Referer") == "" { | |
r.Header.Set("Referer", r.URL.String()) | |
} | |
fmt.Println("round-tripping") | |
resp, err := t.upstream.RoundTrip(r) | |
if err != nil { | |
return nil, err | |
} | |
// Check if Cloudflare anti-bot is on | |
server_header := resp.Header.Get("Server") | |
if resp.StatusCode == 503 && (server_header == "cloudflare-nginx" || server_header == "cloudflare") { | |
log.Printf("Solving challenge for %s", resp.Request.URL.Hostname()) | |
resp, err := t.solveChallenge(resp) | |
return resp, err | |
} | |
return resp, err | |
} | |
// Patterns for the form fields embedded in the challenge page.
var (
	// jschlRegexp captures the value of the "jschl_vc" field.
	jschlRegexp = regexp.MustCompile(`name="jschl_vc" value="(\w+)"`)

	// passRegexp captures the value of the "pass" field.
	passRegexp = regexp.MustCompile(`name="pass" value="(.+?)"`)
)
func (t Transport) solveChallenge(resp *http.Response) (*http.Response, error) { | |
time.Sleep(time.Second * 4) // Cloudflare requires a delay before solving the challenge | |
b, err := ioutil.ReadAll(resp.Body) | |
resp.Body.Close() | |
if err != nil { | |
return nil, err | |
} | |
resp.Body = ioutil.NopCloser(bytes.NewReader(b)) | |
var params = make(url.Values) | |
if m := jschlRegexp.FindStringSubmatch(string(b)); len(m) > 0 { | |
params.Set("jschl_vc", m[1]) | |
} | |
if m := passRegexp.FindStringSubmatch(string(b)); len(m) > 0 { | |
params.Set("pass", m[1]) | |
} | |
chkURL, _ := url.Parse("/cdn-cgi/l/chk_jschl") | |
u := resp.Request.URL.ResolveReference(chkURL) | |
fmt.Println("u:", u) //debug | |
//x-added with mod of t.extractJS() args | |
host_len := fmt.Sprintf("%d", len(resp.Request.URL.Host)) | |
js, err := t.extractJS(string(b), host_len) | |
if err != nil { | |
return nil, err | |
} | |
answer, err := t.evaluateJS(js) | |
if err != nil { | |
return nil, err | |
} | |
params.Set("jschl_answer", answer) | |
u.RawQuery = params.Encode() | |
req, err := http.NewRequest("GET", u.String(), nil) | |
if err != nil { | |
return nil, err | |
} | |
req.Header.Set("User-Agent", resp.Request.Header.Get("User-Agent")) | |
req.Header.Set("Referer", resp.Request.URL.String()) | |
log.Printf("Requesting %s", u.String()) | |
client := http.Client{ | |
Transport: t.upstream, | |
Jar: t.cookies, | |
} | |
resp, err = client.Do(req) | |
fmt.Println("solveChallenge:", resp.Status) | |
if err != nil { | |
return nil, err | |
} | |
return resp, nil | |
} | |
func (t Transport) evaluateJS(js string) (string, error) { | |
vm := otto.New() | |
result, err := vm.Run(js) | |
if err != nil { | |
//return 0, err | |
return "", err | |
} | |
fmt.Printf("evalJS result: %v\n", result) // debug | |
//return result.ToInteger() | |
return result.ToString() | |
} | |
// Patterns used to isolate and simplify the challenge script.
var (
	// jsRegexp captures the body of the setTimeout callback, from the
	// "var s,t,o,p,b,r,e,a,k,i,n,g,f" declaration through the end of the
	// line that assigns a.value.
	jsRegexp = regexp.MustCompile(
		`setTimeout\(function\(\){\s+(var ` +
			`s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n`,
	)

	// jsReplace1Regexp matches the "a.value = ..." assignment and captures
	// the expression up to and including "+ t.length".
	// An earlier pattern captured a parseInt(...) call instead:
	//   a\.value = (parseInt\(.+?\)).+
	jsReplace1Regexp = regexp.MustCompile(`a\.value = (.+ \+ t\.length).+`)

	// jsReplace2Regexp matches three or more whitespace characters followed
	// by a lowercase letter and either " = " or ".", through end of line.
	jsReplace2Regexp = regexp.MustCompile(`\s{3,}[a-z](?: = |\.).+`)

	// jsReplace3Regexp matches newlines, backslashes and single quotes.
	jsReplace3Regexp = regexp.MustCompile(`[\n\\']`)
)
func (t Transport) extractJS(body string, hlen string) (string, error) { | |
matches := jsRegexp.FindStringSubmatch(body) | |
if len(matches) == 0 { | |
return "", errors.New("No matching javascript found") | |
} | |
js := matches[1] | |
js = jsReplace1Regexp.ReplaceAllString(js, "$1") | |
js = jsReplace2Regexp.ReplaceAllString(js, "") | |
//x-added: | |
js = strings.Replace(js, "t.length", hlen, -1) | |
// Strip characters that could be used to exit the string context | |
// These characters are not currently used in Cloudflare's arithmetic snippet | |
js = jsReplace3Regexp.ReplaceAllString(js, "") | |
return js, nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Modified from go-cloudflare-scraper to adapt to changes in the Cloudflare JavaScript challenge.