Skip to content

Instantly share code, notes, and snippets.

@xiconet
Last active March 28, 2020 14:01
Show Gist options
  • Save xiconet/09df2bf913b0ea9056ae3a12c6cc4891 to your computer and use it in GitHub Desktop.
Save xiconet/09df2bf913b0ea9056ae3a12c6cc4891 to your computer and use it in GitHub Desktop.
Cloudflare under-attack mode challenge solver
package scraper
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"regexp"
//"strconv"
"strings"
"time"
"github.com/robertkrimen/otto"
)
// Alternative (Chrome) User-Agent value, kept commented out for reference:
//const userAgent = `Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36`

// userAgent is the default User-Agent header value; RoundTrip applies it to
// requests that do not already carry a User-Agent header.
const userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.9) Gecko/20100101 Goanna/2.2 Firefox/38.9 PaleMoon/26.5.0"
// Transport wraps another http.RoundTripper and, when a response looks like a
// Cloudflare anti-bot challenge page, attempts to solve the challenge before
// handing a response back to the caller.
type Transport struct {
// upstream is the RoundTripper that actually performs the HTTP requests.
upstream http.RoundTripper
// cookies is the jar shared with the clients built around this transport
// (NewClient and the internal client used to submit the challenge answer).
cookies http.CookieJar
}
// NewClient returns an *http.Client whose Transport is a challenge-solving
// Transport layered on http.DefaultTransport and whose Jar is that transport's
// cookie jar. It returns a nil client and the error if the transport cannot be
// created.
func NewClient() (c *http.Client, err error) {
	tr, err := NewTransport(http.DefaultTransport)
	if err != nil {
		return nil, err
	}
	return &http.Client{
		Transport: tr,
		Jar:       tr.cookies,
	}, nil
}
// NewTransport wraps upstream in a Transport that owns a freshly created
// cookie jar. The error, if any, is the one reported by cookiejar.New.
func NewTransport(upstream http.RoundTripper) (*Transport, error) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}
	wrapped := &Transport{
		upstream: upstream,
		cookies:  jar,
	}
	return wrapped, nil
}
// RoundTrip implements http.RoundTripper. It forwards the request to the
// upstream RoundTripper, filling in default User-Agent and Referer headers
// when they are absent. If the response is a 503 served by Cloudflare, it is
// treated as an anti-bot challenge and handed to solveChallenge, whose
// response is returned instead.
func (t Transport) RoundTrip(r *http.Request) (*http.Response, error) {
	// The http.RoundTripper contract forbids modifying the caller's request,
	// so the default headers are applied to a shallow copy that has its own
	// header map.
	req := new(http.Request)
	*req = *r
	req.Header = make(http.Header, len(r.Header)+2)
	for k, v := range r.Header {
		req.Header[k] = append([]string(nil), v...)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", userAgent)
	}
	if req.Header.Get("Referer") == "" {
		req.Header.Set("Referer", req.URL.String())
	}

	resp, err := t.upstream.RoundTrip(req)
	if err != nil {
		return nil, err
	}
	// solveChallenge reads resp.Request; make sure it is populated even if the
	// upstream RoundTripper did not set it.
	if resp.Request == nil {
		resp.Request = req
	}

	// Check if Cloudflare anti-bot is on.
	server := resp.Header.Get("Server")
	if resp.StatusCode != http.StatusServiceUnavailable ||
		(server != "cloudflare-nginx" && server != "cloudflare") {
		return resp, nil
	}

	log.Printf("Solving challenge for %s", resp.Request.URL.Hostname())
	return t.solveChallenge(resp)
}
// Patterns used by solveChallenge to pull form field values out of the
// challenge page.
var (
	// jschlRegexp captures the value that follows name="jschl_vc".
	jschlRegexp = regexp.MustCompile(`name="jschl_vc" value="(\w+)"`)

	// passRegexp captures the (shortest) value that follows name="pass".
	passRegexp = regexp.MustCompile(`name="pass" value="(.+?)"`)
)
// solveChallenge answers the Cloudflare JavaScript challenge contained in
// resp and returns the response to the answer submission.
//
// It reads the challenge page, extracts the "jschl_vc" and "pass" form values
// and the arithmetic JavaScript snippet, evaluates the snippet to obtain
// "jschl_answer", and submits all three as query parameters of a GET to
// /cdn-cgi/l/chk_jschl on the same host. The submission goes through the
// upstream RoundTripper with the shared cookie jar. resp.Request must be
// non-nil. On any failure a nil response and the error are returned.
func (t Transport) solveChallenge(resp *http.Response) (*http.Response, error) {
	ctx := resp.Request.Context()

	b, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return nil, err
	}
	// Leave the challenge response with a readable body again.
	resp.Body = ioutil.NopCloser(bytes.NewReader(b))
	page := string(b)

	// Cloudflare requires a delay before solving the challenge. Wait on a
	// timer rather than sleeping so a cancelled request does not block here.
	timer := time.NewTimer(4 * time.Second)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-ctx.Done():
		return nil, ctx.Err()
	}

	params := make(url.Values)
	if m := jschlRegexp.FindStringSubmatch(page); len(m) > 0 {
		params.Set("jschl_vc", m[1])
	}
	if m := passRegexp.FindStringSubmatch(page); len(m) > 0 {
		params.Set("pass", m[1])
	}

	chkURL, err := url.Parse("/cdn-cgi/l/chk_jschl")
	if err != nil {
		return nil, err
	}
	u := resp.Request.URL.ResolveReference(chkURL)

	// The challenge script adds t.length to its result; extractJS substitutes
	// the length of the request host for it.
	hostLen := fmt.Sprintf("%d", len(resp.Request.URL.Host))
	js, err := t.extractJS(page, hostLen)
	if err != nil {
		return nil, err
	}
	answer, err := t.evaluateJS(js)
	if err != nil {
		return nil, err
	}
	params.Set("jschl_answer", answer)
	u.RawQuery = params.Encode()

	req, err := http.NewRequest("GET", u.String(), nil)
	if err != nil {
		return nil, err
	}
	// Carry over the original request's context so cancellation and deadlines
	// also apply to the answer submission.
	req = req.WithContext(ctx)
	req.Header.Set("User-Agent", resp.Request.Header.Get("User-Agent"))
	req.Header.Set("Referer", resp.Request.URL.String())
	log.Printf("Requesting %s", u.String())

	client := http.Client{
		Transport: t.upstream,
		Jar:       t.cookies,
	}
	solved, err := client.Do(req)
	if err != nil {
		// solved is nil here; it must not be inspected before this check.
		return nil, err
	}
	log.Printf("solveChallenge: %s", solved.Status)
	return solved, nil
}
// evaluateJS runs js in a fresh otto VM and returns the resulting value
// converted to a string. It returns an empty string and the error if the
// script fails to run or the result cannot be converted.
//
// NOTE(review): js originates from a remote response body (see
// solveChallenge/extractJS) and is executed without any time limit. otto
// appears to offer an Interrupt channel for halting long-running scripts;
// consider adding a watchdog - confirm against the otto documentation.
func (t Transport) evaluateJS(js string) (string, error) {
	vm := otto.New()
	result, err := vm.Run(js)
	if err != nil {
		return "", err
	}
	// Diagnostics go through the log package, like the rest of this file,
	// instead of being written to stdout.
	log.Printf("evalJS result: %v", result)
	return result.ToString()
}
// Patterns used by extractJS to isolate and clean up the challenge script.
var (
	// jsRegexp captures the body of the setTimeout callback, from the
	// "var s,t,o,p,b,r,e,a,k,i,n,g,f" declaration through the line that
	// assigns a.value.
	jsRegexp = regexp.MustCompile(
		`setTimeout\(function\(\){\s+(var ` +
			`s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n`,
	)

	// jsReplace1Regexp reduces the "a.value = <expr> + t.length..." statement
	// to the bare expression (capture group 1).
	// Earlier variant: `a\.value = (parseInt\(.+?\)).+`
	jsReplace1Regexp = regexp.MustCompile(`a\.value = (.+ \+ t\.length).+`)

	// jsReplace2Regexp matches statements that follow three or more
	// whitespace characters and start with a single lowercase letter followed
	// by " = " or ".".
	jsReplace2Regexp = regexp.MustCompile(`\s{3,}[a-z](?: = |\.).+`)

	// jsReplace3Regexp matches newlines, backslashes and single quotes.
	jsReplace3Regexp = regexp.MustCompile(`[\n\\']`)
)
// extractJS pulls the challenge script out of body and rewrites it into a
// single expression that can be evaluated on its own. hlen is substituted,
// verbatim, for every occurrence of "t.length" in the script. An error is
// returned when body contains no script matching jsRegexp.
func (t Transport) extractJS(body string, hlen string) (string, error) {
	m := jsRegexp.FindStringSubmatch(body)
	if len(m) == 0 {
		return "", errors.New("No matching javascript found")
	}

	// Reduce the "a.value = ..." statement to its expression, then drop the
	// statements matched by jsReplace2Regexp.
	snippet := jsReplace1Regexp.ReplaceAllString(m[1], "$1")
	snippet = jsReplace2Regexp.ReplaceAllString(snippet, "")

	// Substitute the caller-supplied length for t.length.
	snippet = strings.Replace(snippet, "t.length", hlen, -1)

	// Strip characters that could be used to exit the string context.
	// These characters are not currently used in Cloudflare's arithmetic snippet.
	return jsReplace3Regexp.ReplaceAllString(snippet, ""), nil
}
@xiconet
Copy link
Author

xiconet commented Jan 27, 2019

Modified from go-cloudflare-scraper to adapt to some changes in the Cloudflare JavaScript challenge.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment