Skip to content

Instantly share code, notes, and snippets.

@Wh1terat
Last active June 11, 2023 12:21
Show Gist options
  • Save Wh1terat/309ca982aaa89ff9804d3e548ff6d5fc to your computer and use it in GitHub Desktop.
Save Wh1terat/309ca982aaa89ff9804d3e548ff6d5fc to your computer and use it in GitHub Desktop.
Inscrapesula
#!/usr/bin/env python3
"""
InSCRAPEsula v0.1
Inspired by, and with sections borrowed from, https://github.com/ziplokk1/incapsula-cracker-py3
"""
import logging
import re
from ast import literal_eval
from base64 import b64encode, b64decode
from random import random
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from requests import Session
from requests.utils import quote, urlparse
# Package metadata.
__title__ = 'inscrapesula'
__version__ = '0.1'
__author__ = 'Gareth Bryan'
# NOTE(review): everything below runs at import time and changes
# process-wide state, not just this module - confirm this is intended
# outside of debugging sessions.
import http.client
# A non-zero debuglevel makes http.client print the request/response
# traffic of every HTTP connection to stdout.
http.client.HTTPConnection.debuglevel = 1
# Configure the root logger to emit DEBUG and above.
logging.basicConfig(level=logging.DEBUG)
class ISession(Session):
    """
    requests.Session subclass that tries to pass the Incapsula JS challenge.

    ``get()`` inspects each response for an ``/_Incapsula_Resource?`` script,
    derives the ``___utmvc`` cookie from it, sends the follow-up "ack"
    request and then retries the original URL.

    :param attempts: int maximum number of solve attempts (default 3);
        ``None`` disables the limit
    """

    # Matches the src attribute of the Incapsula challenge script / iframe.
    # The "?" is escaped so that it matches a literal question mark.
    _RESOURCE_RXP = re.compile(r"/_Incapsula_Resource\?")

    # (expression, value) pairs reported back as the "browser check" results.
    # NOTE(review): the values (including the "no extention" spelling) are
    # presumably what the challenge expects verbatim - do not "correct" them.
    BROWSER_CHECKS = [
        ("navigator", "true"),
        ("navigator.vendor", "Google Inc."),
        ("navigator.appName", "Netscape"),
        ("navigator.plugins.length==0", "false"),
        ("navigator.platform", "MacIntel"),
        ("navigator.webdriver", "undefined"),
        ("plugin_ext", "no extention"),
        ("ActiveXObject", "false"),
        ("webkitURL", "true"),
        ("_phantom", "false"),
        ("callPhantom", "false"),
        ("chrome", "true"),
        ("yandex", "false"),
        ("opera", "false"),
        ("opr", "false"),
        ("safari", "false"),
        ("awesomium", "false"),
        ("puffinDevice", "false"),
        ("__nightmare", "false"),
        ("domAutomation", "false"),
        ("domAutomationController", "false"),
        ("_Selenium_IDE_Recorder", "false"),
        ("document.__webdriver_script_fn", "false"),
        ("document.$cdc_asdjflasutopfhvcZLmcfl_", "false"),
        ("process.version", "false"),
        ("global", "false"),
        ("global.require", "false"),
        ("global.process", "false"),
        ("WebAssembly", "true"),
        ("window.toString()", "[object Window]"),
        ("navigator.cpuClass", "false"),
        ("navigator.oscpu", "false"),
        ("navigator.connection", "true"),
        ("navigator.language=='C'", "false"),
        ("window.outerWidth==0", "false"),
        ("window.outerHeight==0", "false"),
        ("window.WebGLRenderingContext", "true"),
        ("document.documentMode", "undefined"),
        ("eval.toString().length", "33"),
        ("'v2b147170f281b1d69c9de8f2e4c27a45252e1a6c83ec1dd95f7c3f5f5d987e9b'.toString()", "v2b147170f281b1d69c9de8f2e4c27a45252e1a6c83ec1dd95f7c3f5f5d987e9b"),
    ]

    class IncapError(Exception):
        """
        Base exception for all errors
        """

    class MaxRetriesExceeded(IncapError):
        """
        Raised when retries exceeds "max_attempts"
        """

    class IncapBlocked(IncapError):
        """
        Raised when Incapsula blocks with reCAPTCHA
        """

    class IncapJSError(IncapError):
        """
        Raised when something goes wrong in the JS deobfuscation
        """

    def __init__(self, **kwargs):
        """
        :param attempts: int maximum number of solve attempts (default 3)
        :param kwargs: remaining kwargs are passed to requests.Session
        """
        self.logger = logging.getLogger('inscrapesula')
        self.max_attempts = kwargs.pop('attempts', 3)
        super().__init__(**kwargs)

    def get(self, url, **kwargs):
        """
        Override requests.session.get()

        :param url: string URL for the new request object
        :param bypass: bool when true the response is returned as-is and no
            attempt is made to solve a challenge
        :param kwargs: dict passed through to the original request function
        :return: Response object
        """
        kwargs.setdefault('allow_redirects', True)
        # if bypass kwarg is set, don't try to solve
        if kwargs.pop('bypass', False):
            return self.request('GET', url, **kwargs)
        return self._solve(self.request('GET', url, **kwargs))

    def _solve(self, resp, orig=None, attempts=0):
        """
        Attempt to bypass incapsula

        :param resp: Response to check.
        :param orig: Original response. Used only when called recursively.
        :param attempts: int Number of attempts so far. Used when called
            recursively.
        :return: the first Response that carries no challenge script
        :raises MaxRetriesExceeded: when "max_attempts" is reached
        :raises IncapBlocked: when the page is a reCAPTCHA block page
        """
        # Compare against None explicitly: a Response is falsy whenever its
        # status is an error, so "orig or resp" would drop such an original.
        if orig is None:
            orig = resp
        if self.max_attempts is not None and attempts >= self.max_attempts:
            raise self.MaxRetriesExceeded(
                'Challenge not solved after {} attempts'.format(attempts)
            )
        url = urlparse(orig.url)
        script = self._get_script(url, resp.text)
        if not script:
            # no challenge present, this is the real page
            return resp
        self._generate_cookie(url.hostname, script)
        self._send_ack(url)
        return self._solve(
            self.get(orig.url, bypass=True),
            orig=orig,
            attempts=attempts + 1
        )

    def _get_script(self, url, content):
        """
        Check page response for recaptcha or js challenge

        :param url: parsed original url (scheme and netloc are used)
        :param content: string page
        :return: string de-hexed challenge js, or None when the page holds
            no challenge script
        :raises IncapBlocked: when the reCAPTCHA iframe is present
        :raises IncapJSError: when the js body cannot be extracted
        """
        parser = BeautifulSoup(content, 'html.parser')
        iframe = parser.find(
            "iframe",
            id="main-iframe",
            src=self._RESOURCE_RXP,
        )
        if iframe:
            # an empty iframe has no contents to report
            reason = iframe.contents[0] if iframe.contents else 'Blocked'
            raise self.IncapBlocked(reason)
        script = parser.find("script", src=self._RESOURCE_RXP)
        if not script:
            return None
        self.logger.debug('Script Found: %s', script['src'])
        # Download the incapsula js
        req = '{}://{}{}'.format(url.scheme, url.netloc, script['src'])
        res = self.get(req, bypass=True)
        # Extract the true body from the first level of obfuscation
        # e.g
        #
        # (function() {
        #     var z = "";
        #     var b = "766172205f3078326630633d5b275...*SNIP*";
        #     eval((function() {
        #         for (var i = 0; i < b.length; i += 2) {
        #             z += String.fromCharCode(parseInt(b.substring(i, i + 2), 16));
        #         }
        #         return z;
        #     })());
        # })();
        #
        js_code = re.search(r'"";var [a-z]="([^"]*)', res.text)
        if js_code is None:
            raise self.IncapJSError('Failed to extract js')
        try:
            return bytes.fromhex(js_code.group(1)).decode('utf-8')
        except ValueError as err:
            # covers both invalid hex and invalid utf-8
            raise self.IncapJSError('Failed to decode js') from err

    def _send_ack(self, url):
        """
        Send Ack (don't know what else to call it?)

        :param url: parsed original url (scheme and netloc are used)
        :return: bool True when the response body is exactly '1'
        """
        req = '{}://{}/_Incapsula_Resource?SWKMTFSR=1&e={}'.format(
            url.scheme,
            url.netloc,
            random()
        )
        self.logger.debug('Sending Ack: %s', req)
        res = self.get(req, bypass=True)
        return res.text == '1'

    def _generate_cookie(self, domain, data):
        """
        Generate ___utmvc cookie and add it to requests.session.cookies

        :param domain: string domain name used for cookie creation
        :param data: string content of _Incapsula_Resource JS
        :raises IncapError: when no incap_ses_* cookie is in the session
        :raises IncapJSError: when the token cannot be recovered from the JS
        """
        # attempt to decipher string arrays and get token
        token = self._get_token(data)
        if not token:
            # an empty token cannot be used as an rc4 key
            raise self.IncapJSError('Deciphered token is empty')
        # grab incapsula session cookies
        cookies = [
            cookie.value
            for cookie in self.cookies
            if cookie.name.startswith("incap_ses_")
        ]
        if not cookies:
            raise self.IncapError('No Incapsula session cookies found!')
        # urlencoded list of tuples containing 'good' browser responses
        browser_checks = ','.join(
            quote("=".join(check), safe="()'")
            for check in self.BROWSER_CHECKS
        )
        # simple digest
        digest = ''.join(
            self._checksum(browser_checks + cookie) for cookie in cookies
        )
        # signature
        signature = ''.join(
            "{:x}".format(ord(char) + ord(digest[i % len(digest)]))
            for i, char in enumerate(token)
        )
        # create final cookie value
        value = b64encode(
            b"%b,digest=%s,s=%s"
            % (
                self._rc4(browser_checks, token[:5]),
                digest.encode(),
                signature.encode(),
            )
        ).decode("utf-8")
        self.logger.debug(
            "Setting ___utmvc cookie for '%s': %s", domain, value
        )
        # original cookie expiry is 20 seconds
        expires = round((datetime.now() + timedelta(seconds=20)).timestamp())
        self.cookies.set(
            "___utmvc",
            value,
            domain=domain,
            path="/",
            expires=expires,
        )

    def _get_string_arrays(self, js_code):
        """
        Collect the rotated string arrays keyed by their decoder function

        :param js_code: string de-hexed challenge js
        :return: dict decoder function name -> rotated list of strings
        :raises IncapJSError: when an array or its rotation cannot be parsed
        """
        arrays = {}
        # Find all string array definitions
        # e.g
        #
        # var _0x2f0c = ['wpMbIsOc', 'w57CmAU=', 'JMKPw4c=',...*SNIP*];
        #
        for array in re.finditer(r"var (_0x\w+)=(\['.*?\])", js_code):
            # Find string array init arguments and decoder function name
            # e.g
            #
            # }(_0x2f0c, 0xb5));var _0xc2f0 = function(_0x4296cc, _0x2adc0c) {
            #
            init_rxp = r"{},(0x\w+)\)\);var (_0x\w+)".format(
                re.escape(array.group(1))
            )
            for init in re.finditer(init_rxp, js_code):
                try:
                    strings = literal_eval(array.group(2))
                    # the number of times to "rotate" (shift) the array
                    shift = int(init.group(1), 16) % len(strings)
                except (ValueError, SyntaxError, ZeroDivisionError) as err:
                    raise self.IncapJSError(
                        'Failed to process string array'
                    ) from err
                # shift the array by n
                arrays[init.group(2)] = strings[shift:] + strings[:shift]
        return arrays

    def _get_key_literal(self, js_code, name, error):
        """
        Return the unescaped value of "var <name>='\\x..\\x..';"

        :param js_code: string de-hexed challenge js
        :param name: string js variable name holding the escaped literal
        :param error: string message used when the literal is missing
        :return: string unescaped literal
        :raises IncapJSError: when the literal is missing or malformed
        """
        literal = re.search(
            r"var {}='([a-fx0-9\\]+)';".format(re.escape(name)), js_code
        )
        if literal is None:
            raise self.IncapJSError(error)
        try:
            return literal.group(1).encode('utf-8').decode('unicode_escape')
        except UnicodeDecodeError as err:
            raise self.IncapJSError(error) from err

    def _get_simple_key(self, js_code, key_var):
        """
        Method 1: the key variable is an alias of a single string literal

        :param js_code: string de-hexed challenge js
        :param key_var: string name of the variable passed as cipher key
        :return: string rc4 key
        """
        alias = re.search(
            r"var {}=(_0x\w+);".format(re.escape(key_var)), js_code
        )
        if alias is None:
            raise self.IncapJSError(
                'Failed to find token variable 1 using method 1'
            )
        return self._get_key_literal(
            js_code, alias.group(1), 'Failed to find token key using method 1'
        )

    def _get_split_key(self, js_code, key_var1, key_var2):
        """
        Method 2: the key is "a + b" where both halves are substr() calls

        e.g. var a = lit['substr'](0x0, 0x5); var b = lit['substr'](0x5);

        :param js_code: string de-hexed challenge js
        :param key_var1: string name of the first operand
        :param key_var2: string name of the second operand
        :return: string rc4 key
        """
        head = re.search(
            r"var {}=(_0x\w+)\['\\x73\\x75\\x62\\x73\\x74\\x72'\]\((0x[0-9a-f]+),(0x[0-9a-f]+)\);".format(
                re.escape(key_var1)
            ),
            js_code,
        )
        if head is None:
            raise self.IncapJSError(
                'Failed to find token variable 1 using method 2'
            )
        start = int(head.group(2), 16)
        length = int(head.group(3), 16)
        part1 = self._get_key_literal(
            js_code, head.group(1), 'Failed to find token key 1 using method 2'
        )
        # JS substr(start, length) returns "length" characters from "start"
        part1 = part1[start:start + length]
        tail = re.search(
            r"var {}=(_0x\w+)\['\\x73\\x75\\x62\\x73\\x74\\x72'\]\((0x[0-9a-f]+)\);".format(
                re.escape(key_var2)
            ),
            js_code,
        )
        if tail is None:
            raise self.IncapJSError(
                'Failed to find token variable 2 using method 2'
            )
        # the offset belongs to the second substr() call, not the first
        offset = int(tail.group(2), 16)
        part2 = self._get_key_literal(
            js_code, tail.group(1), 'Failed to find token key 2 using method 2'
        )
        # JS substr(start) returns everything from "start" onwards
        return part1 + part2[offset:]

    def _get_token(self, js_code):
        """
        Get token

        :param js_code: string de-hexed challenge js (see _get_script)
        :return: string token
        :raises IncapJSError: when any part of the token recovery fails
        """
        arrays = self._get_string_arrays(js_code)
        if not arrays:
            raise self.IncapJSError('Failed to process string array')
        token_call = re.search(
            r"(_0x\w+)\('(\w+)', ([0-9a-fx\s\+_]+)\)", js_code
        )
        if token_call is None:
            raise self.IncapJSError('Failed to find call to cipher token')
        # the key argument is either "var" or "var1 + var2"; the pattern
        # above allows whitespace around the names so strip it off
        key_vars = [name.strip() for name in token_call.group(3).split("+")]
        if len(key_vars) == 1:
            self.logger.debug('Token method 1 in use')
            key = self._get_simple_key(js_code, key_vars[0])
        elif len(key_vars) == 2:
            self.logger.debug('Token method 2 in use')
            key = self._get_split_key(js_code, key_vars[0], key_vars[1])
        else:
            raise self.IncapJSError('Unsupported cipher key expression')
        if not key:
            raise self.IncapJSError('Cipher key is empty')
        try:
            index = int(token_call.group(2), 16)
            enc = b64decode(arrays[token_call.group(1)][index]).decode('utf-8')
            token = self._rc4(enc, key).decode('utf-8')
        except (KeyError, IndexError, ValueError) as err:
            # unknown decoder name, bad index, bad base64 or bad utf-8
            raise self.IncapJSError('Failed to decipher token') from err
        self.logger.debug('Token found: %s', token)
        return token

    @staticmethod
    def _checksum(data):
        """
        Checksum

        :param data: string data to checksum
        :return: string decimal sum of the code points in data
        """
        return str(sum(ord(c) for c in data))

    @staticmethod
    def _rc4(message, key):
        """
        RC4 Implementation.

        :param message: string message to cipher (iterated per character)
        :param key: string rc4 key, must not be empty
        :return: bytearray ciphertext
        """
        cipher_text = bytearray()
        key = [ord(c) for c in key]
        # key scheduling
        sbox = list(range(256))
        j = 0
        for i in range(256):
            j = (j + sbox[i] + key[i % len(key)]) % 256
            sbox[i], sbox[j] = sbox[j], sbox[i]
        # keystream generation, xor'd with the message as we go
        i = j = 0
        for char in message:
            i = (i + 1) % 256
            j = (j + sbox[i]) % 256
            sbox[i], sbox[j] = sbox[j], sbox[i]
            key_stream = sbox[(sbox[i] + sbox[j]) % 256]
            cipher_text.append(key_stream ^ ord(char))
        return cipher_text
@Wh1terat
Copy link
Author

Ah yes, that was one thing I did see - it is totally inconsistent as to which template Incapsula uses between requests.
I remember there were 2-3 a few years ago; there may be more now.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment