Skip to content

Instantly share code, notes, and snippets.

@WangYihang
Last active April 12, 2025 02:55
Show Gist options
  • Save WangYihang/d340a4457eaea7a76fa7ef305e6990f0 to your computer and use it in GitHub Desktop.
Save WangYihang/d340a4457eaea7a76fa7ef305e6990f0 to your computer and use it in GitHub Desktop.
The Similarity of Anti-Bot Mechanisms (JavaScript Challenge)
import glob
import os
import ssdeep
import requests
def download_file(url, filename, timeout=30):
    """Download a URL and save the response body to a local file.

    The HTTP status code is not checked; whatever body the server returns
    is written to disk.

    Args:
        url: Address to fetch with an HTTP GET request.
        filename: Destination path. Missing parent directories are created.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds),
            so an unresponsive server cannot block the script forever.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the directory or file cannot be created or written.
    """
    print(f"Downloading {url} to {filename}")
    # The context manager releases the streamed connection even on error.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # dirname is "" for a bare filename, and os.makedirs("") raises,
        # so only create the directory when there is one.
        parent = os.path.dirname(filename)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(filename, "wb") as f:
            # Write in chunks so the whole body is never held in memory.
            for chunk in response.iter_content(chunk_size=65536):
                f.write(chunk)
def prepare_data():
    """Fetch each sample page and store it under the ``data`` directory."""
    # (source URL, destination path relative to "data")
    sources = (
        ("https://xueqiu.com/", "xueqiu/1.html"),
        ("https://xueqiu.com/", "xueqiu/2.html"),
        ("https://xueqiu.com/", "xueqiu/3.html"),
        (
            "https://raw.githubusercontent.com/WangYihang/acw-sc-v2.js/refs/heads/main/assets/index.html",
            "acw-sc-v2/1.html",
        ),
        (
            "https://raw.githubusercontent.com/WangYihang/acw-sc-v2.js/refs/heads/main/assets/yundunwaf.html",
            "acw-sc-v2/2.html",
        ),
    )
    for url, relative_path in sources:
        download_file(url, os.path.join("data", relative_path))
def process_html_files(pattern="data/**/*.html"):
    """Print the path and ssdeep fuzzy hash of every file matching *pattern*.

    Matching files are visited in sorted path order and read as UTF-8 text.
    """
    for path in sorted(glob.glob(pattern, recursive=True)):
        with open(path, encoding="utf-8") as handle:
            text = handle.read()
        print(path, ssdeep.hash(text))
if __name__ == "__main__":
    # Download the sample pages first, then print a fuzzy hash for each
    # HTML file found under data/.
    prepare_data()
    process_html_files()
data/acw-sc-v2/1.html 384:6BHHEn03ZqkitPnE6hGezViI4n0bvudxIGd9UsEBt7P7XcTxdp82aH1zcRB26t22:n03j5leEn0bvudxIGd9Usot7P7sTxdpB
data/acw-sc-v2/2.html 384:JmBHHEn03ZqkitPnE6hGezViI4n0bvudxIGd9UsEBt7P7XcTxdp82aH1zcRB26tB:Jb03j5leEn0bvudxIGd9Usot7P7sTxdh
data/xueqiu/1.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEV7:7CJf9+IOeW2woG
data/xueqiu/2.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEVQ:7CJf9+IOeW2woh
data/xueqiu/3.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEVd:7CJf9+IOeW2woU
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment