Last active
April 12, 2025 02:55
-
-
Save WangYihang/d340a4457eaea7a76fa7ef305e6990f0 to your computer and use it in GitHub Desktop.
The Similarity of Anti-Bot Mechanisms (JavaScript Challenge)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
import ssdeep | |
import requests | |
def download_file(url, filename, timeout=30):
    """Download a file from a URL and save it to a local filename.

    Args:
        url: Address fetched with a single HTTP GET request.
        filename: Local path the response body is written to. Missing
            parent directories are created first.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the directory or file cannot be created or written.
    """
    print(f"Downloading {url} to {filename}")
    # The status code is intentionally not checked: whatever body the server
    # returns (including a challenge or error page) is saved as-is.
    response = requests.get(url, timeout=timeout)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(response.content)
def prepare_data():
    """Fetch every sample page and store it under the ``data`` directory.

    Each entry pairs a source URL with the path, relative to ``data``, that
    the downloaded body is saved to. The xueqiu URL is listed three times,
    so it is fetched three times into three separate files.
    """
    samples = (
        ("https://xueqiu.com/", "xueqiu/1.html"),
        ("https://xueqiu.com/", "xueqiu/2.html"),
        ("https://xueqiu.com/", "xueqiu/3.html"),
        (
            "https://raw.githubusercontent.com/WangYihang/acw-sc-v2.js/refs/heads/main/assets/index.html",
            "acw-sc-v2/1.html",
        ),
        (
            "https://raw.githubusercontent.com/WangYihang/acw-sc-v2.js/refs/heads/main/assets/yundunwaf.html",
            "acw-sc-v2/2.html",
        ),
    )
    for source_url, relative_path in samples:
        download_file(source_url, os.path.join("data", relative_path))
def process_html_files(pattern="data/**/*.html"):
    """Print the ssdeep fuzzy hash of every file matching ``pattern``.

    Args:
        pattern: Glob pattern, expanded recursively, selecting the files to
            hash. Matches are processed in sorted order.

    Each matching file is read as UTF-8 text and printed as one line:
    the file path followed by its ssdeep hash.
    """
    for path in sorted(glob.glob(pattern, recursive=True)):
        with open(path, encoding="utf-8") as handle:
            content = handle.read()
        print(path, ssdeep.hash(content))
if __name__ == "__main__":
    # Download the sample pages first, then hash whatever landed on disk.
    prepare_data()
    process_html_files()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data/acw-sc-v2/1.html 384:6BHHEn03ZqkitPnE6hGezViI4n0bvudxIGd9UsEBt7P7XcTxdp82aH1zcRB26t22:n03j5leEn0bvudxIGd9Usot7P7sTxdpB
data/acw-sc-v2/2.html 384:JmBHHEn03ZqkitPnE6hGezViI4n0bvudxIGd9UsEBt7P7XcTxdp82aH1zcRB26tB:Jb03j5leEn0bvudxIGd9Usot7P7sTxdh
data/xueqiu/1.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEV7:7CJf9+IOeW2woG
data/xueqiu/2.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEVQ:7CJf9+IOeW2woh
data/xueqiu/3.html 768:DHtIWKuiMknx9tH9WhH7MON4WilwonEVd:7CJf9+IOeW2woU
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment