Created
July 30, 2021 09:52
-
-
Save WangYihang/1f95700a9c781441eca3e15dd01d08d4 to your computer and use it in GitHub Desktop.
URL content monitor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# encoding: utf-8 | |
import time | |
import requests | |
import difflib | |
'''
1. Configure the remote server that runs this script:
pip3 install notify-run
notify-run register
notify-run configure https://notify.run/[channel name]
2. Configure your personal computer:
2.1. Open https://notify.run/[channel name] in your browser.
2.2. Click the `Subscribe` button, then allow the permission prompt.
'''
def getTime():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", now)
    return stamp
def get(url, timeout=10):
    """Download ``url`` with browser-like headers and return the raw body.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the caller
            indefinitely.

    Returns:
        The response body as ``bytes``, or ``b""`` when the request fails
        (connection error, timeout, malformed URL, ...). The HTTP status code
        is not inspected, so error pages are returned like any other body.
    """
    headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-CN;q=0.7',
    }
    try:
        return requests.get(url, headers=headers, timeout=timeout).content
    except requests.RequestException as e:
        # Best effort: report the failure instead of hiding it, but keep the
        # original contract of returning an empty body so callers keep going.
        print("[{}] [!] Failed to fetch {}: {}".format(getTime(), url, e))
        return b""
def md5(data):
    """Return the hexadecimal MD5 digest of the bytes-like object ``data``."""
    from hashlib import md5 as _new_md5

    hasher = _new_md5()
    hasher.update(data)
    return hasher.hexdigest()
def _diff_text(old, new):
    """Return a line-based context diff between two byte strings as text.

    Both inputs are decoded as UTF-8 with undecodable bytes replaced, so a
    non-UTF-8 response cannot abort the caller. An empty string is returned
    when the decoded texts have no differing lines.
    """
    old_lines = old.decode("utf-8", errors="replace").splitlines()
    new_lines = new.decode("utf-8", errors="replace").splitlines()
    # context_diff yields lazily; join to materialise it so emptiness can be
    # tested (calling len() on the generator raises TypeError).
    return "\n".join(difflib.context_diff(old_lines, new_lines, lineterm=""))


def monitor(urls, interval=5):
    """Poll ``urls`` forever and save a snapshot whenever a body changes.

    Each round fetches every URL with ``get``. When the body differs from the
    last one seen for that URL (including the very first fetch), it is written
    to ``<md5>_<unix time>_<last path segment of the URL>`` in the current
    directory, and a context diff against the previous body is printed if
    there was one.

    Args:
        urls: Sequence of URLs to watch.
        interval: Seconds to sleep after each full round over ``urls``.

    This function never returns.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the sleep is assumed to happen once per round rather than
    # once per URL -- confirm.
    # NOTE: a failed fetch yields b"" from get(), which is indistinguishable
    # from an empty body and is therefore recorded as a change.
    cache = {url: None for url in urls}
    total = len(urls)
    round_no = 0  # avoid shadowing the builtin round()
    while True:
        round_no += 1
        for i, url in enumerate(urls):
            current = get(url)
            history = cache[url]
            print("[{}] [{}] [{:02d} / {:02d}] {} [{} Bytes] - {}".format(getTime(), round_no, i + 1, total, url, len(current), md5(current)))
            if current == history:
                continue
            # Save the new version
            filename = "{}_{}_{}".format(md5(current), int(time.time()), url.split("/")[-1])
            print("[{}] -> {}".format(getTime(), filename))
            if history is not None:
                diff = _diff_text(history, current)
                if diff:
                    print(diff)
            with open(filename, "wb") as f:
                f.write(current)
            # Update cache
            cache[url] = current
        time.sleep(interval)
def main():
    """Start monitoring the fixed list of geek.qq.com Tetris page assets."""
    targets = [
        "https://geek.qq.com/tetris/index.html",
        "https://geek.qq.com/tetris/js/tetris.config.js",
        "https://geek.qq.com/tetris/js/tetris.core.js",
        "https://geek.qq.com/tetris/js/tetris.game.js",
        "https://geek.qq.com/tetris/js/share.js",
        "https://geek.qq.com/tetris/js/page-intro.js",
        "https://geek.qq.com/tetris/js/page-game.js",
        "https://geek.qq.com/tetris/js/main.js",
    ]
    monitor(targets)


# Run the monitor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
import datetime | |
import pprint | |
import difflib | |
# Maps the monitored file's name (third part of the snapshot file name) to a
# list of (timestamp, content_hash) tuples, one per snapshot found on disk.
file_versions = {}
# Maps a content hash to one snapshot path holding that content, so snapshots
# can be opened without a second glob per version.
snapshot_paths = {}
# Set to True to also print a context diff between consecutive versions.
ENABLE_DIFF = False


def parse_snapshot_name(fullname):
    """Split a ``<hash>_<unix time>_<name>`` file name into its parts.

    Only the first two underscores are treated as separators, so ``<name>``
    may itself contain underscores.

    Returns:
        ``(content_hash, timestamp, name)`` with ``timestamp`` as an ``int``,
        or ``None`` when ``fullname`` does not follow the snapshot pattern.
    """
    parts = fullname.split("_", 2)
    if len(parts) != 3 or not parts[0]:
        return None
    try:
        timestamp = int(parts[1])
    except ValueError:
        # Unrelated file that merely contains underscores.
        return None
    return parts[0], timestamp, parts[2]


def read_snapshot(path):
    """Return the raw bytes stored in the snapshot file at ``path``."""
    with open(path, "rb") as f:
        return f.read()


# Index every snapshot file in the current directory.
for filepath in glob.glob("./*"):
    if not os.path.isfile(filepath):
        continue
    parsed = parse_snapshot_name(os.path.basename(filepath))
    if parsed is None:
        continue
    current_hash, timestamp, filename = parsed
    file_versions.setdefault(filename, []).append((timestamp, current_hash))
    snapshot_paths[current_hash] = filepath

# Print the version history of each monitored file, oldest first.
for filename, versions in file_versions.items():
    print(filename)
    # glob returns entries in arbitrary order; sort so the listing and the
    # "previous vs. current" diffs follow the timestamps.
    versions.sort()
    previous_hash = None
    for timestamp, current_hash in versions:
        current_raw = read_snapshot(snapshot_paths[current_hash])
        print("\t{} {} [{} Bytes]".format(datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S"), current_hash, len(current_raw)))
        if previous_hash is not None and ENABLE_DIFF:
            previous_raw = read_snapshot(snapshot_paths[previous_hash])
            # Diff line by line; passing whole strings would compare
            # character by character.
            previous_lines = previous_raw.decode("utf-8", errors="replace").splitlines()
            current_lines = current_raw.decode("utf-8", errors="replace").splitlines()
            result = list(difflib.context_diff(previous_lines, current_lines, fromfile=previous_hash, tofile=current_hash, lineterm=""))
            pprint.pprint(result)
        previous_hash = current_hash
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment