Skip to content

Instantly share code, notes, and snippets.

@WangYihang
Created July 30, 2021 09:52
Show Gist options
  • Save WangYihang/1f95700a9c781441eca3e15dd01d08d4 to your computer and use it in GitHub Desktop.
Save WangYihang/1f95700a9c781441eca3e15dd01d08d4 to your computer and use it in GitHub Desktop.
URL content monitor
#!/usr/bin/env python3
# encoding: utf-8
import time
import requests
import difflib
'''
1. Configure the remote server that runs this script
pip3 install notify-run
notify-run register
notify-run configure https://notify.run/[channel name]
2. Configure your personal computer
2.1. Open https://notify.run/[channel name] in your browser
2.2. Click `Subscribe` button, then allow the prompt dialog
'''
def getTime():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime()
    return time.strftime("%Y-%m-%d %H:%M:%S", now)
def get(url, timeout=10):
    """Download *url* with browser-like request headers.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the caller forever.

    Returns:
        The raw response body as ``bytes``, or ``b""`` when the request fails.
    """
    headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Accept-Language': 'zh,en-US;q=0.9,en;q=0.8,zh-CN;q=0.7',
    }
    try:
        return requests.get(url, headers=headers, timeout=timeout).content
    except requests.RequestException as e:
        # Stay best-effort (callers expect bytes back), but report the failure
        # instead of hiding it.
        print("[!] GET {} failed: {}".format(url, e))
        return b""
def md5(data):
    """Return the hexadecimal MD5 digest of the bytes in *data*."""
    import hashlib

    hasher = hashlib.md5()
    hasher.update(data)
    return hasher.hexdigest()
def monitor(urls, interval=5):
    """Poll every URL forever and save a snapshot whenever its content changes.

    Each pass fetches all URLs with ``get`` and compares the bytes with the
    previously seen bytes for that URL. On a change (including the very first
    fetch) the content is written to a file named
    ``<md5>_<unix timestamp>_<last path segment of the URL>`` in the current
    directory. From the second version onwards a line-based context diff
    against the previous version is printed as well.

    Args:
        urls: Sequence of URLs to watch.
        interval: Seconds to sleep after each full pass over ``urls``.

    This function never returns.
    """
    cache = dict.fromkeys(urls)  # url -> last seen content (None = not fetched yet)
    round_number = 0
    while True:
        round_number += 1
        for i, url in enumerate(urls):
            current = get(url)
            history = cache[url]
            print("[{}] [{}] [{:02d} / {:02d}] {} [{} Bytes] - {}".format(getTime(), round_number, i + 1, len(urls), url, len(current), md5(current)))
            if current == history:
                continue
            # Download HTML
            filename = "{}_{}_{}".format(md5(current), int(time.time()), url.split("/")[-1])
            print("[{}] -> {}".format(getTime(), filename))
            if history is not None:
                # context_diff compares sequences of lines and returns a
                # generator, so split into lines and materialise the result
                # before testing whether there is anything to print.
                # errors="replace" keeps non-UTF-8 content from aborting
                # the loop.
                old_lines = history.decode("utf-8", errors="replace").splitlines(keepends=True)
                new_lines = current.decode("utf-8", errors="replace").splitlines(keepends=True)
                diffs = list(difflib.context_diff(old_lines, new_lines))
                if diffs:
                    print("".join(diffs))
            with open(filename, "wb") as f:
                f.write(current)
            # Update cache
            cache[url] = current
        time.sleep(interval)
def main():
    """Start monitoring the fixed list of Tetris page resources."""
    monitor([
        "https://geek.qq.com/tetris/index.html",
        "https://geek.qq.com/tetris/js/tetris.config.js",
        "https://geek.qq.com/tetris/js/tetris.core.js",
        "https://geek.qq.com/tetris/js/tetris.game.js",
        "https://geek.qq.com/tetris/js/share.js",
        "https://geek.qq.com/tetris/js/page-intro.js",
        "https://geek.qq.com/tetris/js/page-game.js",
        "https://geek.qq.com/tetris/js/main.js",
    ])
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
import glob
import os
import datetime
import pprint
import difflib
# original file name -> list of (timestamp, content hash) for every snapshot
file_versions = {}
# content hash -> path of one snapshot file holding that content
snapshot_paths = {}
differ = difflib.Differ()  # NOTE(review): not referenced by the code below
ENABLE_DIFF = False


def parse_snapshot_name(fullname):
    """Split a snapshot file name into ``(hash, timestamp, original_name)``.

    Snapshot names have the form ``<hash>_<unix timestamp>_<original name>``.
    Only the first two underscores are separators, so an original name may
    itself contain underscores. Returns ``None`` when *fullname* does not
    follow that layout (too few parts or a non-numeric timestamp).
    """
    parts = fullname.split("_", 2)
    if len(parts) != 3:
        return None
    try:
        timestamp = int(parts[1])
    except ValueError:
        return None
    return parts[0], timestamp, parts[2]


def read_snapshot(path):
    """Return the raw bytes stored in *path*, closing the file afterwards."""
    with open(path, "rb") as f:
        return f.read()


# Collect every snapshot in the current directory, grouped by original name.
for filepath in glob.glob("./*"):
    if not os.path.isfile(filepath):
        continue
    parsed = parse_snapshot_name(os.path.basename(filepath))
    if parsed is None:
        continue
    current_hash, timestamp, filename = parsed
    file_versions.setdefault(filename, []).append((timestamp, current_hash))
    # Equal hashes mean equal content, so any one path per hash is enough.
    snapshot_paths.setdefault(current_hash, filepath)

for filename, versions in file_versions.items():
    print(filename)
    previous_hash = None
    # glob returns files in arbitrary order; sort so the history (and any
    # diff between consecutive versions) is chronological.
    for timestamp, current_hash in sorted(versions):
        current_bytes = read_snapshot(snapshot_paths[current_hash])
        print("\t{} {} [{} Bytes]".format(datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S"), current_hash, len(current_bytes)))
        if previous_hash is not None and ENABLE_DIFF:
            previous_bytes = read_snapshot(snapshot_paths[previous_hash])
            # Diff line by line; errors="replace" tolerates non-UTF-8 bytes.
            previous_lines = previous_bytes.decode("utf-8", errors="replace").splitlines(keepends=True)
            current_lines = current_bytes.decode("utf-8", errors="replace").splitlines(keepends=True)
            result = list(difflib.context_diff(previous_lines, current_lines))
            pprint.pprint(result)
        previous_hash = current_hash
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment