Last active
July 15, 2020 02:21
-
-
Save kanzure/b806e07bbe83357c6f9864b7644d40e9 to your computer and use it in GitHub Desktop.
tool for timestamping IRC logs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Timestamps!

This file creates opentimestamps timestamps for daily IRC logs. It works when
run on a daily basis (called from a cronjob) or when called manually after
multiple days have passed since the last run.

The script will identify all new log files and create timestamps for those new
log files.

The script will not timestamp "today's" log until tomorrow because the log is
incomplete until it is closed out for the day.

:author: Bryan Bishop <[email protected]>
:date: 2020-02-10
""" | |
import os | |
import sh | |
from datetime import datetime, timedelta | |
import itertools | |
# Readonly switch: when True, make_timestamps() only prints which files it
# would stamp and move instead of calling ots and mv.
DRY_RUN = False

# Directories (relative paths) that hold the daily IRC logs. The list exists
# only because the logs are not all gathered under one parent such as
# "irclogs/". Directories are processed in the order given here.
DIRNAMES = [
    "logs/",
    "bitcoin-core-dev/",
    "bitcoin-wizards/",
    "bitmetas/",
    "ctv-bip-review/",
    "c-lightning/",
    "lightning-dev/",
    "lnd/",
    "rust-bitcoin/",
    "secp256k1/",
    "joinmarket/",
    "bitcoin-builds/",
    "braidpool/",
    "taproot-bip-review/",
    "utreexo/",
]
def fromisoformat(date_fragment):
    """
    Parse a "%Y-%m-%d" date string (for example "2020-02-10") into a datetime.

    This stands in for datetime.fromisoformat(), which is unavailable in older
    versions of python3.

    :param date_fragment: date string in "%Y-%m-%d" format.
    :return: datetime for midnight at the start of that day.
    :raises ValueError: if the string does not match the format.
    """
    iso_date_format = "%Y-%m-%d"
    parsed = datetime.strptime(date_fragment, iso_date_format)
    return parsed
def grouper(n, iterable):
    """
    Split an iterable into consecutive groups of at most n items each.

    Every group is a list holding n items, except the last one which holds
    whatever is left over. Items keep their original order, and items that are
    themselves None are preserved.

    :param n: maximum number of items per group.
    :param iterable: the items to split up.
    :return: a generator that yields one list per group.
    """
    # A private sentinel is used for padding so that legitimate items (None
    # included) are never mistaken for padding and dropped.
    padding = object()
    # n references to the *same* iterator: zip_longest then pulls n consecutive
    # items for every tuple it builds.
    args = [iter(iterable)] * n
    return (
        [item for item in chunk if item is not padding]
        for chunk in itertools.zip_longest(*args, fillvalue=padding)
    )
def get_last_few_days(n):
    """
    Get a list of datetime objects for the past few days.

    The list starts at the day before yesterday and goes further back one day
    at a time, so entry x is (x + 2) days before now.

    :param n: how many days to return.
    :return: list of n naive datetimes, most recent first (empty if n <= 0).
    """
    # Read the clock once so that every entry is exactly one day apart; calling
    # datetime.now() per entry could skip or repeat a calendar day if the loop
    # happened to run across midnight.
    now = datetime.now()
    # Start at 2 days back because yesterday's log is not ready yet (its
    # timestamp was just created a moment ago).
    return [now - timedelta(days=days_back) for days_back in range(2, n + 2)]
def get_last_few_days_date_fragments(n):
    """
    Return the dates from get_last_few_days(n) as "%Y-%m-%d" strings.

    The strings match the basename of the IRC log filenames without the file
    extension.

    :param n: how many days to return.
    :return: list of n date strings, in the order get_last_few_days gives them.
    """
    fragments = []
    for day in get_last_few_days(n):
        fragments.append(day.strftime("%Y-%m-%d"))
    return fragments
def upgrade_recent_timestamps(dirname, daysback=3):
    """
    Upgrade the past few days of timestamps in <dirname>/timestamps.

    The candidate files are "<date>.log.ots" for each date returned by
    get_last_few_days_date_fragments(daysback). Files that do not exist are
    reported and skipped; the rest are handed to a single "ots upgrade" call.
    A failure of that call is reported but not raised, so the caller can move
    on to the next directory.

    :param dirname: log directory that contains a "timestamps" subdirectory.
    :param daysback: how many days of timestamps to try to upgrade.
    :raises Exception: if daysback is greater than 400, because all files are
        passed to one ots invocation and no grouping is implemented.
    """
    # Validate before doing any work.
    if daysback > 400:
        raise Exception("grouping not implemented for ots upgrade")

    timestamp_dirpath = os.path.join(dirname, "timestamps")
    existant_files = []
    for date_fragment in get_last_few_days_date_fragments(n=daysback):
        timestamp_filename = os.path.join(timestamp_dirpath, date_fragment + ".log.ots")
        if os.path.exists(timestamp_filename):
            existant_files.append(timestamp_filename)
        else:
            print("Can't upgrade {} because the file doesn't exist".format(timestamp_filename))

    # Calling "ots upgrade" without any file argument only produces an error,
    # so there is nothing useful to do when no timestamp file was found.
    if not existant_files:
        print("No timestamps to upgrade for {}".format(dirname))
        return

    print("Upgrading {} timestamps for {}".format(len(existant_files), dirname))
    try:
        sh.ots("upgrade", *existant_files)
    except Exception as exc:
        # Deliberately best-effort: report what went wrong and keep going.
        print("Encountered an exception when upgrading timestamps, the files were: {}".format(existant_files))
        print("The exception was: {}".format(exc))
        print("Continuing...")
    else:
        print("Done upgrading timestamps.")
def get_most_recent_timestamp(timestamp_dir):
    """
    Find the youngest timestamp in the timestamps directory.

    Only files named "<%Y-%m-%d>.log.ots" are considered. Backup files
    (".log.ots.bak"), files with "-initial" in their name and files whose name
    does not parse as a date are ignored.

    :param timestamp_dir: directory holding the .log.ots files.
    :return: datetime of the most recent timestamp filename, or
        datetime(1900, 1, 1) when the directory has no usable timestamp.
    :raises OSError: from os.listdir if the directory cannot be listed.
    """
    suffix = ".log.ots"
    dates = []
    for timestamp_filename in os.listdir(timestamp_dir):
        # get just the filename, not any of the path components
        timestamp_filename = os.path.basename(timestamp_filename)

        # The suffix is sliced off below, so it has to be a real suffix and not
        # merely appear somewhere in the name.
        if not timestamp_filename.endswith(suffix):
            continue
        if ".log.ots.bak" in timestamp_filename or "-initial" in timestamp_filename:
            continue

        date_fragment = timestamp_filename[: -len(suffix)]
        try:
            dates.append(fromisoformat(date_fragment))
        except ValueError:
            # A stray non-date file should not abort the whole run.
            print("Ignoring {} because its name is not a %Y-%m-%d date".format(timestamp_filename))

    if not dates:
        # Sentinel older than any real log: nothing has been timestamped yet.
        return datetime(1900, 1, 1)

    return max(dates)
def make_timestamps(dirname):
    """
    Call opentimestamps (ots stamp) for each new logfile that hasn't been
    timestamped yet, based on the assumption that the youngest timestamp (by
    filename) in the timestamps/ directory is where you left off timestamping.

    New log files are stamped in groups of up to 500 per ots call. Each
    resulting .ots file is moved into <dirname>/timestamps and its absolute
    path is appended to ~/.cache/ots/watchlist. With DRY_RUN enabled, nothing
    is stamped, moved or written; the planned moves are only printed.

    Today's log is never stamped because it is incomplete until the day ends.

    :param dirname: log directory that contains a "timestamps" subdirectory.
    :raises Exception: if dirname contains no files at all.
    """
    timestamp_dirname = os.path.join(dirname, "timestamps")
    latest_timestamp = get_most_recent_timestamp(timestamp_dirname)
    todays_date_fragment = datetime.now().strftime("%Y-%m-%d")

    potential_log_files = sorted(os.listdir(dirname))
    if not potential_log_files:
        raise Exception("No log files found.")

    log_filenames = []
    for basename in potential_log_files:
        # All log files are in %Y-%m-%d format. However, some other files are
        # named .log that aren't date logs. These should be skipped - do so by
        # checking that the first character is "1" or "2". The checks look at
        # the bare filename so the directory name cannot influence them.
        if not basename.endswith(".log") or basename[0] not in ("1", "2"):
            continue
        if ".log.ots.bak" in basename or "-initial" in basename:
            continue

        date_fragment = basename[: -len(".log")]
        try:
            date = fromisoformat(date_fragment)
        except ValueError:
            # Starts with a digit and ends in .log, but is not a date log.
            print("Skipping {} because its name is not a %Y-%m-%d date".format(basename))
            continue

        # We don't timestamp log files for today's date because the log is
        # not finished until after the current day passes.
        if date > latest_timestamp and date_fragment != todays_date_fragment:
            log_filenames.append(os.path.join(dirname, basename))

    log_filename_groups = list(grouper(500, log_filenames))
    print("Created {} groups".format(len(log_filename_groups)))

    watchlist_path = os.path.expanduser("~/.cache/ots/watchlist")

    for subgroup_log_filenames in log_filename_groups:
        filename_args = [os.path.abspath(log_filename) for log_filename in subgroup_log_filenames]
        print("Timestamping a group with these files: {}".format(filename_args))

        if DRY_RUN:
            for log_filename in subgroup_log_filenames:
                print("Moving timestamp {}".format(log_filename + ".ots"))
            continue

        # --nowatch is from the watchlist branch
        sh.ots("stamp", "--nowatch", *filename_args)

        # Make sure the watchlist can be opened before any file is moved.
        os.makedirs(os.path.dirname(watchlist_path), exist_ok=True)
        # The context manager closes (and flushes) the watchlist even if one
        # of the moves below raises.
        with open(watchlist_path, "a") as watchlist:
            for log_filename in subgroup_log_filenames:
                timestamp_filename = log_filename + ".ots"
                print("Moving timestamp {}".format(timestamp_filename))

                # --no-clobber to not overwrite existing timestamps
                sh.mv("--no-clobber", timestamp_filename, timestamp_dirname)

                # Add this timestamp to the ots watchlist.
                moved_path = os.path.join(timestamp_dirname, os.path.basename(timestamp_filename))
                watchlist.write(os.path.abspath(moved_path) + "\n")
def main():
    """
    Timestamp new logs in every directory, then upgrade recent timestamps.
    """
    # The entries in DIRNAMES are relative paths, so this script should be
    # executing in their parent directory.
    for log_dirname in DIRNAMES:
        print("Processing {}".format(log_dirname))
        make_timestamps(log_dirname)

    # Upgrading takes a while so let's wait to upgrade until after new
    # timestamping is completed.
    #
    # Yesterday's log's timestamp was just created a few moments ago and is not
    # mature yet, which is why the days looked at start at the day before
    # yesterday.
    for log_dirname in DIRNAMES:
        upgrade_recent_timestamps(log_dirname, daysback=5)

    # The local ots version is using the watchlist branch.
    # https://github.com/opentimestamps/opentimestamps-client/pull/109
    print("Running ots upgradewatchlist ... (will upgrade more than just IRC log timestamps, but whatever)")
    try:
        sh.ots("upgradewatchlist")
    except Exception:
        print("Got an error while running upgradewatchlist. Ignoring (probably immature timestamp).")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment