Last active
November 21, 2022 11:10
-
-
Save tuck1s/49099a42e0503cc5137d8cde04852e8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import re, sys, csv, argparse | |
from datetime import datetime | |
myTimezone = '' # does rclone log in locale timezone? | |
def perror(msg):
    """Print *msg* to stderr, keeping stdout clean for the CSV output.

    Renamed the parameter from ``str`` to ``msg`` — the original shadowed
    the builtin ``str`` type. Callers pass positionally, so this is
    backward-compatible.
    """
    print(msg, file=sys.stderr)
def rescaleUnits(d, field, unit):
    """Rescale a human-readable IEC-prefixed value back to base units.

    Operates "in place" by reference on dict ``d`` (i.e. has side-effects
    on ``d``): ``d[field]`` is retyped from string to float, multiplied by
    the binary prefix found at the start of ``d[unit]`` ('Gi', 'Mi', 'Ki'),
    rounded to two decimal places, and the prefix is trimmed from
    ``d[unit]`` (so e.g. 'KiB/s' becomes 'B/s').

    Raises:
        ValueError: if ``d[unit]`` does not start with a known prefix
            or a plain 'B'.
    """
    # retype data from string to float
    d[field] = float(d[field])
    # BUG FIX: the original used 2^30 / 2^20 / 2^10 — in Python '^' is
    # bitwise XOR (2^30 == 28), not exponentiation, so every value was
    # wildly wrong. The correct multipliers are powers of two.
    prefixes = {'Gi': 2 ** 30, 'Mi': 2 ** 20, 'Ki': 2 ** 10}
    for prefix, factor in prefixes.items():
        if d[unit].startswith(prefix):
            d[field] *= factor
            d[unit] = d[unit][2:]  # trim the scaling prefix
            break
    else:
        if not d[unit].startswith('B'):
            # Report the offending unit *value*, not the key name as before.
            raise ValueError(f'Unknown unit: {d[unit]}')
    d[field] = round(d[field], 2)  # only need two decimal places
if __name__ == '__main__':
    # Stream one or more rclone log files and emit a single CSV on stdout.
    # State carried across lines of each file:
    #   time    - datetime of the most recent "INFO" log line ('0' until seen)
    #   checks  - most recent "Checks: x / y" counts, merged into each row
    #   outfile - csv.DictWriter, created lazily on the first "Transferred:"
    #             line so the header matches that row's keys
    parser = argparse.ArgumentParser(description='Process human-readable default rclone log files into a .CSV file.')
    parser.add_argument('files', metavar='file', type=argparse.FileType('r'), nargs='+', help='log filename')
    args = parser.parse_args()
    for f in args.files:
        time = '0'
        checks = {}
        outfile = None
        for line in f:
            # TODO - compare against rclone format-string outputs, timezone definition etc
            # Example -
            # "2022/11/10 12:02:34 INFO : "
            m = re.search(r'^(?P<date>\d+/\d+/\d+)\s+(?P<time>\d+:\d+:\d+)\sINFO', line)
            if(m):
                # Rebuild ISO-8601 "YYYY-MM-DDTHH:MM:SS" from the rclone
                # "YYYY/MM/DD HH:MM:SS" form, appending myTimezone (empty by
                # default — NOTE(review): assumes log is in local time, confirm).
                datetimeStr = m.group('date').replace('/', '-') + 'T' + m.group('time') + myTimezone
                try:
                    time = datetime.fromisoformat(datetimeStr)
                except Exception as err:
                    perror(err)
                continue
            # Example -
            # "Transferred: 337.485 GiB / 339.789 GiB, 99%, 996.918 KiB/s, ETA 40m23s"
            m = re.search(r'^Transferred:\s+(?P<tx_progress>\d+[\.]*\d*)\s+(?P<tx_progress_unit>[A-Za-z]+)' +
                r'\s+\/\s+(?P<tx_target>\d+[\.]*\d*)\s+(?P<tx_target_unit>[A-Za-z]+)' +
                r'\,\s*(?P<tx_progress_percent>\d+)\%' +
                r'\,\s*(?P<tx_throughput>\d+[\.]*\d*)\s+(?P<tx_throughput_unit>[A-Za-z]+\/s)' +
                r'\,\s*ETA\s*(?P<tx_ETA>[A-Za-z0-9]+)'
                , line)
            if(m):
                try:
                    # join datetime (from the INFO line) with data from the Transferred: line
                    outData = {'datetime': time.isoformat()}
                    outData.update(checks)
                    tx = m.groupdict()
                    # map "human-readable" units back to simple values
                    rescaleUnits(tx, 'tx_progress', 'tx_progress_unit')
                    rescaleUnits(tx, 'tx_target', 'tx_target_unit')
                    rescaleUnits(tx, 'tx_throughput', 'tx_throughput_unit')
                    outData.update(tx)
                    if not outfile:
                        # First data row fixes the CSV column set and header.
                        outfile = csv.DictWriter(sys.stdout, fieldnames=outData.keys())
                        outfile.writeheader()
                    outfile.writerow(outData)
                except Exception as err:
                    # e.g. time still '0' (no INFO line yet) -> isoformat() fails;
                    # row is skipped and reported on stderr
                    perror(err)
                continue
            # Example -
            # "Checks: 27284 / 30289, 90%"
            m = re.search(r'^Checks:\s+(?P<checks>\d+)\s+\/\s+(?P<checks_denom>\d+)', line)
            if(m):
                try:
                    checks = m.groupdict()
                except Exception as err:
                    perror(err)
                continue
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment