Skip to content

Instantly share code, notes, and snippets.

@tuck1s
Last active November 21, 2022 11:10
Show Gist options
  • Save tuck1s/49099a42e0503cc5137d8cde04852e8a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import re, sys, csv, argparse
from datetime import datetime
myTimezone = '' # does rclone log in locale timezone?
def perror(msg):
    """Print a diagnostic message to stderr.

    The CSV output goes to stdout, so diagnostics must not pollute it.
    (Renamed parameter: the original was called `str`, shadowing the builtin.)
    """
    print(msg, file=sys.stderr)
# Function operates "in place" by reference on dict d, i.e. has side-effects on d
def rescaleUnits(d, field, unit):
    """Convert a human-readable value to plain bytes, in place.

    d[field] (a numeric string captured by regex) is retyped to float and
    multiplied by the IEC binary prefix found at the start of d[unit]
    ('Gi', 'Mi', 'Ki', or none for plain 'B...'); the prefix is then
    trimmed from d[unit] (e.g. 'GiB' -> 'B', 'KiB/s' -> 'B/s').

    Raises ValueError if d[unit] starts with an unrecognised prefix.

    BUG FIX: the original used `2^30` etc. — in Python `^` is bitwise XOR,
    not exponentiation (2^30 == 28, 2^10 == 8), so every value was scaled
    wrongly. The correct factors are 2**30, 2**20, 2**10.
    """
    # retype data from string to float
    d[field] = float(d[field])
    for prefix, factor in (('Gi', 2**30), ('Mi', 2**20), ('Ki', 2**10)):
        if d[unit].startswith(prefix):
            d[field] *= factor
            d[unit] = d[unit][2:]  # trim the scaling prefix
            break
    else:
        if not d[unit].startswith('B'):
            # report the offending value (the original passed the key name)
            raise ValueError('Unknown unit:', d[unit])
    d[field] = round(d[field], 2)  # only need two decimal places
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Process human-readable default rclone log files into a .CSV file.')
    parser.add_argument('files', metavar='file', type=argparse.FileType('r'), nargs='+', help='log filename')
    args = parser.parse_args()

    # Compile the line-matching regexes once, outside the per-line loop.
    # TODO - compare against rclone format-string outputs, timezone definition etc
    # Example -
    # "2022/11/10 12:02:34 INFO  : "
    info_re = re.compile(r'^(?P<date>\d+/\d+/\d+)\s+(?P<time>\d+:\d+:\d+)\sINFO')
    # Example -
    # "Transferred:        337.485 GiB / 339.789 GiB, 99%, 996.918 KiB/s, ETA 40m23s"
    transferred_re = re.compile(
        r'^Transferred:\s+(?P<tx_progress>\d+[\.]*\d*)\s+(?P<tx_progress_unit>[A-Za-z]+)' +
        r'\s+\/\s+(?P<tx_target>\d+[\.]*\d*)\s+(?P<tx_target_unit>[A-Za-z]+)' +
        r'\,\s*(?P<tx_progress_percent>\d+)\%' +
        r'\,\s*(?P<tx_throughput>\d+[\.]*\d*)\s+(?P<tx_throughput_unit>[A-Za-z]+\/s)' +
        r'\,\s*ETA\s*(?P<tx_ETA>[A-Za-z0-9]+)')
    # Example -
    # "Checks:             27284 / 30289, 90%"
    checks_re = re.compile(r'^Checks:\s+(?P<checks>\d+)\s+\/\s+(?P<checks_denom>\d+)')

    for f in args.files:
        time = '0'      # most recent timestamp; becomes a datetime once an INFO line parses
        checks = {}     # most recent Checks: counters, merged into each output row
        outfile = None  # csv.DictWriter on stdout, created lazily on the first data row
        for line in f:
            # Timestamp line: remember it for the following Transferred: line.
            m = info_re.search(line)
            if m:
                datetimeStr = m.group('date').replace('/', '-') + 'T' + m.group('time') + myTimezone
                try:
                    time = datetime.fromisoformat(datetimeStr)
                except Exception as err:
                    perror(err)
                continue
            # Progress line: join datetime (from the INFO line) with data from
            # the Transferred: line and emit one CSV row.
            m = transferred_re.search(line)
            if m:
                try:
                    outData = {'datetime': time.isoformat()}
                    outData.update(checks)
                    tx = m.groupdict()
                    # map "human-readable" units back to simple values
                    rescaleUnits(tx, 'tx_progress', 'tx_progress_unit')
                    rescaleUnits(tx, 'tx_target', 'tx_target_unit')
                    rescaleUnits(tx, 'tx_throughput', 'tx_throughput_unit')
                    outData.update(tx)
                    if not outfile:
                        outfile = csv.DictWriter(sys.stdout, fieldnames=outData.keys())
                        outfile.writeheader()
                    outfile.writerow(outData)
                except Exception as err:
                    # e.g. a Transferred: line before any INFO line leaves
                    # `time` as the string '0', which has no isoformat()
                    perror(err)
                continue
            # Checks counters: stashed for the next output row.
            m = checks_re.search(line)
            if m:
                checks = m.groupdict()  # groupdict() cannot raise; old try/except removed
                continue
        f.close()  # argparse.FileType opened the file; close it explicitly
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment