Skip to content

Instantly share code, notes, and snippets.

@amake
Last active December 13, 2015 20:39
Show Gist options
  • Save amake/4971790 to your computer and use it in GitHub Desktop.
Save amake/4971790 to your computer and use it in GitHub Desktop.
A script to analyze a TMX file and try to figure out how much time has been spent translating.
'''
TMXTimekeeper.py
Analyze a TMX file and try to figure out how much time
has been spent translating. Timestamps less than an hour apart
are grouped into one translating "session", and every session
is assumed to have taken a minimum of 5 minutes.
Created on 2013/02/17
@author: Aaron Madlon-Kay
'''
import sys
import os
from xml.etree import ElementTree
from datetime import datetime, timedelta, tzinfo
import time
# Timestamps closer together than this are grouped into the same bin
# (see bin_dates).
BIN_DELTA = timedelta(hours=1)
# Smallest duration credited to any single bin (see print_bins).
MIN_WORK_TIME = timedelta(minutes=5)
class TimeZone(tzinfo):
    '''
    Minimal fixed-offset tzinfo implementation.

    offset -- offset from UTC in seconds (positive means east of UTC).
    name   -- optional display name; when omitted, a name of the form
              "UTC+HH:MM" / "UTC-HH:MM" is derived from the offset.
    '''

    def __init__(self, offset, name=None):
        self.__offset = offset
        if name is None:
            # Render the offset as hours and minutes rather than raw
            # seconds, and keep the sign out of the digits so negative
            # offsets do not come out as "UTC+-18000".
            sign = "-" if offset < 0 else "+"
            hours, minutes = divmod(abs(int(offset)) // 60, 60)
            name = "UTC{0}{1:02d}:{2:02d}".format(sign, hours, minutes)
        self.__name = name

    def utcoffset(self, dt):
        '''Return the fixed offset from UTC as a timedelta.'''
        return timedelta(seconds=self.__offset)

    def dst(self, dt):
        '''Return a zero DST adjustment; the offset is fixed.'''
        return timedelta(0)

    def tzname(self, dt=None):
        '''
        Return the zone name.

        The tzinfo protocol calls tzname(dt); dt is accepted (and ignored)
        so that datetime.tzname() and strftime("%Z") work. It is optional
        so existing zero-argument calls keep working.
        '''
        return self.__name
# Adapted from StackExchange: http://stackoverflow.com/a/13406277/448068
def local_time_offset(t=None):
    """
    Return the local zone's offset from GMT in seconds, either right now
    (t is None) or at the epoch timestamp t.
    """
    # Resolve the default explicitly: python2.3 localtime() can't take None.
    moment = time.time() if t is None else t
    # altzone applies only when the zone defines DST and DST is reported
    # as in effect at the requested moment.
    dst_in_effect = time.localtime(moment).tm_isdst and time.daylight
    seconds_west_of_utc = time.altzone if dst_in_effect else time.timezone
    return -seconds_west_of_utc
# Zero-offset zone; get_dates attaches it to every parsed timestamp.
UTC = TimeZone(0)
# Local zone used by print_bins for display. The offset is computed once,
# from the current time, when the module loads -- so timestamps from a period
# with a different DST state are still shown with the current offset.
USER_TZ = TimeZone(local_time_offset())
def _parse_tmx_date(value):
    '''
    Parse a timestamp string of the form YYYYMMDDThhmmssZ into a
    UTC-aware datetime. Return None if the string does not match.
    '''
    try:
        parsed = datetime.strptime(value, "%Y%m%dT%H%M%SZ")
    except ValueError:
        return None
    return parsed.replace(tzinfo=UTC)


def get_dates(tree):
    '''
    Extract translation timestamps from the creationdate and changedate
    attributes of every tuv element under tree, and return them as a
    sorted list of UTC-aware datetimes.

    A changedate identical to the creationdate of the same tuv is counted
    only once. Malformed timestamps are skipped individually, so a bad
    creationdate does not cause a valid changedate on the same element
    to be dropped.
    '''
    result = []
    for tuv in tree.findall(".//tuv"):
        creation_date = tuv.get("creationdate")
        change_date = tuv.get("changedate")
        candidates = [creation_date]
        if change_date != creation_date:
            candidates.append(change_date)
        for raw in candidates:
            if raw is None:
                continue
            parsed = _parse_tmx_date(raw)
            if parsed is not None:
                result.append(parsed)
    return sorted(result)
def bin_dates(dates):
'''
Given a sorted list of datetimes, reduce to a set of "bins"
(start and end times), defined as a period of time where no two
datetimes are separated by more than BIN_DELTA.
'''
result = {}
if len(dates) < 1:
return result
start = dates[0]
result[start] = None
for date in dates:
end = result[start]
if end == None:
end = start
if date - end < BIN_DELTA:
result[start] = date
else:
start = date
result[start] = None
return result
def print_bins(bins):
'''
Given a dictionary of bins, print the date, start and end
times, and duration. Finally, print the total time covered
by all bins.
'''
total = timedelta(0)
master_adjusted = ""
for start, end in sorted(bins.items()):
local_start = start.astimezone(USER_TZ)
print "{0}: {1} -".format(local_start.strftime("%Y/%m/%d"),
local_start.strftime("%H:%M")),
adjusted = ""
# A bin may have end == None if just one TUV was changed.
if end == None:
end = start
local_end = end.astimezone(USER_TZ)
delta = local_end - local_start
# Ensure minimum work time
if delta < MIN_WORK_TIME:
delta = MIN_WORK_TIME
local_end = local_start + MIN_WORK_TIME
adjusted = "*"
master_adjusted = "*"
total += delta
print local_end.strftime("%H:%M"), " ({0}{1})".format(str(delta),
adjusted)
print "Total: {0}{1}".format(str(total), master_adjusted)
if __name__ == "__main__":
    # Usage: TMXTimekeeper.py <file.tmx>
    # Exits with status 1 if the argument is missing, is not an existing
    # file, lacks a .tmx extension (any letter case), or is not
    # well-formed XML.
    if (len(sys.argv) < 2 or not os.path.isfile(sys.argv[1])
            or not sys.argv[1].lower().endswith(".tmx")):
        print("Please specify a valid input TMX")
        sys.exit(1)
    # Only parsing can raise ParseError, so keep the try body to that call.
    try:
        tree = ElementTree.parse(sys.argv[1])
    except ElementTree.ParseError:
        print("Invalid input TMX")
        sys.exit(1)
    dates = get_dates(tree)
    bins = bin_dates(dates)
    print_bins(bins)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment