Last active
December 13, 2015 20:39
-
-
Save amake/4971790 to your computer and use it in GitHub Desktop.
A script to analyze a TMX file and try to figure out how much time has been spent translating.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
TMXTimekeeper.py
Analyze a TMX file and estimate how much time was spent translating.
Timestamps are grouped into translating "sessions", and each session is
assumed to have lasted at least 5 minutes.
Created on 2013/02/17
@author: Aaron Madlon-Kay
'''
import sys | |
import os | |
from xml.etree import ElementTree | |
from datetime import datetime, timedelta, tzinfo | |
import time | |
# Timestamps closer together than this are merged into one session ("bin")
# by bin_dates().
BIN_DELTA = timedelta(minutes=60)
# Shortest duration credited to any single session by print_bins().
MIN_WORK_TIME = timedelta(seconds=5 * 60)
class TimeZone(tzinfo):
    """Fixed-offset time zone with no daylight-saving adjustment.

    offset -- offset from UTC, in seconds.
    name   -- optional display name; when omitted it defaults to
              "UTC+<offset>", where <offset> is the raw value passed in.
    """

    def __init__(self, offset, name=None):
        self.__offset = offset
        self.__name = name if name is not None else "UTC+{0}".format(offset)

    def utcoffset(self, dt):
        """Return the fixed offset from UTC as a timedelta."""
        return timedelta(seconds=self.__offset)

    def dst(self, dt):
        """Return a zero timedelta: this zone never applies DST."""
        return timedelta(0)

    def tzname(self, dt=None):
        """Return the zone's display name.

        The tzinfo protocol invokes tzname(dt), so the argument must be
        accepted; it defaults to None so that zero-argument calls keep
        working as before.
        """
        return self.__name
# From StackExchange: http://stackoverflow.com/a/13406277/448068
def local_time_offset(t=None):
    """Return the local zone's offset from GMT, in seconds.

    t is a timestamp as accepted by time.localtime(); when it is omitted
    the current time is used.
    """
    # Resolve "now" explicitly: python2.3 localtime() can't take None.
    when = time.time() if t is None else t
    in_dst = time.localtime(when).tm_isdst and time.daylight
    # time.altzone / time.timezone are expressed as seconds west of UTC,
    # hence the sign flip.
    return -(time.altzone if in_dst else time.timezone)
# Shared zone objects: UTC is attached to timestamps parsed from the TMX,
# USER_TZ is used when printing. The local offset is sampled once, when
# this module is loaded.
UTC = TimeZone(offset=0)
USER_TZ = TimeZone(offset=local_time_offset())
def get_dates(tree):
    '''
    Extract translation timestamps from the creationdate and changedate
    attributes of every <tuv> element under tree, and return them as a
    sorted list of datetimes tagged with the UTC zone.

    A changedate identical to the same element's creationdate is counted
    only once. Attribute values that do not match the expected
    "%Y%m%dT%H%M%SZ" layout are skipped individually, so one malformed
    attribute does not discard the other attribute of the same element.
    '''
    date_format = "%Y%m%dT%H%M%SZ"
    result = []
    for tuv in tree.findall(".//tuv"):
        creation_date = tuv.get("creationdate")
        change_date = tuv.get("changedate")
        stamps = [creation_date]
        if change_date != creation_date:
            stamps.append(change_date)
        for stamp in stamps:
            if stamp is None:
                # Attribute not present on this element.
                continue
            try:
                dt = datetime.strptime(stamp, date_format)
            except ValueError:
                # Unparseable timestamp: ignore it (best effort).
                continue
            result.append(dt.replace(tzinfo=UTC))
    return sorted(result)
def bin_dates(dates, delta=None):
    '''
    Given a sorted list of datetimes, reduce it to a dictionary of "bins"
    mapping each bin's start time to its end time.

    A datetime joins the current bin when it is strictly less than delta
    after the bin's latest datetime; otherwise it starts a new bin.

    dates -- sorted list of datetimes.
    delta -- maximum gap (exclusive) between neighbours in one bin, as a
             timedelta. Defaults to BIN_DELTA.

    Returns an empty dictionary for empty input. A bin other than the
    first that holds a single datetime has None as its end; a first bin
    holding a single datetime has its start as its end.
    '''
    if delta is None:
        delta = BIN_DELTA
    result = {}
    if not dates:
        return result
    start = dates[0]
    result[start] = None
    for date in dates:
        # Latest datetime seen in the current bin (the start itself if the
        # bin has no recorded end yet).
        end = result[start]
        if end is None:
            end = start
        if date - end < delta:
            result[start] = date
        else:
            # Gap too large: open a new bin beginning at this datetime.
            start = date
            result[start] = None
    return result
def print_bins(bins, tz=None, min_work_time=None):
    '''
    Given a dictionary of bins (start datetime -> end datetime or None),
    print one line per bin with its date, start and end times, and
    duration. Finally, print the total time covered by all bins.

    bins          -- dictionary of timezone-aware datetimes.
    tz            -- zone the times are converted to for display.
                     Defaults to USER_TZ.
    min_work_time -- minimum duration credited to a bin, as a timedelta.
                     Defaults to MIN_WORK_TIME.

    A bin shorter than min_work_time is lengthened to it and its duration
    is marked with "*"; the total is marked with "*" if any bin was.
    '''
    if tz is None:
        tz = USER_TZ
    if min_work_time is None:
        min_work_time = MIN_WORK_TIME
    total = timedelta(0)
    master_adjusted = ""
    for start in sorted(bins):
        end = bins[start]
        # A bin may have end == None if just one TUV was changed.
        if end is None:
            end = start
        local_start = start.astimezone(tz)
        local_end = end.astimezone(tz)
        delta = local_end - local_start
        adjusted = ""
        # Ensure minimum work time
        if delta < min_work_time:
            delta = min_work_time
            local_end = local_start + min_work_time
            adjusted = "*"
            master_adjusted = "*"
        total += delta
        # Single-argument print() behaves identically on Python 2 and 3.
        # The double space before "(" is part of the established output.
        print("{0}: {1} - {2}  ({3}{4})".format(
            local_start.strftime("%Y/%m/%d"),
            local_start.strftime("%H:%M"),
            local_end.strftime("%H:%M"),
            str(delta),
            adjusted))
    print("Total: {0}{1}".format(str(total), master_adjusted))
if __name__ == "__main__":
    # Require an existing file whose name ends in .tmx (any letter case).
    if len(sys.argv) < 2 or not os.path.isfile(sys.argv[1]) \
            or not sys.argv[1].lower().endswith(".tmx"):
        print("Please specify a valid input TMX")
        sys.exit(1)
    # Only the XML parse can raise ParseError, so guard just that call.
    try:
        tree = ElementTree.parse(sys.argv[1])
    except ElementTree.ParseError:
        print("Invalid input TMX")
        sys.exit(1)
    # Pipeline: timestamps -> sessions -> report.
    dates = get_dates(tree)
    bins = bin_dates(dates)
    print_bins(bins)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment