Created
March 6, 2015 22:36
-
-
Save stepancheg/0e032a519d834e5dfa73 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import os | |
import re | |
class Timestamp(object):
    """Calendar timestamp with one-second resolution.

    A plain record of six fields (year, month, day, hour, minute, second).
    The constructor stores the values as given and performs no range
    validation.

    The class derives from ``object`` explicitly so that ``__slots__`` is
    honoured on Python 2 as well; on an old-style class it is silently
    ignored.
    """

    __slots__ = ('year', 'month', 'day', 'hour', 'minute', 'second')

    def __init__(self, year, month, day, hour, minute, second):
        self.year = year
        self.month = month
        self.day = day
        self.hour = hour
        self.minute = minute
        self.second = second

    def _list(self):
        """Return the field values as a tuple, in ``__slots__`` order."""
        return tuple(getattr(self, slot) for slot in Timestamp.__slots__)

    def format(self):
        """Return the timestamp as a zero-padded ``YYYY-MM-DDTHH:MM:SS`` string."""
        return '%04d-%02d-%02dT%02d:%02d:%02d' % self._list()

    def __repr__(self):
        return 'Timestamp(%s)' % ', '.join([str(x) for x in self._list()])
def digits_in_num(n):
    """Return how many decimal digits are needed to write ``n``.

    Every value below 10 (including 0 and negative numbers) counts as a
    single digit.

    >>> digits_in_num(0)
    1
    >>> digits_in_num(1)
    1
    >>> digits_in_num(9)
    1
    >>> digits_in_num(100)
    3
    >>> digits_in_num(200)
    3
    >>> digits_in_num(899)
    3
    >>> digits_in_num(10 ** 17 - 1)
    17
    """
    count = 1
    # Floor division keeps the arithmetic in exact integers. True division
    # (the meaning of ``/`` on Python 3) yields floats, which lose precision
    # and miscount once the value no longer fits in a double's mantissa.
    while n >= 10:
        n //= 10
        count += 1
    return count
def pattern_lower(pattern):
    """Return the smallest string that fits ``pattern``.

    The separator characters ``T``, ``:`` and ``-`` are kept in place; every
    other character is replaced by ``'0'``.
    """
    separators = 'T:-'
    return ''.join(ch if ch in separators else '0' for ch in pattern)
def pattern_upper(pattern):
    """Return the largest string that fits ``pattern``.

    A pattern already holds the highest allowed character at every position,
    so it is returned unchanged.
    """
    highest = pattern
    return highest
def inc(c):
    """Return the digit character one above ``c``.

    ``c`` is asserted to compare between ``'0'`` and ``'8'``, so the result
    is still a single digit.
    """
    assert '0' <= c
    assert '8' >= c
    successor = int(c) + 1
    return str(successor)
def dec(c):
    """Return the digit character one below ``c``.

    ``c`` is asserted to compare between ``'1'`` and ``'9'``, so the result
    is still a single digit.
    """
    assert '1' <= c
    assert '9' >= c
    predecessor = int(c) - 1
    return str(predecessor)
def range_re(b, e, pattern):
    """Build a regex matching every string from ``b`` to ``e`` inclusive.

    ``pattern`` describes the shape of the strings: at each position it holds
    either a separator (``T``, ``:`` or ``-``) or the highest digit allowed
    there.  ``b`` and ``e`` must have the same length as ``pattern``, carry
    the same separators, and use digits no greater than the pattern's.  A
    ``T`` separator is emitted as ``[ T]`` so that a space is accepted too.

    The caller is expected to pass ``b <= e``; this is not checked here.

    >>> range_re('', '', '')
    ''
    >>> range_re(':', ':', ':')
    ':'
    >>> range_re('T', 'T', 'T')
    '[ T]'
    >>> range_re('3', '3', '9')
    '3'
    >>> range_re('2', '3', '9')
    '[23]'
    >>> range_re('0', '3', '9')
    '[0-3]'
    >>> range_re('02', '07', '99')
    '0[2-7]'
    >>> range_re('12', '17', '99')
    '1[2-7]'
    >>> range_re('00', '99', '99')
    '[0-9][0-9]'
    >>> range_re('000', '999', '999')
    '[0-9][0-9][0-9]'
    >>> range_re('200', '499', '999')
    '[2-4][0-9][0-9]'
    >>> range_re('1:59', '2:00', '9:59')
    '(1:59|2:00)'
    >>> range_re('220', '239', '999')
    '2[23][0-9]'
    >>> range_re('12:', '36:', '99:')
    '(1[2-9]|2[0-9]|3[0-6]):'
    >>> range_re('1T', '2T', '9T')
    '[12][ T]'
    >>> range_re('120', '369', '999')
    '(1[2-9]|2[0-9]|3[0-6])[0-9]'
    >>> range_re('15', '23', '99')
    '(1[5-9]|2[0-3])'
    >>> range_re('15', '25', '99')
    '(1[5-9]|2[0-5])'
    >>> range_re('10', '20', '99')
    '(1[0-9]|20)'
    >>> range_re('15', '42', '99')
    '(1[5-9]|[23][0-9]|4[0-2])'
    >>> range_re('234', '569', '999')
    '(2(3[4-9]|[4-9][0-9])|[34][0-9][0-9]|5[0-6][0-9])'
    """
    separators = 'T:-'

    assert len(b) == len(pattern)
    assert len(e) == len(pattern)

    if len(pattern) == 0:
        return ''

    head = pattern[0]
    if head in separators:
        # A leading separator is fixed text; peel it off and recurse.
        assert b[0] == head, (b, e, pattern)
        assert e[0] == head
        head_re = '[ T]' if head == 'T' else head
        return head_re + range_re(b[1:], e[1:], pattern[1:])

    assert pattern[0] >= '0'
    assert pattern[0] <= '9'
    assert b[0] <= pattern[0]
    assert e[0] <= pattern[0]

    if len(pattern) == 1:
        if b == e:
            return b
        elif int(e) - int(b) == 1:
            return '[%s%s]' % (b, e)
        else:
            return '[%s-%s]' % (b, e)

    if b[0] == e[0]:
        # Shared leading digit: emit it literally and narrow the rest.
        return b[0] + range_re(b[1:], e[1:], pattern[1:])

    tail = pattern[-1]
    if tail in separators:
        # A trailing separator is fixed text and can be factored out.
        # This shortcut must NOT be applied to a trailing digit that merely
        # happens to be equal in b and e: the leading digits differ here, so
        # every value of that last digit occurs somewhere inside the range
        # (e.g. 15..25 contains 16..24, which '[12]5' would miss).
        assert b[-1] == tail, (b, e, pattern)
        assert e[-1] == tail
        tail_re = '[ T]' if tail == 'T' else tail
        return range_re(b[:-1], e[:-1], pattern[:-1]) + tail_re

    if b[-1] == '0' and e[-1] == pattern[-1]:
        # The last digit spans its full range at both ends, so it is
        # independent of the prefix and can be factored out.
        return (range_re(b[:-1], e[:-1], pattern[:-1]) +
                range_re(b[-1], e[-1], pattern[-1]))

    # General case: split on the first digit into up to three alternatives:
    #   1. first digit == b[0], remainder from b[1:] up to the maximum
    #   2. first digit strictly between, remainder unrestricted
    #   3. first digit == e[0], remainder from the minimum up to e[1:]
    rest = pattern[1:]
    rest_low = pattern_lower(rest)
    rest_high = pattern_upper(rest)

    # When b's remainder is already the minimum, b[0] itself belongs to the
    # "unrestricted remainder" group; likewise for e[0] at the maximum.
    if b[1:] != rest_low:
        bf_mid = inc(b[0])
    else:
        bf_mid = b[0]
    if e[1:] != rest_high:
        ef_mid = dec(e[0])
    else:
        ef_mid = e[0]

    rs = []
    if bf_mid != b[0]:
        rs.append(b[0] + range_re(b[1:], rest_high, rest))
    if ef_mid >= bf_mid:
        rs.append(range_re(bf_mid, ef_mid, pattern[0]) +
                  range_re(rest_low, rest_high, rest))
    if ef_mid != e[0]:
        rs.append(e[0] + range_re(rest_low, e[1:], rest))

    if len(rs) == 1:
        return rs[0]
    else:
        return '(' + '|'.join(rs) + ')'
def num_range_re(b, e, digits):
    """Build a regex matching the integers ``b`` .. ``e`` inclusive.

    Numbers are matched in their decimal form, zero-padded on the left to
    exactly ``digits`` characters.  Requires ``b <= e``, ``digits > 0`` and
    ``e`` to fit in ``digits`` digits (all asserted).

    >>> num_range_re(0, 0, 1)
    '0'
    >>> num_range_re(9, 9, 1)
    '9'
    >>> num_range_re(8, 9, 1)
    '[89]'
    >>> num_range_re(2, 7, 1)
    '[2-7]'
    >>> num_range_re(2, 7, 2)
    '0[2-7]'
    >>> num_range_re(12, 17, 2)
    '1[2-7]'
    >>> num_range_re(0, 99, 2)
    '[0-9][0-9]'
    >>> num_range_re(0, 999, 3)
    '[0-9][0-9][0-9]'
    >>> num_range_re(200, 499, 3)
    '[2-4][0-9][0-9]'
    >>> num_range_re(220, 239, 3)
    '2[23][0-9]'
    >>> num_range_re(15, 23, 2)
    '(1[5-9]|2[0-3])'
    >>> num_range_re(15, 42, 2)
    '(1[5-9]|[23][0-9]|4[0-2])'
    >>> num_range_re(234, 569, 3)
    '(2(3[4-9]|[4-9][0-9])|[34][0-9][0-9]|5[0-6][0-9])'
    """
    assert b <= e
    assert digits > 0
    assert digits_in_num(e) <= digits

    if digits_in_num(e) < digits:
        # The upper bound is shorter than the field: the leading digit of
        # every number in the range is a padding zero.
        return '0' + num_range_re(b, e, digits - 1)
    if b == e:
        return str(b)
    if digits_in_num(e) == 1:
        if e - b == 1:
            return '[%d%d]' % (b, e)
        else:
            return '[%d-%d]' % (b, e)

    power1 = 10 ** (digits - 1)
    # Floor division: the leading digits must stay integers so that str()
    # renders them as '2' rather than '2.0' on Python 3.
    bf = b // power1
    ef = e // power1
    if bf == ef:
        return str(bf) + num_range_re(b % power1, e % power1, digits - 1)

    # Split on the leading digit into up to three alternatives: the partial
    # block starting at b, the fully covered blocks in between, and the
    # partial block ending at e.
    if b != bf * power1:
        bf_mid = bf + 1
    else:
        bf_mid = bf
    if e != (ef + 1) * power1 - 1:
        ef_mid = ef - 1
    else:
        ef_mid = ef

    rs = []
    if bf_mid != bf:
        rs.append(str(bf) + num_range_re(b % power1, power1 - 1, digits - 1))
    if ef_mid >= bf_mid:
        rs.append(num_range_re(bf_mid, ef_mid, 1) +
                  num_range_re(0, power1 - 1, digits - 1))
    if ef_mid != ef:
        rs.append(str(ef) + num_range_re(0, e % power1, digits - 1))

    if len(rs) == 1:
        return rs[0]
    else:
        return '(' + '|'.join(rs) + ')'
def gt_re(ts):
    """Unfinished placeholder; always returns ``None``.

    NOTE(review): ``ts`` is ignored and no caller is visible in this file.
    Presumably meant to build a "later than ``ts``" regex -- confirm the
    intent, then implement or remove.
    """
    return None
def parse_timestamp(timestamp):
    """Parse a ``YYYY-MM-DD[( |T)HH[:MM[:SS]]]`` string into a ``Timestamp``.

    Only years 2000-2099 are accepted.  The time part is optional and may be
    truncated after the hour or the minute; omitted fields default to 0.
    Fields are checked digit by digit only (e.g. a month of ``19`` passes).

    Raises ``ValueError`` when ``timestamp`` does not have this shape.  An
    explicit exception is used instead of ``assert`` so that the check is
    not stripped when Python runs with ``-O``.

    >>> parse_timestamp('2012-03-06')
    Timestamp(2012, 3, 6, 0, 0, 0)
    >>> parse_timestamp('2012-03-06 12')
    Timestamp(2012, 3, 6, 12, 0, 0)
    >>> parse_timestamp('2012-03-06 12:30')
    Timestamp(2012, 3, 6, 12, 30, 0)
    >>> parse_timestamp('2012-03-06T12:30')
    Timestamp(2012, 3, 6, 12, 30, 0)
    >>> parse_timestamp('2015-03-06T22:31')
    Timestamp(2015, 3, 6, 22, 31, 0)
    >>> parse_timestamp('2012-03-06 12:30:59')
    Timestamp(2012, 3, 6, 12, 30, 59)
    """
    r = re.compile(
        r'^(20[0-9][0-9])-([01][0-9])-([0-3][0-9])'
        r'(?:[ T]([0-2][0-9])(?::([0-5][0-9])(?::([0-5][0-9]))?)?)?$')
    m = r.match(timestamp)
    if m is None:
        raise ValueError('failed to parse timestamp: ' + timestamp)
    # Groups for omitted time fields are None; treat them as zero.
    args = [int(p or '0') for p in m.groups()]
    return Timestamp(*args)
def expr_to_re(expr):
    """Turn a timestamp expression into a regex for egrep.

    ``expr`` is parsed with ``parse_timestamp``; the resulting regex covers
    the range from that timestamp up to the per-position maximum
    ``2099-19-39T59:59:59``.
    """
    upper_bound = '2099-19-39T59:59:59'
    lower_bound = parse_timestamp(expr).format()
    return range_re(lower_bound, upper_bound, pattern_upper(upper_bound))
def main():
    """Command-line entry point.

    With ``--doctest`` as the only argument, run this module's doctests and
    return.  Otherwise the first argument is a timestamp expression that is
    converted to a regex; the remaining arguments are handed to ``egrep``
    unchanged.  The assembled command line is echoed to stdout and the
    process is then replaced by ``egrep``.

    Exits with status 2 and a usage message when no argument is given.
    """
    if sys.argv[1:] == ['--doctest']:
        import doctest
        doctest.testmod()
        return

    args = sys.argv[1:]
    if not args:
        sys.stderr.write('usage: %s TIMESTAMP [EGREP-ARGS...]\n' % sys.argv[0])
        sys.exit(2)
    expr = args.pop(0)

    grep_args = ['egrep']
    grep_args += ['-e', expr_to_re(expr)]
    grep_args += args

    # Parenthesised single-argument print works on both Python 2 and 3.
    print(' '.join(grep_args))
    # exec replaces the process image without flushing Python's buffers, so
    # the echoed command would be lost when stdout is not line-buffered.
    sys.stdout.flush()
    os.execvp("egrep", grep_args)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
# vim: set ts=4 sw=4 et:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment