Skip to content

Instantly share code, notes, and snippets.

@stepancheg
Created March 6, 2015 22:36
Show Gist options
  • Save stepancheg/0e032a519d834e5dfa73 to your computer and use it in GitHub Desktop.
Save stepancheg/0e032a519d834e5dfa73 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import os
import re
class Timestamp(object):
    """
    Record holding the six components of a date and time of day.

    The class derives from ``object`` explicitly: under Python 2 a class
    without a base is old-style and its ``__slots__`` declaration has no
    effect. With the explicit base the slots are honoured on Python 2 and 3.
    """

    __slots__ = ('year', 'month', 'day', 'hour', 'minute', 'second')

    def __init__(self, year, month, day, hour, minute, second):
        self.year = year
        self.month = month
        self.day = day
        self.hour = hour
        self.minute = minute
        self.second = second

    def _list(self):
        """Return the components as a tuple, in ``__slots__`` order."""
        return tuple(getattr(self, slot) for slot in Timestamp.__slots__)

    def format(self):
        """Render the timestamp as zero-padded ``YYYY-MM-DDTHH:MM:SS``."""
        return '%04d-%02d-%02dT%02d:%02d:%02d' % self._list()

    def __repr__(self):
        return 'Timestamp(%s)' % ', '.join([str(x) for x in self._list()])
def digits_in_num(n):
    """
    Return the number of decimal digits needed to write ``n``.

    Every value below 10 (zero and negative numbers included) counts as a
    single digit. Floor division is used so the count stays exact for
    arbitrarily large integers on both Python 2 and Python 3.

    >>> digits_in_num(0)
    1
    >>> digits_in_num(1)
    1
    >>> digits_in_num(9)
    1
    >>> digits_in_num(100)
    3
    >>> digits_in_num(200)
    3
    >>> digits_in_num(899)
    3
    """
    count = 1
    while n >= 10:
        # ``//`` rather than ``/``: true division would turn n into a float.
        n //= 10
        count += 1
    return count
def pattern_lower(pattern):
    """
    Return the smallest string that fits ``pattern``.

    Separator characters ('T', ':' and '-') are kept in place; every other
    character is replaced with '0'.
    """
    return ''.join(ch if ch in 'T:-' else '0' for ch in pattern)
def pattern_upper(pattern):
    """
    Return the largest string that fits ``pattern``.

    A pattern already spells out the highest digit allowed at each position,
    so the pattern itself is the answer.
    """
    return pattern
def inc(c):
    """Return the digit character following ``c``; ``c`` must be '0'..'8'."""
    assert '0' <= c <= '8'
    successor = int(c) + 1
    return str(successor)
def dec(c):
    """Return the digit character preceding ``c``; ``c`` must be '1'..'9'."""
    assert '1' <= c <= '9'
    predecessor = int(c) - 1
    return str(predecessor)
def range_re(b, e, pattern):
    """
    Build a regular expression matching every string from ``b`` to ``e``.

    ``b``, ``e`` and ``pattern`` must have the same length. Each character of
    ``pattern`` is either a separator ('T', ':' or '-'), which ``b`` and ``e``
    must carry at the same position, or a digit giving the largest digit
    allowed at that position. A 'T' separator is always emitted as ``[ T]`` so
    that a space is accepted in its place; ':' and '-' are emitted literally.

    The caller is expected to pass ``b`` no greater than ``e``; this is not
    checked.

    >>> range_re('', '', '')
    ''
    >>> range_re(':', ':', ':')
    ':'
    >>> range_re('T', 'T', 'T')
    '[ T]'
    >>> range_re('3', '3', '9')
    '3'
    >>> range_re('2', '3', '9')
    '[23]'
    >>> range_re('0', '3', '9')
    '[0-3]'
    >>> range_re('02', '07', '99')
    '0[2-7]'
    >>> range_re('12', '17', '99')
    '1[2-7]'
    >>> range_re('00', '99', '99')
    '[0-9][0-9]'
    >>> range_re('000', '999', '999')
    '[0-9][0-9][0-9]'
    >>> range_re('200', '499', '999')
    '[2-4][0-9][0-9]'
    >>> range_re('1:59', '2:00', '9:59')
    '(1:59|2:00)'
    >>> range_re('220', '239', '999')
    '2[23][0-9]'
    >>> range_re('12:', '36:', '99:')
    '(1[2-9]|2[0-9]|3[0-6]):'
    >>> range_re('120', '369', '999')
    '(1[2-9]|2[0-9]|3[0-6])[0-9]'
    >>> range_re('15', '23', '99')
    '(1[5-9]|2[0-3])'
    >>> range_re('15', '25', '99')
    '(1[5-9]|2[0-5])'
    >>> range_re('15', '42', '99')
    '(1[5-9]|[23][0-9]|4[0-2])'
    >>> range_re('1T', '2T', '9T')
    '[12][ T]'
    >>> range_re('234', '569', '999')
    '(2(3[4-9]|[4-9][0-9])|[34][0-9][0-9]|5[0-6][0-9])'
    """
    assert len(b) == len(pattern)
    assert len(e) == len(pattern)
    if len(pattern) == 0:
        return ''

    # Leading separator: fixed in both bounds, emit it and move on.
    if pattern[0] in 'T:-':
        assert b[0] == pattern[0], (b, e, pattern)
        assert e[0] == pattern[0]
        if pattern[0] == 'T':
            return '[ T]' + range_re(b[1:], e[1:], pattern[1:])
        else:
            return pattern[0] + range_re(b[1:], e[1:], pattern[1:])

    assert pattern[0] >= '0'
    assert pattern[0] <= '9'
    assert b[0] <= pattern[0]
    assert e[0] <= pattern[0]

    # Single digit position: a literal, a two-character class, or a range.
    if len(pattern) == 1:
        if b == e:
            return b
        elif int(e) - int(b) == 1:
            return '[%s%s]' % (b, e)
        else:
            return '[%s-%s]' % (b, e)

    # Same leading digit: it is fixed, only the tail varies.
    if b[0] == e[0]:
        return b[0] + range_re(b[1:], e[1:], pattern[1:])

    # Trailing separator: it has exactly one legal value, so it can be split
    # off the end. This must NOT be done for an arbitrary equal trailing
    # digit: between '15' and '25' the last digit is not pinned to '5'.
    # Delegating the separator to range_re keeps 'T' rendered as '[ T]'.
    if pattern[-1] in 'T:-':
        return (range_re(b[:-1], e[:-1], pattern[:-1]) +
                range_re(b[-1], e[-1], pattern[-1]))

    # The last digit runs over its whole range ('0' up to the pattern digit),
    # so it is independent of the prefix and can be factored out.
    if b[-1] == '0' and e[-1] == pattern[-1]:
        return (range_re(b[:-1], e[:-1], pattern[:-1]) +
                range_re(b[-1], e[-1], pattern[-1]))

    # General case: split on the leading digit into up to three alternatives.
    #   b[0] followed by tails from b[1:] up to the maximum tail,
    #   the leading digits strictly in between followed by any tail,
    #   e[0] followed by tails from the minimum tail up to e[1:].
    # When a bound's tail is already the minimum (resp. maximum), that bound's
    # leading digit is absorbed into the middle alternative instead.
    rs = []
    lowest_tail = pattern_lower(pattern[1:])
    highest_tail = pattern_upper(pattern[1:])
    if b[1:] != lowest_tail:
        bf_mid = inc(b[0])
    else:
        bf_mid = b[0]
    if e[1:] != highest_tail:
        ef_mid = dec(e[0])
    else:
        ef_mid = e[0]
    if bf_mid != b[0]:
        rs.append(b[0] + range_re(b[1:], highest_tail, pattern[1:]))
    if ef_mid >= bf_mid:
        rs.append(range_re(bf_mid, ef_mid, pattern[0]) +
                  range_re(lowest_tail, highest_tail, pattern[1:]))
    if ef_mid != e[0]:
        rs.append(e[0] + range_re(lowest_tail, e[1:], pattern[1:]))
    if len(rs) == 1:
        return rs[0]
    else:
        return '(' + '|'.join(rs) + ')'
def num_range_re(b, e, digits):
    """
    Build a regular expression matching the integers ``b`` .. ``e`` inclusive,
    each written with exactly ``digits`` decimal digits (zero-padded on the
    left).

    Requires ``b <= e``, ``digits > 0`` and that ``e`` fits into ``digits``
    digits. Leading digits are extracted with floor division so the result is
    the same on Python 2 and Python 3.

    >>> num_range_re(0, 0, 1)
    '0'
    >>> num_range_re(9, 9, 1)
    '9'
    >>> num_range_re(8, 9, 1)
    '[89]'
    >>> num_range_re(2, 7, 1)
    '[2-7]'
    >>> num_range_re(2, 7, 2)
    '0[2-7]'
    >>> num_range_re(12, 17, 2)
    '1[2-7]'
    >>> num_range_re(0, 99, 2)
    '[0-9][0-9]'
    >>> num_range_re(0, 999, 3)
    '[0-9][0-9][0-9]'
    >>> num_range_re(200, 499, 3)
    '[2-4][0-9][0-9]'
    >>> num_range_re(220, 239, 3)
    '2[23][0-9]'
    >>> num_range_re(15, 23, 2)
    '(1[5-9]|2[0-3])'
    >>> num_range_re(15, 42, 2)
    '(1[5-9]|[23][0-9]|4[0-2])'
    >>> num_range_re(234, 569, 3)
    '(2(3[4-9]|[4-9][0-9])|[34][0-9][0-9]|5[0-6][0-9])'
    """
    assert b <= e
    assert digits > 0
    assert digits_in_num(e) <= digits

    # The upper bound is shorter than the field: the leading digit is '0'.
    if digits_in_num(e) < digits:
        return '0' + num_range_re(b, e, digits - 1)
    if b == e:
        return str(b)
    if digits_in_num(e) == 1:
        if e - b == 1:
            return '[%d%d]' % (b, e)
        else:
            return '[%d-%d]' % (b, e)

    # power1 is the place value of the leading digit; bf / ef are the leading
    # digits of the two bounds.
    power1 = 10 ** (digits - 1)
    bf = b // power1
    ef = e // power1
    if bf == ef:
        return str(bf) + num_range_re(b % power1, e % power1, digits - 1)

    # Split on the leading digit into up to three alternatives: the partial
    # block starting at b, the full blocks in between, and the partial block
    # ending at e. A bound that already sits on a block edge is absorbed into
    # the middle alternative.
    rs = []
    if b != bf * power1:
        bf_mid = bf + 1
    else:
        bf_mid = bf
    if e != (ef + 1) * power1 - 1:
        ef_mid = ef - 1
    else:
        ef_mid = ef
    if bf_mid != bf:
        rs.append(str(bf) + num_range_re(b % power1, power1 - 1, digits - 1))
    if ef_mid >= bf_mid:
        rs.append(num_range_re(bf_mid, ef_mid, 1) + num_range_re(0, power1 - 1, digits - 1))
    if ef_mid != ef:
        rs.append(str(ef) + num_range_re(0, e % power1, digits - 1))
    if len(rs) == 1:
        return rs[0]
    else:
        return '(' + '|'.join(rs) + ')'
def gt_re(ts):
    """
    Unfinished placeholder: ``ts`` is ignored and nothing is returned.
    """
    r = ''  # accumulator that is never filled in or returned
    return None
# Date, optionally followed by hour, hour:minute or hour:minute:second; the
# date and time parts are separated by a space or 'T'. Compiled once at import.
_TIMESTAMP_RE = re.compile(
    '^(20[0-9][0-9])-([01][0-9])-([0-3][0-9])'
    '(?:[ T]([0-2][0-9])(?::([0-5][0-9])(?::([0-5][0-9]))?)?)?$')


def parse_timestamp(timestamp):
    """
    Parse a ``YYYY-MM-DD[( |T)HH[:MM[:SS]]]`` string into a ``Timestamp``.

    Time components that are left out default to 0.

    Raises ``AssertionError`` when the text does not have that shape. The
    error is raised explicitly rather than with an ``assert`` statement so the
    check is still performed when Python runs with ``-O``.

    >>> parse_timestamp('2012-03-06')
    Timestamp(2012, 3, 6, 0, 0, 0)
    >>> parse_timestamp('2012-03-06 12')
    Timestamp(2012, 3, 6, 12, 0, 0)
    >>> parse_timestamp('2012-03-06 12:30')
    Timestamp(2012, 3, 6, 12, 30, 0)
    >>> parse_timestamp('2012-03-06T12:30')
    Timestamp(2012, 3, 6, 12, 30, 0)
    >>> parse_timestamp('2015-03-06T22:31')
    Timestamp(2015, 3, 6, 22, 31, 0)
    >>> parse_timestamp('2012-03-06 12:30:59')
    Timestamp(2012, 3, 6, 12, 30, 59)
    """
    m = _TIMESTAMP_RE.match(timestamp)
    if m is None:
        raise AssertionError('failed to parse timestamp: ' + timestamp)
    # Optional groups that did not take part in the match are None.
    args = [int(p or '0') for p in m.groups()]
    return Timestamp(*args)
def expr_to_re(expr):
    """
    Turn a timestamp expression into a regular expression matching every
    timestamp from that moment up to '2099-19-39T59:59:59'.
    """
    upper_bound = '2099-19-39T59:59:59'
    lower_bound = parse_timestamp(expr).format()
    return range_re(lower_bound, upper_bound, pattern_upper(upper_bound))
def main():
    """
    Command-line entry point.

    With the single argument ``--doctest`` the module's doctests are run.
    Otherwise the first argument is a timestamp expression and the remaining
    arguments are handed to ``egrep`` unchanged: the resulting command line is
    echoed to stdout and the current process is then replaced by ``egrep``.

    Called without any argument, a usage line is written to stderr and the
    process exits with status 2.
    """
    if sys.argv[1:] == ['--doctest']:
        import doctest
        doctest.testmod()
        return
    args = sys.argv[1:]
    if not args:
        sys.stderr.write(
            'usage: %s (--doctest | TIMESTAMP [EGREP_ARG]...)\n' % sys.argv[0])
        sys.exit(2)
    expr = args.pop(0)
    grep_args = ['egrep', '-e', expr_to_re(expr)] + args
    sys.stdout.write(' '.join(grep_args) + '\n')
    # exec replaces the process image without flushing Python's buffers, so
    # the echoed command would be lost when stdout is a pipe or a file.
    sys.stdout.flush()
    os.execvp("egrep", grep_args)


if __name__ == '__main__':
    main()
# vim: set ts=4 sw=4 et:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment