Created
June 3, 2012 03:25
-
-
Save washort/2861703 to your computer and use it in GitHub Desktop.
Twine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from unittest import TestCase | |
from terml.twine import SourceSpan, Twine | |
class SourceSpanTests(TestCase):
    """
    Checks for SourceSpan: value equality, tuple behaviour, named
    fields, and the rule that one-to-one spans stay on a single line.
    """

    def test_creation(self):
        """
        A span equals another span built from the same values, iterates
        in field order, and exposes every field by name.
        """
        values = ["http://example.org/t", True, 1, 0, 1, 9]
        names = ("uri", "isOneToOne", "startLine",
                 "startCol", "endLine", "endCol")
        span = SourceSpan(*values)
        self.assertEqual(span, SourceSpan(*values))
        self.assertEqual(list(span), values)
        for name, expected in zip(names, values):
            self.assertEqual(getattr(span, name), expected)

    def test_oneToOne(self):
        """
        notOneToOne() only clears the one-to-one flag, and a one-to-one
        span that crosses lines is rejected with ValueError.
        """
        uri = "http://example.org/t"
        relaxed = SourceSpan(uri, True, 1, 0, 1, 9).notOneToOne()
        self.assertEqual(list(relaxed), [uri, False, 1, 0, 1, 9])
        with self.assertRaises(ValueError):
            SourceSpan(uri, True, 1, 0, 2, 7)
class TwineTests(TestCase):
    """
    Unit tests for Twine: span bookkeeping on construction, slicing,
    splitting, concatenation, joining and replacement.

    Column numbers in the expected spans are inclusive, so a piece of
    text that starts at column 0 and is six characters long ends at
    column 5.
    """

    def test_creation(self):
        """A Twine has no span unless one is supplied."""
        ss = SourceSpan("http://example.org/t", True, 1, 0, 1, 9)
        t = Twine(u"foo baz")
        self.assertEqual(t.span, None)
        t = Twine(u"foo baz", ss)
        self.assertEqual(t.span, ss)

    def test_asFrom(self):
        """
        asFrom() assigns per-line spans; text that covers more than one
        line reports a covering span that is not one-to-one.
        """
        t = Twine(u"foo baz").asFrom("test.txt")
        self.assertEqual(t.span, SourceSpan("test.txt", True, 1, 0, 1, 6))
        self.assertEqual(Twine(u"abc\ndef").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 2))
        self.assertEqual(Twine(u"abc\ndef").asFrom("test.txt", 3, 10).span,
                         SourceSpan("test.txt", False, 3, 10, 4, 2))
        self.assertEqual(Twine(u"abcdef").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 5))
        self.assertEqual(Twine(u"abcdef\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 6))
        self.assertEqual(Twine(u"abcdef\nghijkl").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 5))
        self.assertEqual(
            Twine(u"abcdef\nghijkl").asFrom("test.txt")[:6].span,
            SourceSpan("test.txt", True, 1, 0, 1, 5))
        self.assertEqual(Twine(u"").asFrom("test.txt").span, None)
        self.assertEqual(Twine(u"\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 0))
        self.assertEqual(Twine(u"\n\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))
        self.assertEqual(Twine(u"abcdef\n\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))
        self.assertEqual(Twine(u"abcdef\ng\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 1))
        self.assertEqual(Twine(u"abcdef\ng").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))

    def test_slice(self):
        """Slices and single-character indexes carry a matching span."""
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual(t[:3].span,
                         SourceSpan("foo:bar", True, 1, 0, 1, 2))
        self.assertEqual(t[2:6].span,
                         SourceSpan("foo:bar", False, 1, 2, 2, 1))
        self.assertEqual(t[2].span,
                         SourceSpan("foo:bar", True, 1, 2, 1, 2))

    def test_split(self):
        """split() keeps spans on the pieces; empty pieces have none."""
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual([x.span for x in t.split('\n')],
                         [SourceSpan("foo:bar", True, 1, 0, 1, 2),
                          SourceSpan("foo:bar", True, 2, 0, 2, 2),
                          None,
                          SourceSpan("foo:bar", True, 4, 0, 4, 3),
                          None,
                          None])

    def test_rsplit(self):
        """rsplit() keeps spans on the pieces; empty pieces have none."""
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual([x.span for x in t.rsplit('\n')],
                         [SourceSpan("foo:bar", True, 1, 0, 1, 2),
                          SourceSpan("foo:bar", True, 2, 0, 2, 2),
                          None,
                          SourceSpan("foo:bar", True, 4, 0, 4, 3),
                          None,
                          None])

    def test_concat(self):
        """Concatenating adjacent one-to-one twines stays one-to-one."""
        # u"foo " occupies columns 0-3 (inclusive), so the text that
        # directly follows it, u"baz", occupies columns 4-6.
        t1 = Twine(u"foo ", SourceSpan("foo:bar", True, 1, 0, 1, 3))
        t2 = Twine(u"baz", SourceSpan("foo:bar", True, 1, 4, 1, 6))
        combined = t1 + t2
        self.assertEqual(combined.span,
                         SourceSpan("foo:bar", True, 1, 0, 1, 6))
        # `parts` is compared against a tuple, as in the other tests.
        self.assertEqual(combined.parts, (t1, t2))

    def test_eq(self):
        """A Twine equals an identical Twine and the plain text."""
        t1 = Twine(u"foo ", SourceSpan("foo:bar", True, 1, 0, 1, 3))
        self.assertEqual(t1,
                         Twine(u"foo ", SourceSpan("foo:bar", True, 1, 0, 1, 3)))
        self.assertEqual(t1, u"foo ")

    def test_join(self):
        """join() has no overall span but maps pieces back to source."""
        # The source text is 13 characters long: columns 0-12.
        ts = Twine(u'one two three',
                   SourceSpan("foo:bar", True, 1, 0, 1, 12))
        words = ts.split(u' ')
        t = Twine(u', ').join(words)
        self.assertEqual(t.span, None)
        self.assertEqual(t.parts,
                         (u'one', u', ', u'two', u', ', u'three'))
        self.assertEqual(
            t.sourceMap,
            (((0, 3), SourceSpan("foo:bar", True, 1, 0, 1, 2)),
             ((5, 8), SourceSpan("foo:bar", True, 1, 4, 1, 6)),
             ((10, 15), SourceSpan("foo:bar", True, 1, 8, 1, 12))))

    def test_replace(self):
        """replace() marks the substituted piece as not one-to-one."""
        t = Twine(u'one two three').asFrom("foo:bar")
        t2 = t.replace(u'two', u'eleventy')
        # The result of the replacement, not the untouched original, is
        # the twine whose covering span is no longer one-to-one.
        self.assertEqual(t2.span, SourceSpan("foo:bar", False, 1, 0, 1, 12))
        self.assertEqual(t2.parts, (u'one ', u'eleventy', u' three'))
        # Every entry is a ((start, stop), span) pair.
        self.assertEqual(
            t2.sourceMap,
            (((0, 4), SourceSpan("foo:bar", True, 1, 0, 1, 3)),
             ((4, 12), SourceSpan("foo:bar", False, 1, 4, 1, 6)),
             ((12, 18), SourceSpan("foo:bar", True, 1, 7, 1, 12))))

    # TODO: add coverage for formatting.
    # def test_format(self):
    #     pass

    # def test_mod(self):
    #     pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
_SourceSpan = namedtuple("SourceSpan",
                         "uri isOneToOne startLine startCol endLine endCol")


class SourceSpan(_SourceSpan):
    """
    Information about the original location of a span of text.

    Twines use this to remember where they came from.

    uri: Name of document this text came from.

    isOneToOne: Whether each character in that Twine maps to the
    corresponding source character position.

    startLine, endLine: Line numbers for the beginning and end of the
    span. Line numbers start at 1.

    startCol, endCol: Column numbers for the beginning and end of the
    span. Column numbers start at 0.

    Raises ValueError on construction if the span is one-to-one but its
    start and end lines differ.
    """

    # Keep instances as small as the underlying tuple: without this the
    # subclass would grow a per-instance __dict__.
    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        """
        Build the span from positional or keyword field values and
        reject one-to-one spans that cross a line boundary.
        """
        ss = _SourceSpan.__new__(cls, *args, **kwargs)
        if ss.isOneToOne and ss.startLine != ss.endLine:
            raise ValueError("one-to-one spans must be on a line")
        return ss

    def notOneToOne(self):
        """
        Return a copy of this span with the one-to-one flag cleared.
        """
        return SourceSpan(self.uri, False, self.startLine, self.startCol,
                          self.endLine, self.endCol)

    def __repr__(self):
        """
        Render as `<uri#:kind::startLine:startCol:endLine:endCol>`, where
        kind is "span" for one-to-one spans and "blob" otherwise.
        """
        kind = "span" if self.isOneToOne else "blob"
        position = ':'.join(str(x) for x in self[2:])
        return "<%s#:%s::%s>" % (self.uri, kind, position)
def spanCover(a, b):
    """
    Create a new SourceSpan that covers spans `a` and `b`.

    Returns None if either span is None or if the two spans come from
    different URIs.

    If both spans are one-to-one and `b` starts in the column directly
    after the end of `a` on the same line, the result is a one-to-one
    span running from the start of `a` to the end of `b`. Otherwise the
    result is a span that is not one-to-one, running from the earlier of
    the two start positions to the later of the two end positions.
    """
    if a is None or b is None or a.uri != b.uri:
        return None
    if (a.isOneToOne and b.isOneToOne
            and a.endLine == b.startLine
            and a.endCol + 1 == b.startCol):
        # These spans are adjacent: `b` picks up exactly where `a`
        # leaves off, so the combination is still character-for-character.
        return SourceSpan(a.uri, True,
                          a.startLine, a.startCol,
                          b.endLine, b.endCol)
    # Positions order by line first and column second, which is exactly
    # how (line, col) tuples compare.
    startLine, startCol = min((a.startLine, a.startCol),
                              (b.startLine, b.startCol))
    endLine, endCol = max((a.endLine, a.endCol),
                          (b.endLine, b.endCol))
    return SourceSpan(a.uri, False, startLine, startCol, endLine, endCol)
class Twine(unicode):
    """
    A text string that remembers where it came from.

    A Twine behaves like the text it wraps and additionally carries an
    optional SourceSpan, available as `span`. Indexing and slicing return
    Twines whose spans are narrowed to match.
    """

    def __new__(cls, input, span=None):
        """
        Create the string value; `span` is recorded by `__init__`.
        """
        return unicode.__new__(cls, input)

    def __init__(self, input, span=None):
        """
        Record `span` (a SourceSpan, or None when the origin is unknown).
        """
        self._span = span

    @classmethod
    def fromParts(cls, parts):
        """
        Return a Twine that contains, in sequence, all the Twines in
        the iterable `parts`.

        No parts give an empty Twine, a single part is returned
        unchanged, and anything longer becomes a CompositeTwine.
        """
        # Materialise first so that generators and other one-shot
        # iterables can be tested for emptiness and measured.
        parts = tuple(parts)
        if not parts:
            return Twine(u"")
        if len(parts) == 1:
            return parts[0]
        return CompositeTwine(parts)

    def asFrom(self, sourceURI, startLine=1, startCol=0):
        """
        Return a Twine with source span info from the given URI and
        (optionally) start position.

        The text is cut into one piece per line; every piece includes
        its trailing newline, if any, and gets a one-to-one span on its
        own line. Only the first line starts at `startCol`; later lines
        start at column 0.
        """
        text = unicode(self)
        length = len(text)
        pieces = []
        start = 0
        while start < length:
            end = text.find(u'\n', start)
            if end == -1:
                # Last line has no newline: it ends at the last character.
                end = length - 1
            endCol = startCol + end - start
            ss = SourceSpan(sourceURI, True,
                            startLine, startCol, startLine, endCol)
            pieces.append(Twine(text[start:end + 1], ss))
            startLine += 1
            startCol = 0
            start = end + 1
        return Twine.fromParts(pieces)

    @property
    def span(self):
        """The SourceSpan this text came from, or None."""
        return self._span

    def __getslice__(self, i, j):
        """
        Python 2 simple-slice hook; defers to `__getitem__`.
        """
        return self.__getitem__(slice(i, j))

    def __getitem__(self, idxOrSlice):
        """
        Return a new Twine sliced out of this one, with a matching
        SourceSpan.

        Accepts an integer index (negative values count from the end;
        out-of-range values raise IndexError) or a slice with any
        non-zero step. An empty selection gives an empty Twine without a
        span, and selecting the whole text with step 1 returns `self`.
        """
        length = len(self)
        if isinstance(idxOrSlice, int):
            start = idxOrSlice
            if start < 0:
                start += length
            if not 0 <= start < length:
                raise IndexError("Twine index out of range")
            stop = start + 1
            step = 1
        else:
            start, stop, step = idxOrSlice.indices(length)
        # The selection is empty when `stop` does not lie beyond `start`
        # in the direction of travel.
        if (stop - start) * step <= 0:
            return Twine(u"")
        if step == 1 and start == 0 and stop == length:
            return self
        return self._slice(start, stop, step)

    def _slice(self, start, stop, step):
        """
        This twine is atomic, so a simple slice and updated SourceSpan
        will do.

        `start`, `stop` and `step` are normalised values as produced by
        `slice.indices()`. The new span covers the columns from the
        lowest to the highest selected character and is one-to-one only
        when `step` is 1. A span that is missing or not one-to-one is
        passed through unchanged.
        """
        # slice.indices() reports "run off the front" as stop == -1 for
        # negative steps; fed back into a slice that would mean "last
        # character", so translate it to an open end.
        sliceStop = stop if stop >= 0 else None
        text = unicode.__getitem__(self, slice(start, sliceStop, step))
        if not text:
            return Twine(u"")
        span = self._span
        if span is not None and span.isOneToOne:
            last = start + (len(text) - 1) * step
            lowest = min(start, last)
            highest = max(start, last)
            span = SourceSpan(span.uri, step == 1,
                              span.startLine,
                              span.startCol + lowest,
                              span.endLine,
                              span.startCol + highest)
        return Twine(text, span)


class CompositeTwine(Twine):
    """
    A Twine assembled from a sequence of parts.

    The string value is the concatenation of the parts; the span is
    computed on demand by covering the spans of the parts.
    """

    def __new__(cls, parts):
        """
        Create the string value by joining `parts` and remember them.
        """
        # The iterable is consumed exactly once, here, so the parts are
        # stored straight away rather than in __init__.
        parts = tuple(parts)
        twine = Twine.__new__(cls, u"".join(parts))
        twine._parts = parts
        return twine

    def __init__(self, parts):
        """
        Nothing to do: `__new__` has already stored the parts.
        """

    @property
    def parts(self):
        """The tuple of parts this twine is made of."""
        return self._parts

    @property
    def span(self):
        """
        The span covering every part, or None if there are no parts or
        no covering span can be formed.
        """
        if not self._parts:
            return None
        cover = self._parts[0].span
        for part in self._parts[1:]:
            if cover is None:
                return None
            cover = spanCover(cover, part.span)
        return cover

    def __len__(self):
        """Total length of all parts."""
        return sum(len(p) for p in self._parts)

    def _getPartAt(self, pos):
        """
        Find the part that `pos` is an index into. For instance, if
        self._parts is ['abc', 'def', 'ghi'], 2 is an index into part
        0, and 4 is an index into part 1.

        Returns `[partIndex, offsetWithinPart]`. Raises IndexError if
        `pos` is negative or not smaller than the total length.
        """
        if pos < 0:
            raise IndexError("%s is negative" % (pos,))
        search = 0
        for i, p in enumerate(self._parts):
            if pos < search + len(p):
                return [i, pos - search]
            search += len(p)
        raise IndexError("%s bigger than %s" % (pos, search))

    def _slice(self, start, stop, step):
        """
        Build a slice by extracting the relevant parts from this
        twine, slicing them if necessary, and returning a new
        Twine made from them.

        `start`, `stop` and `step` are normalised values as produced by
        `slice.indices()`. Each part is visited in the direction of
        travel and asked for the selected characters that fall inside
        it, so every piece keeps whatever span its part's own slicing
        gives it.
        """
        pieces = []
        if step > 0:
            begin = 0
            for part in self._parts:
                end = begin + len(part)
                # First selected position at or after the start of
                # this part: stay in phase with `start`.
                first = max(start, begin)
                first += (start - first) % step
                limit = min(stop, end)
                if first < limit:
                    pieces.append(part[first - begin:limit - begin:step])
                begin = end
        else:
            stride = -step
            end = len(self)
            for part in reversed(self._parts):
                begin = end - len(part)
                # First selected position at or before the end of this
                # part, again in phase with `start`.
                first = min(start, end - 1)
                first -= (first - start) % stride
                # Exclusive lower bound: the slice's own stop or the
                # position just before this part, whichever is higher.
                limit = max(stop, begin - 1)
                if first > limit:
                    localStop = limit - begin
                    if localStop < 0:
                        # Run all the way to the front of the part.
                        localStop = None
                    pieces.append(part[first - begin:localStop:step])
                end = begin
        return Twine.fromParts(pieces)

    def __repr__(self):
        """The repr of the joined text."""
        return repr(u''.join(self._parts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment