Last active
August 29, 2015 14:16
-
-
Save kostyll/5fe35f8a5da3561b95a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
class Segment(object):
    """A run of data anchored at a start offset.

    Attributes:
        index: start offset of the run, as returned by the getter.
        len: ``len(data)``; refreshed every time ``data`` is assigned.
            It is a plain attribute, so callers may also overwrite it.
    """

    def __init__(self, item, getter):
        """Build the segment from ``getter(item)``.

        Args:
            item: value handed to ``getter`` unchanged.
            getter: callable returning an ``(index, data)`` pair; ``data``
                must support ``len()``.
        """
        self.index, self._data = getter(item)
        self.len = len(self._data)

    def __repr__(self):
        return "<<Segment[%s:%s]['%s']>>" % (self.index, self.len, self.data)

    def __str__(self):
        return str(self.data)

    @property
    def data(self):
        """The payload carried by this segment."""
        return self._data

    @data.setter
    def data(self, value):
        # Keep the cached length in step with the payload.
        self._data = value
        self.len = len(self._data)
class SegmentMap(object):
    """Ordered list of segments in which overlapping neighbours are coalesced.

    Segments are duck-typed: each one needs ``index`` (start offset), ``data``
    (a sliceable, concatenable sequence) and ``len`` (length of ``data``).
    ``add_segment`` keeps ``self.segments`` sorted by ``index``; the merge
    helpers rely on that ordering.
    """

    def __init__(self):
        self.segments = []

    def _end(self, position):
        # Offset one past the last element of the segment at ``position``.
        segment = self.segments[position]
        return segment.index + segment.len

    def check_segments(self, index):
        """Merge the segment at ``index`` with the following segments it overlaps.

        With the list sorted by start offset, the first follower that does not
        overlap ends the scan: nothing further right can overlap either.
        Stopping there also guarantees termination, because every remaining
        iteration merges (and removes) the direct neighbour.

        Args:
            index: non-negative position of the segment to grow.
        """
        following = index + 1
        while following < len(self.segments):
            print("Iteration index=%s, i=%s, segments = %s"
                  % (index, following, self.segments))
            if not self.intersects(index, following):
                break
            print("intersects %s & %s" % (index, following))
            self.merge_segments(index, following)

    def intersects(self, index1, index2):
        """Return True if the earlier segment runs past the later one's start.

        The comparison is strict, so segments that merely touch do not count
        as intersecting. The two positions may be given in either order.
        """
        if index1 > index2:
            index1, index2 = index2, index1
        return self._end(index1) > self.segments[index2].index

    def includes(self, index1, index2):
        """Return True if the earlier segment ends strictly after the later one.

        The two positions may be given in either order.
        """
        if index1 > index2:
            index1, index2 = index2, index1
        return self._end(index1) > self._end(index2)

    def get_delta(self, index1, index2):
        """Return end of segment ``index1`` minus start of segment ``index2``.

        Positive values are the overlap length; negative values are the size
        of the gap between the two segments.
        """
        return self._end(index1) - self.segments[index2].index

    def merge_segments(self, index1, index2):
        """Fold the later of two adjacent segments into the earlier one.

        Only direct neighbours are merged; any other pair (including a segment
        with itself) is left untouched. If the earlier segment already covers
        the later one, the later one is simply dropped; otherwise the part of
        the later segment that lies beyond the overlap is appended.
        """
        if index1 > index2:
            index1, index2 = index2, index1
        if index2 - index1 != 1:
            return
        if self.includes(index1, index2):
            print("includes %s & %s" % (index1, index2))
        else:
            delta = self.get_delta(index1, index2)
            if delta < 0:
                # A gap separates the segments: a negative slice start would
                # splice from the wrong end, so there is nothing to merge.
                return
            print("joining %s & %s, delta = %s" % (index1, index2, delta))
            first = self.segments[index1]
            first.data = first.data + self.segments[index2].data[delta:]
            first.len = len(first.data)
        del self.segments[index2]

    def insert_segment(self, index, new_segment):
        """Insert ``new_segment`` at ``index`` and merge it with its neighbours.

        The new segment first absorbs the followers it overlaps, then the
        preceding segment absorbs the result if the two overlap.
        """
        if index > len(self.segments):
            # list.insert clamps as well; keep ``index`` equal to the real slot.
            index = len(self.segments)
        self.segments.insert(index, new_segment)
        self.check_segments(index)
        if index > 0:
            self.check_segments(index - 1)

    def add_segment(self, new_segment):
        """Insert ``new_segment`` at its sorted position and coalesce overlaps."""
        position = len(self.segments)
        for candidate, segment in enumerate(self.segments):
            if segment.index >= new_segment.index:
                position = candidate
                break
        self.insert_segment(position, new_segment)

    def __repr__(self):
        return self.repr()

    def repr(self, filler='\0', fill=True):
        """Concatenate the data of all segments into one string.

        Args:
            filler: string repeated once per missing position between two
                consecutive segments.
            fill: when false, segment data is joined with nothing in between.

        Returns:
            The joined string; ``""`` for an empty map. Positions before the
            first segment are not padded.
        """
        if not self.segments:
            return ""
        if not fill:
            return "".join(segment.data for segment in self.segments)
        pieces = []
        for position, segment in enumerate(self.segments[:-1]):
            # get_delta is end-minus-next-start, i.e. the negated gap size.
            gap = -self.get_delta(position, position + 1)
            pieces.append(segment.data)
            pieces.append(filler * max(gap, 0))
        pieces.append(self.segments[-1].data)
        return "".join(pieces)
def main():
    """Demo: rebuild a sentence from overlapping, randomly sized slices.

    Each slice is added to a SegmentMap at its start position shifted by a
    fixed offset; the map is printed after every insertion, and the final
    check compares the first merged segment with the original text.
    """
    import random

    text = 'This news is from China. Twins are born there. These twins are connected. Connected twins are called conjoined twins.'
    print("text = %s" % text)

    offset = 30  # constant shift applied to every segment's start index
    segment_map = SegmentMap()

    def slice_at(start):
        # Slice of 43-50 characters beginning at ``start`` (clipped at the end
        # of the text), reported at the shifted position.
        return (start + offset, text[start:start + 40 + random.randint(3, 10)])

    for start in [0, 5, 38, 7, 60, 82, 3]:
        segment_map.add_segment(Segment(start, slice_at))
        print("after add = %s" % segment_map.segments)
    print("Works = %s" % (text == segment_map.segments[0].data))
    print(segment_map.segments)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment