-
-
Save rik0/813271 to your computer and use it in GitHub Desktop.
# Copyright (C) 2011 by Enrico Franchi | |
# | |
# This file is released under the terms of the MIT license | |
# http://www.opensource.org/licenses/mit-license.php | |
import itertools as it | |
def silence_generator_already_executing(generator): | |
try: | |
for element in generator: | |
yield element | |
except ValueError, e: | |
pass | |
class lazy_set(object): | |
""" | |
Trace of implementation of lazy sets in Python. | |
""" | |
def __init__(self, input_=[]): | |
self.input_ = iter(input_) | |
self.seen = set() | |
def _iter(self): | |
for element in self.input_: | |
if element in self.seen: | |
continue | |
else: | |
yield element | |
self.seen.add(element) | |
def add(self, obj): | |
self.seen.add(obj) | |
def update(self, iterator): | |
self.input_ = it.chain(self.input_, | |
silence_generator_already_executing(iterator)) | |
def __iter__(self): | |
return it.chain(self.seen, self._iter()) | |
if __name__ == "__main__": | |
import doctest | |
doctest.testmod() |
import unittest | |
import lazy_set | |
class LazySetTest(unittest.TestCase): | |
def setUp(self): | |
self.lazy_set = lazy_set.lazy_set(xrange(1, 4)) | |
self.starting_value = range(1, 4) | |
def testCreation(self): | |
self.assertEqual( | |
sorted(self.lazy_set), | |
self.starting_value | |
) | |
def testUpdate(self): | |
self.lazy_set.update(2* x for x in range(4, 10)) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
[1, 2, 3, 8, 10, 12, 14, 16, 18] | |
) | |
def testAdd(self): | |
self.lazy_set.add(5) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
[1, 2, 3, 5] | |
) | |
def testUniqueness(self): | |
self.lazy_set.update(xrange(5)) | |
self.lazy_set.update(xrange(3, 8)) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
range(8) | |
) | |
def testSelfUpdateWIter(self): | |
self.lazy_set.update(iter(self.lazy_set)) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
self.starting_value | |
) | |
def testSelfUpdateWOIter(self): | |
self.lazy_set.update(self.lazy_set) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
self.starting_value | |
) | |
def testSelfUpdateNotLats(self): | |
self.lazy_set.update(iter(self.lazy_set)) | |
self.lazy_set.update(xrange(5, 8)) | |
self.assertEqual( | |
sorted(self.lazy_set), | |
range(1, 4) + range(5, 8) | |
) | |
if __name__ == '__main__': | |
unittest.main() |
Lines 36-37 are ok: we keep track of the elements we have already seen and do not want to yield them again. It's a set: items are unique.
You are supposed to use it "as a real set" when all the proper methods are implemented. Right now, it is nor extremely useful.
The advantages? In principle the thing is lazy. You can pass a huge generator as an input and the set is immediately constructed, without having to wait that all the elements are generated (plain sets in python are eager).
I do not really believe that this is extremely useful, but it is a nice exercise. I believe it is pretty tricky to get right. I would advise everyone against using it until it has passed all the unit tests for the real set, which would prove that it behaves nicely.
TY... but I think you didn't understand what I wanted to say about lines 36-37 (...or maybe I don't understand your answer).
Isn't better to write directly:
if element not in self.seen:
yield element
self.seen.add(element)
without lines 36-37?
You asked if they were redundant (No longer needed or useful; superfluous;Able to be omitted without loss of meaning or function).
The answer to that question is no. They cannot be omitted without loss of meaning or function.
What I now understand you meant is: can that functionality provided using less lines of code? The answer is "yes". The code you provided is essentially equivalent on (cPython you save 3 vm operations).
However, this kind of stuff is rather unimportant: I wrote it this way for no reason that there is less code to change should we seriously change the definition. As I said, I'm not sure I'm not missing something. I do not even believe the code is perfectly correct; as a consequence micro optimizations are premature.
Moreover, in cases just less trivial than this (here a formal proof is feasible) it is not wise to change code (in particular for micro-optimizing) without a full set of unit tests which would catch freshly introduced bugs.
But the short story is: yes. You could have written that way.
Please, do not consider my last post as "this kind of stuff is not relevant". It is good to consider readability, style and efficiency (in this order) when writing code. It is the only way to improve. In pair programming, it would be the navigator to consider these issues.
Good job... but aren't redundant lines 36-7?
However, how can I use it? Which are the advantages compared using normal sets?
Marco.