Created
November 19, 2013 05:08
-
-
Save Suor/7540628 to your computer and use it in GitHub Desktop.
Grouping items by lead node
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original code | |
from funcy import ireductions | |
def itemcollector(last, x):
    """Fold one row into the running node record.

    A row that has a "Node ID" key starts a new record and is returned
    unchanged.  Any other row is appended to the "files" list of ``last``
    (the list is created on first use) and ``last`` itself is returned.
    """
    if "Node ID" not in x:
        # Plain file row: attach it to the record accumulated so far.
        last.setdefault("files", []).append(x)
        return last
    return x
def itemgenerator(stream):
    """Yield each node record together with the file rows that follow it.

    ``stream`` is an iterable of dict rows.  The rows are folded with
    ``itemcollector`` through funcy's ``ireductions``, which produces every
    intermediate accumulator; a record is emitted once the next reduction
    carries a different "Node ID".

    An empty ``stream`` yields nothing.  The first row is expected to be a
    node row: the comparison below reads "Node ID" from every reduction.
    """
    items = ireductions(itemcollector, stream)
    # A private sentinel keeps StopIteration from escaping the generator on
    # empty input (that would surface as RuntimeError under PEP 479).
    missing = object()
    last = next(items, missing)
    if last is missing:
        return
    for item in items:
        if item["Node ID"] != last["Node ID"]:
            yield last
        last = item
    # The loop only emits a record when its successor shows up, so the final
    # record has to be flushed explicitly.
    yield last
# First we need to process rows in the context of their node, which is the last row with a "Node ID" field.
# So we'll write an iterator annotating function. | |
# This one could be easily generalized if we ever need to. | |
def with_node(seq):
    """Annotate every row with the most recent row that has "Node ID".

    Yields ``(row, node)`` tuples.  A node row is paired with itself, and
    rows that appear before the first node row are paired with ``None``.
    """
    current = None
    for row in seq:
        # A node row replaces the tracked node; other rows leave it alone.
        current = row if "Node ID" in row else current
        yield row, current
# Now we can just group by annotation | |
def itemgenerator(seq):
    """Yield each node row with its trailing rows stored under "files".

    Rows are annotated with their node by ``with_node`` and then grouped on
    that annotation with ``itertools.groupby``.  Every yielded node is the
    original row dict, mutated in place to carry a "files" list (empty when
    no rows follow the node).

    Rows that precede the first node row are grouped under ``None`` and make
    the "files" assignment fail, so ``seq`` is expected to start with a node
    row.
    """
    for node, annotated_rows in groupby(with_node(seq), itemgetter(1)):
        # Strip the annotation eagerly: the group shares its iterator with
        # groupby and is invalidated as soon as the outer loop advances, so a
        # lazy map() stored on the node could turn out empty when read later.
        rows = [row for row, _ in annotated_rows]
        # The first row of every group is the node itself; the rest are files.
        node["files"] = rows[1:]
        yield node
# Imports go here so as not to distract anyone's attention
from operator import itemgetter | |
from itertools import groupby | |
from funcy import map, rest # you can do import map as xmap if you mind overwriting built-in | |
# Sure you could try grouping without annotation, but that will require some complicated logic. | |
# You can start with itertools.groupby or funcy.partition_by using this predicate: | |
def is_node(row):
    """Return True when ``row`` is a node row, i.e. it has a "Node ID" key."""
    return "Node ID" in row
# I don't know where it will lead you
# You can also annotate with just node id, not entire node. And then split like: | |
for annotated_rows in partition_by(with_node_id(seq), itemgetter(1)): | |
rows = map(0, annotated_rows) | |
node = rows[0] | |
node['files'] = rows[1:] | |
yield node |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment