Skip to content

Instantly share code, notes, and snippets.

@Suor
Created November 19, 2013 05:08
Show Gist options
  • Save Suor/7540628 to your computer and use it in GitHub Desktop.
Save Suor/7540628 to your computer and use it in GitHub Desktop.
Grouping items by lead node
# Original code
from funcy import ireductions
def itemcollector(last, x):
    """Fold one row into the node that is currently being collected.

    Intended as the reducing function over a stream of row dicts.

    Args:
        last: The accumulator, i.e. the row returned by the previous step.
        x: The next row taken from the stream.

    Returns:
        ``x`` itself when it has a "Node ID" key (it becomes the new
        accumulator); otherwise ``last``, after ``x`` has been appended to
        ``last["files"]``.

    Note:
        ``last`` is mutated in place.
    """
    if "Node ID" in x:
        return x
    # setdefault creates the list on first use and returns it either way.
    last.setdefault("files", []).append(x)
    return last
def itemgenerator(stream):
    """Yield each node row once all of its file rows have been collected.

    Args:
        stream: Iterable of row dicts. A row with a "Node ID" key starts a
            new node; the rows that follow are folded into it by
            ``itemcollector``.

    Yields:
        The accumulated node rows in stream order. Nothing is yielded for an
        empty stream.
    """
    items = ireductions(itemcollector, stream)
    try:
        last = next(items)
    except StopIteration:
        # Empty stream. A StopIteration escaping a generator body is turned
        # into RuntimeError on Python 3.7+ (PEP 479), so return explicitly.
        return
    for item in items:
        if item["Node ID"] != last["Node ID"]:
            yield last
            last = item
    # The loop only emits a node when the next one begins, so the final node
    # is still pending at this point.
    yield last
# First we need to process each row in the context of its node, i.e. the most recent row with a "Node ID" field.
# So we'll write a function that annotates an iterator.
# This one could be easily generalized if we ever need to.
def with_node(seq):
    """Annotate every row of *seq* with the node it belongs to.

    Yields ``(row, node)`` pairs, where ``node`` is the most recent row seen
    so far (``row`` itself included) that has a "Node ID" key, or ``None``
    when no such row has been seen yet.
    """
    current = None
    for row in seq:
        current = row if "Node ID" in row else current
        yield row, current
# Now we can just group by annotation
def itemgenerator(seq):
    """Yield each node row with the rows that follow it stored under "files".

    Args:
        seq: Iterable of row dicts. A row with a "Node ID" key is a node;
            every other row belongs to the closest node before it.

    Yields:
        Each node dict, mutated in place so that ``node["files"]`` is the
        list of rows between it and the next node.

    Raises:
        TypeError: If rows appear before the first node row; ``with_node``
            annotates those with ``None``, which cannot take a "files" item.
    """
    for node, annotated_rows in groupby(with_node(seq), itemgetter(1)):
        # Build the list right away: a groupby sub-iterator stops being valid
        # as soon as the outer groupby advances, so a lazy map() stored here
        # could come up empty by the time the caller reads it.
        # The comprehension strips the annotation; [1:] drops the node row,
        # which is always the first row of its own group.
        node["files"] = [row for row, _ in annotated_rows][1:]
        yield node
# Imports go here so as not to distract anyone's attention
from operator import itemgetter
from itertools import groupby
from funcy import map, rest # you can do import map as xmap if you mind overwriting built-in
# Sure you could try grouping without annotation, but that will require some complicated logic.
# You can start with itertools.groupby or funcy.partition_by using this predicate:
def is_node(row):
    """Return True if *row* is a node row, i.e. it has a "Node ID" key."""
    return "Node ID" in row
# I don't know where that will lead you, though.
# You can also annotate with just the node id rather than the entire node, and then split like this:
def with_node_id(seq):
    """Yield ``(row, node_id)`` pairs for every row of *seq*.

    ``node_id`` is the "Node ID" value of the most recent row that has that
    key (the current row included), or ``None`` before any such row is seen.
    """
    node_id = None
    for row in seq:
        if "Node ID" in row:
            node_id = row["Node ID"]
        yield row, node_id


def itemgenerator_by_node_id(seq):
    """Group rows by node id and yield each group's first row as the node.

    Consecutive rows sharing the same node-id annotation form one group. The
    first row of a group is yielded with the remaining rows of that group
    stored as a list under its 'files' key.

    Args:
        seq: Iterable of row dicts; rows with a "Node ID" key start a node.

    Yields:
        The first row of every group, mutated in place to carry 'files'.
    """
    # groupby yields (key, group); the key is only needed for splitting.
    for _, annotated_rows in groupby(with_node_id(seq), itemgetter(1)):
        # Materialise the group so it can be indexed and sliced.
        rows = [row for row, _ in annotated_rows]
        node = rows[0]
        node['files'] = rows[1:]
        yield node
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment