Suor · November 19, 2013 05:08
diff --git a/group_by_lead.py b/group_by_lead.py
 # Original code
 from funcy import ireductions

 def itemcollector(last, x):
    """Reducer step: start a new node or attach a row to the current one.

    A row containing the "Node ID" key is returned unchanged and thereby
    becomes the new accumulator.  Any other row is appended to
    ``last["files"]`` (the list is created on first use) and ``last`` is
    returned, so the accumulator is mutated in place.
    """
    starts_new_node = "Node ID" in x
    if not starts_new_node:
        last.setdefault("files", []).append(x)
        return last
    return x


 def itemgenerator(stream):
    """Yield each node row once all of its file rows have been collected.

    ``stream`` is an iterable of rows.  It is folded with ``itemcollector``
    through funcy's ``ireductions``, which yields the running accumulator
    after every row.  While rows belong to the same node the accumulator
    keeps the same "Node ID"; a change of "Node ID" means the previous node
    is complete and can be emitted.

    An empty ``stream`` yields nothing.
    """
    items = ireductions(itemcollector, stream)
    try:
        last = next(items)
    except StopIteration:
        # Empty stream.  Letting StopIteration escape from a generator body
        # would surface as RuntimeError (PEP 479), so stop cleanly instead.
        return
    for item in items:
        if item["Node ID"] != last["Node ID"]:
            yield last
            last = item
    # The loop only emits a node when the next one starts, so the node still
    # being accumulated at the end of the stream must be flushed explicitly.
    yield last


 # First we need to process each row in the context of its node, i.e. the most recent row with a "Node ID" field.
 # So we'll write an iterator annotating function.
 # This one could be easily generalized if we ever need to.
 def with_node(seq):
    """Pair every row of ``seq`` with the node row it belongs to.

    Yields ``(row, node)`` tuples, where ``node`` is the most recent row
    that contained the "Node ID" key (the row itself when it is a node
    row), or ``None`` if no such row has been seen yet.
    """
    current = None
    for row in seq:
        current = row if "Node ID" in row else current
        yield row, current

 # Now we can just group by annotation
 def itemgenerator(seq):
    """Yield node rows with their file rows collected under ``"files"``.

    Rows are annotated with their node by ``with_node`` and grouped on that
    annotation.  Within a group the first row is the node itself, so it is
    skipped with ``rest`` and the remaining rows become ``node["files"]``.

    Rows seen before the first node row are annotated with ``None`` by
    ``with_node``; such a group fails on the ``node["files"]`` assignment.
    """
    for node, annotated_rows in groupby(with_node(seq), itemgetter(1)):
        # Strip the annotation into a real list right away.  A groupby group
        # is only valid until the outer iterator advances, so a lazy map()
        # (what funcy's map returns on Python 3) would be invalidated as
        # soon as the caller asks for the next node.
        node["files"] = [annotated[0] for annotated in rest(annotated_rows)]
        yield node

 # imports go here so as not to distract anyone's attention
 from operator import itemgetter
 from itertools import groupby
 from funcy import map, rest # you can do import map as xmap if you mind overwriting built-in


 # Sure you could try grouping without annotation, but that will require some complicated logic.
 # You can start with itertools.groupby or funcy.partition_by using this predicate:
 def is_node(row):
    """Return True if ``row`` is a node row, i.e. has the "Node ID" key."""
    return "Node ID" in row
 # I don't know where it will lead you

 # You can also annotate with just node id, not entire node. And then split like:
 for annotated_rows in partition_by(with_node_id(seq), itemgetter(1)):
    rows = map(0, annotated_rows)
    node = rows[0]
    node['files'] = rows[1:]
    yield node
	# Original code
	from funcy import ireductions

	def itemcollector(last, x):
		"""Reducer step: start a new node or attach a row to the current one.

		A row containing the "Node ID" key is returned unchanged and thereby
		becomes the new accumulator.  Any other row is appended to
		``last["files"]`` (the list is created on first use) and ``last`` is
		returned, so the accumulator is mutated in place.
		"""
		if "Node ID" in x:
			return x

		last.setdefault("files", []).append(x)
		return last


	def itemgenerator(stream):
		"""Yield each node row once all of its file rows have been collected.

		``stream`` is an iterable of rows.  It is folded with ``itemcollector``
		through funcy's ``ireductions``, which yields the running accumulator
		after every row.  A change of "Node ID" in the accumulator means the
		previous node is complete and can be emitted.

		An empty ``stream`` yields nothing.
		"""
		items = ireductions(itemcollector, stream)
		try:
			last = next(items)
		except StopIteration:
			# Empty stream.  Letting StopIteration escape from a generator
			# body would surface as RuntimeError (PEP 479).
			return
		for item in items:
			if item["Node ID"] != last["Node ID"]:
				yield last
				last = item
		# The loop only emits a node when the next one starts, so the node
		# still being accumulated at the end must be flushed explicitly.
		yield last


	# First we need to process each row in the context of its node, i.e. the most recent row with a "Node ID" field.
	# So we'll write an iterator annotating function.
	# This one could be easily generalized if we ever need to.
	def with_node(seq):
		"""Pair every row of ``seq`` with the node row it belongs to.

		Yields ``(row, node)`` tuples, where ``node`` is the most recent row
		that contained the "Node ID" key (the row itself when it is a node
		row), or ``None`` if no such row has been seen yet.
		"""
		node = None
		for row in seq:
			if "Node ID" in row:
				node = row
			yield row, node

	# Now we can just group by annotation
	def itemgenerator(seq):
		"""Yield node rows with their file rows collected under ``"files"``.

		Rows are annotated with their node by ``with_node`` and grouped on
		that annotation.  Within a group the first row is the node itself, so
		it is skipped with ``rest`` and the remaining rows become
		``node["files"]``.

		Rows seen before the first node row are annotated with ``None`` by
		``with_node``; such a group fails on the ``node["files"]`` assignment.
		"""
		for node, annotated_rows in groupby(with_node(seq), itemgetter(1)):
			# Strip the annotation into a real list right away.  A groupby
			# group is only valid until the outer iterator advances, so a
			# lazy map() (what funcy's map returns on Python 3) would be
			# invalidated as soon as the caller asks for the next node.
			node["files"] = [annotated[0] for annotated in rest(annotated_rows)]
			yield node

	# imports go here so as not to distract anyone's attention
	from operator import itemgetter
	from itertools import groupby
	from funcy import map, rest # you can do import map as xmap if you mind overwriting built-in


	# Sure you could try grouping without annotation, but that will require some complicated logic.
	# You can start with itertools.groupby or funcy.partition_by using this predicate:
	def is_node(row):
		"""Return True if ``row`` is a node row, i.e. has the "Node ID" key."""
		return "Node ID" in row
	# I don't know where it will lead you

	# You can also annotate with just node id, not entire node. And then split like:
	for annotated_rows in partition_by(with_node_id(seq), itemgetter(1)):
	rows = map(0, annotated_rows)
	node = rows[0]
	node['files'] = rows[1:]
	yield node