Skip to content

Instantly share code, notes, and snippets.

@stavxyz
Last active October 23, 2015 18:01
Show Gist options
  • Save stavxyz/b7b7e0c503397aeaa5e3 to your computer and use it in GitHub Desktop.
Save stavxyz/b7b7e0c503397aeaa5e3 to your computer and use it in GitHub Desktop.
stdout columns to list of dicts - now supports tabs (or any other delimiter) and more than two columns

Take any block of columns printed to stdout and turn it into a list of dictionaries (or a list of tuples when there is no header row)...

def parse_block(block, headers=True, delimiter=None):
    """Return a list of items, one for each non-blank row of ``block``.

    If there are headers in the block of text (``headers`` is true),
    the first non-blank line is used as the keys and a list of dicts
    is returned, one per remaining row. If there are no headers, a
    list of tuples is returned, one per row.

    If delimiter is None, it is passed to str.split as is,
    which is interpreted to mean "whitespace", such as tabs
    and spaces.

    The first non-blank line decides the number of columns. A row
    with more fields than that keeps the surplus text, delimiters
    included, in its last column. A row with fewer fields is
    returned short (dicts simply lack the trailing keys).

    An input with no non-blank lines yields an empty list.
    """
    # Strip every line once and keep only the ones that are not blank.
    lines = [line for line in (raw.strip() for raw in block.splitlines())
             if line]
    if not lines:
        return lines
    # The first line fixes the column count for the whole block.
    cols = len(lines[0].split(delimiter))
    # N columns need at most N - 1 splits; limiting maxsplit this way
    # makes the final column absorb any extra fields instead of
    # producing an (N + 1)-th element that zip() would silently drop.
    breakout = [line.split(delimiter, cols - 1) for line in lines]
    if headers:
        names, rows = breakout[0], breakout[1:]
        return [dict(zip(names, row)) for row in rows]
    return [tuple(row) for row in breakout]


# Sample input: a header row followed by two whitespace-separated columns.
machines = """
MACHINE     METADATA
22e994f51ce04e098253d52a91ec4431 role=services
42186fe9d3cc478ea918f381c479d890 environment=staging,role=worker
5cf1ac5803264981830308ffdda1d9a2 role=services
74eb21c316d34b37bd00335974f6b8c2 role=services
9c63be10638b4aa88e548b688b6fc7a2 environment=dev,role=worker
a0fbca206cc043838c0aeddb853769fa environment=staging,role=worker
ae0f612d223b41c0a4865fb5c8d0f4b1 role=services
b098fcfbc0c24e849654328edc28700b environment=dev,role=worker
c1d4cc4c9b4a468bb69409b5c9777bc0 role=services
"""

# Call form of print: valid on both Python 2 and Python 3.
print(parse_block(machines))

[{'MACHINE': '22e994f51ce04e098253d52a91ec4431', 'METADATA': 'role=services'},
 {'MACHINE': '42186fe9d3cc478ea918f381c479d890',
  'METADATA': 'environment=staging,role=worker'},
 {'MACHINE': '5cf1ac5803264981830308ffdda1d9a2', 'METADATA': 'role=services'},
 {'MACHINE': '74eb21c316d34b37bd00335974f6b8c2', 'METADATA': 'role=services'},
 {'MACHINE': '9c63be10638b4aa88e548b688b6fc7a2',
  'METADATA': 'environment=dev,role=worker'},
 {'MACHINE': 'a0fbca206cc043838c0aeddb853769fa',
  'METADATA': 'environment=staging,role=worker'},
 {'MACHINE': 'ae0f612d223b41c0a4865fb5c8d0f4b1', 'METADATA': 'role=services'},
 {'MACHINE': 'b098fcfbc0c24e849654328edc28700b',
  'METADATA': 'environment=dev,role=worker'},
 {'MACHINE': 'c1d4cc4c9b4a468bb69409b5c9777bc0', 'METADATA': 'role=services'}]


# Sample input: three columns separated by an irregular mix of spaces
# and tabs (the \t escapes below become real tab characters).
foo = """
MACHINE  \t   METADATA  OTHER
22e994f51ce04e098253d52a91ec4431 role=services bar
42186fe9d3cc478ea918f381c479d890 environment=staging,role=worker\t baz
5cf1ac5803264981830308ffdda1d9a2 role=services   \tbing
74eb21c316d34b37bd00335974f6b8c2\t   role=services qux
9c63be10638b4aa88e548b688b6fc7a2 environment=dev,role=worker fred
a0fbca206cc043838c0aeddb853769fa environment=staging,role=worker waldo
ae0f612d223b41c0a4865fb5c8d0f4b1 role=services     xyzzy
b098fcfbc0c24e849654328edc28700b environment=dev,role=worker     thud
c1d4cc4c9b4a468bb69409b5c9777bc0 role=services\t  wobble
"""

# Call form of print: valid on both Python 2 and Python 3.
print(parse_block(foo))
 
 [{'MACHINE': '22e994f51ce04e098253d52a91ec4431',
  'METADATA': 'role=services',
  'OTHER': 'bar'},
 {'MACHINE': '42186fe9d3cc478ea918f381c479d890',
  'METADATA': 'environment=staging,role=worker',
  'OTHER': 'baz'},
 {'MACHINE': '5cf1ac5803264981830308ffdda1d9a2',
  'METADATA': 'role=services',
  'OTHER': 'bing'},
 {'MACHINE': '74eb21c316d34b37bd00335974f6b8c2',
  'METADATA': 'role=services',
  'OTHER': 'qux'},
 {'MACHINE': '9c63be10638b4aa88e548b688b6fc7a2',
  'METADATA': 'environment=dev,role=worker',
  'OTHER': 'fred'},
 {'MACHINE': 'a0fbca206cc043838c0aeddb853769fa',
  'METADATA': 'environment=staging,role=worker',
  'OTHER': 'waldo'},
 {'MACHINE': 'ae0f612d223b41c0a4865fb5c8d0f4b1',
  'METADATA': 'role=services',
  'OTHER': 'xyzzy'},
 {'MACHINE': 'b098fcfbc0c24e849654328edc28700b',
  'METADATA': 'environment=dev,role=worker',
  'OTHER': 'thud'},
 {'MACHINE': 'c1d4cc4c9b4a468bb69409b5c9777bc0',
  'METADATA': 'role=services',
  'OTHER': 'wobble'}]
def parse_block(block, headers=True, delimiter=None):
    """Return a list of items, one for each non-blank row of ``block``.

    If there are headers in the block of text (``headers`` is true),
    the first non-blank line is used as the keys and a list of dicts
    is returned, one per remaining row. If there are no headers, a
    list of tuples is returned, one per row.

    If delimiter is None, it is passed to str.split as is,
    which is interpreted to mean "whitespace", such as tabs
    and spaces.

    The first non-blank line decides the number of columns. A row
    with more fields than that keeps the surplus text, delimiters
    included, in its last column. A row with fewer fields is
    returned short (dicts simply lack the trailing keys).

    An input with no non-blank lines yields an empty list.
    """
    # remove nullish lines, stripping each line only once
    stripped = (raw.strip() for raw in block.splitlines())
    lines = [line for line in stripped if line]
    if not lines:
        return lines
    # split first line on the delimiter to count the columns
    cols = len(lines[0].split(delimiter))
    # N columns need at most N - 1 splits; this keeps every row at N
    # elements or fewer, with overflow text folded into the last column
    # rather than becoming an extra element that zip() would discard.
    breakout = [line.split(delimiter, cols - 1) for line in lines]
    if headers:
        names, rows = breakout[0], breakout[1:]
        return [dict(zip(names, row)) for row in rows]
    return [tuple(row) for row in breakout]
if __name__ == '__main__':
    # Demo: echo each sample input, then pretty-print what parse_block
    # returns for it. The sample text inside the triple-quoted strings is
    # deliberately kept at column 0 so the literals contain no added
    # indentation.
    from pprint import pprint

    # Two columns with a header row.
    machines = """
MACHINE METADATA
22e994f51ce04e098253d52a91ec4431 role=services
42186fe9d3cc478ea918f381c479d890 environment=staging,role=worker
5cf1ac5803264981830308ffdda1d9a2 role=services
74eb21c316d34b37bd00335974f6b8c2 role=services
9c63be10638b4aa88e548b688b6fc7a2 environment=dev,role=worker
a0fbca206cc043838c0aeddb853769fa environment=staging,role=worker
ae0f612d223b41c0a4865fb5c8d0f4b1 role=services
b098fcfbc0c24e849654328edc28700b environment=dev,role=worker
c1d4cc4c9b4a468bb69409b5c9777bc0 role=services
"""
    print('\nInput:\n%s\n' % machines)
    pprint(parse_block(machines))

    # Three columns separated by a mix of spaces and tabs.
    foo = """
MACHINE \t METADATA OTHER
22e994f51ce04e098253d52a91ec4431 role=services bar
42186fe9d3cc478ea918f381c479d890 environment=staging,role=worker\t baz
5cf1ac5803264981830308ffdda1d9a2 role=services \tbing
74eb21c316d34b37bd00335974f6b8c2\t role=services qux
9c63be10638b4aa88e548b688b6fc7a2 environment=dev,role=worker fred
a0fbca206cc043838c0aeddb853769fa environment=staging,role=worker waldo
ae0f612d223b41c0a4865fb5c8d0f4b1 role=services xyzzy
b098fcfbc0c24e849654328edc28700b environment=dev,role=worker thud
c1d4cc4c9b4a468bb69409b5c9777bc0 role=services\t wobble
"""
    print('\nInput:\n%s\n' % foo)
    pprint(parse_block(foo))

    # No header row: parsed with headers=False, so tuples come back.
    foo = """
key0 foo0 bar0
key1 foo1 bar1
key2 foo2 bar2
key3 foo3 bar3
key4 foo4 bar4
"""
    print('\nInput:\n%s\n' % foo)
    pprint(parse_block(foo, headers=False))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment