Created
July 22, 2022 20:47
-
-
Save cicorias/106d59b9eb133fe6399f322a316a7135 to your computer and use it in GitHub Desktop.
Parses output from Hadoop DFSIO utility
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
def parse_file(file_name):
    """Parse a Hadoop DFSIO results log into CSV-ready records.

    Each line is split on its first colon into a key and a value, both
    stripped of surrounding whitespace. A line without a colon (such as a
    blank line) ends the current record.

    Args:
        file_name: Path of the log file to read.

    Returns:
        A ``(keys, rows)`` tuple. ``keys`` is a list of every distinct key
        in first-seen order; ``rows`` is a list of dicts (one per record)
        mapping key to value.
    """
    keys = []
    rows = []
    row = {}
    with open(file_name, 'r') as log:
        for line in log:
            key, sep, value = line.partition(':')
            if not sep:
                # Separator line: close the record in progress. Empty
                # records are skipped so leading or repeated blank lines
                # do not turn into empty CSV rows.
                if row:
                    rows.append(row)
                    row = {}
                continue
            key = key.strip()
            if key not in keys:
                keys.append(key)
            row[key] = value.strip()
    # The log may end without a trailing separator line; flush the record
    # still being built so it is not silently dropped.
    if row:
        rows.append(row)
    return keys, rows
def write_csv(file_name, keys, rows):
    """Write parsed records to a CSV file, preceded by a header row.

    Args:
        file_name: Path of the CSV file to write; it is opened in ``'w'``
            mode, so existing content is replaced.
        keys: Column names, in output order.
        rows: Iterable of dicts mapping column name to value. Columns
            missing from a row are written as empty fields.

    Raises:
        ValueError: If a row contains a key that is not in ``keys``
            (the default ``csv.DictWriter`` behaviour).
    """
    import csv

    # newline='' leaves line endings to the csv module; without it,
    # platforms that translate '\n' on write produce '\r\r\n' row endings.
    with open(file_name, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=keys)
        writer.writeheader()
        writer.writerows(rows)
def parse_arguments(argv=None):
    """Parse the command-line options for the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``file`` (the log to parse) and
        ``output`` (the CSV file to write).

    Raises:
        SystemExit: Raised by argparse when an option is missing or invalid.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Parses output from Hadoop DFSIO utility')
    # Both paths are mandatory: without them the later open() calls would
    # receive None and fail with an unhelpful TypeError.
    parser.add_argument('-f', '--file', required=True, help='File to parse')
    parser.add_argument('-o', '--output', required=True, help='Output file')
    return parser.parse_args(argv)
def main():
    """Script entry point: parse the input log and write it out as CSV."""
    options = parse_arguments()
    columns, records = parse_file(options.file)
    write_csv(options.output, columns, records)


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment