Last active
February 5, 2022 19:06
-
-
Save c-w/82fcf284528728d934afd4f2e5ba28f3 to your computer and use it in GitHub Desktop.
Analyze item counts in CSV columns
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count how often each value occurs in one column of a CSV file.

Usage::

    script.py INFILE COLUMN_NAME [-o OUTFILE]

The result is written as a tab-separated table with the columns ``record``
and ``count``, most frequent value first.  ``-`` may be given for INFILE or
OUTFILE to mean standard input / standard output; without ``-o`` the table
goes to standard output.
"""
from argparse import ArgumentParser, FileType  # FileType is unused below; import retained
from collections import Counter
from contextlib import nullcontext
from csv import DictReader, DictWriter
from sys import stdin, stdout


def _build_parser():
    """Return the command-line parser (same arguments as before)."""
    parser = ArgumentParser(description="Analyze item counts in CSV columns")
    # Paths are taken as plain strings and opened later by _open_csv, so the
    # output file is not created or truncated before the input was validated.
    parser.add_argument("infile", help="CSV file to read, or '-' for stdin")
    parser.add_argument("column_name", help="header name of the column to count")
    parser.add_argument(
        "-o",
        "--outfile",
        default="-",
        help="file to write the counts to (default: stdout)",
    )
    return parser


def _open_csv(parser, path, mode):
    """Open *path* for use with the csv module and return a context manager.

    ``-`` maps to stdin (mode ``"r"``) or stdout (mode ``"w"``); those streams
    are wrapped so that leaving the ``with`` block does not close them.  Real
    files are opened as UTF-8 with ``newline=""``, as the csv module requires,
    so that it alone controls line endings.  A file that cannot be opened is
    reported through ``parser.error`` (usage message, exit status 2).
    """
    if path == "-":
        return nullcontext(stdin if mode == "r" else stdout)
    try:
        return open(path, mode, encoding="utf-8", newline="")
    except OSError as err:
        parser.error(f"can't open '{path}': {err}")


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 on bad arguments, an unopenable file, or a
            column name that is missing from the CSV header.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    column_name = args.column_name

    with _open_csv(parser, args.infile, "r") as f_in:
        reader = DictReader(f_in)
        header = reader.fieldnames
        # An entirely empty input has no header to check against and simply
        # yields no counts; otherwise fail early with a readable message
        # instead of a KeyError traceback on the first row.
        if header is not None and column_name not in header:
            parser.error(
                f"column '{column_name}' not found; "
                f"available columns: {', '.join(header)}"
            )
        counts = Counter(row[column_name] for row in reader)

    # The output is opened only after the input was read successfully.
    with _open_csv(parser, args.outfile, "w") as f_out:
        writer = DictWriter(f_out, fieldnames=["record", "count"], delimiter="\t")
        writer.writeheader()
        for record, count in counts.most_common():
            writer.writerow({"record": record, "count": count})


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment