Created
August 1, 2012 17:47
-
-
Save Lysander/3229182 to your computer and use it in GitHub Desktop.
Sort a file by an index file that defines the order; see http://forum.ubuntuusers.de/topic/logische-zusammenfuegen-von-2-dateien/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Tobi Testperson 7 38 | |
| MaxMusterPerson3 5 12 | |
| NachnamePerson3 43 23 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| MaxMusterPerson3 | |
| Tobi Testperson | |
| NachnamePerson3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ./sort.py index.txt < data.txt | |
| MaxMusterPerson3 5 12 | |
| Tobi Testperson 7 38 | |
| NachnamePerson3 43 23 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python2 | |
| # coding: utf-8 | |
| import sys | |
| import argparse | |
| from contextlib import closing | |
| from collections import defaultdict | |
def read_index(fileobj):
    """Yield the sort keys listed in *fileobj*, one per line, in file order.

    Leading and trailing whitespace is stripped from every line.  Lines
    that are empty after stripping are skipped, so a stray blank line in
    the index file does not turn into an empty key.

    The file object is closed when the generator is exhausted or closed.
    """
    with closing(fileobj):
        for line in fileobj:
            key = line.strip()
            if key:
                yield key
def parse_data(fileobj):
    """Read the data rows from *fileobj* into a ``key -> [a, b]`` mapping.

    Every row is split from the right on whitespace into three parts: the
    last two fields are the values and everything in front of them is the
    key, so a key may itself contain spaces (e.g. ``"Tobi Testperson"``).
    Blank or whitespace-only rows are skipped.  If a key occurs more than
    once, the last row wins.

    The result is a ``defaultdict(list)``, so looking up a key that was not
    present in the data gives an empty list instead of raising.

    The file object is closed before returning.

    Raises:
        ValueError: if a non-blank row has fewer than three fields.
    """
    data = defaultdict(list)
    with closing(fileobj):
        for row in fileobj:
            if not row.strip():
                # Tolerate empty lines (typically a trailing one) instead
                # of failing on the unpacking below.
                continue
            key, a, b = row.rsplit(None, 2)
            data[key] = [a, b]
    return data
def save(data, fileobj):
    """Write every string in *data* to *fileobj* as one line each.

    Each line, including the last one, is terminated with a newline so the
    result is a well-formed text file.  The whole output is assembled first
    and handed to ``fileobj.write`` in a single call; an empty *data*
    results in an empty write.

    The file object is closed afterwards.
    """
    with closing(fileobj):
        fileobj.write("".join(line + "\n" for line in data))
def sort(index, data):
    """Yield one output line per key in *index*, in index order.

    A line consists of the key, a single space, and the key's values from
    *data* joined by spaces.  *data* is looked up with ``data[key]``.
    """
    for name in index:
        values = data[name]
        joined_values = " ".join(values)
        yield "{} {}".format(name, joined_values)
def main():
    """Command-line entry point.

    Takes a required index file plus an optional input file (standard
    input when omitted) and an optional output file (standard output when
    omitted), then writes the input rows in the order given by the index.
    """
    cli = argparse.ArgumentParser(description="sort a file by an indexfile")
    cli.add_argument("index", type=argparse.FileType("r"))
    cli.add_argument(
        "infile",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
    )
    cli.add_argument(
        "outfile",
        nargs="?",
        type=argparse.FileType("w"),
        default=sys.stdout,
    )
    options = cli.parse_args()

    # read_index is lazy; parse_data consumes its file right away.
    keys = read_index(options.index)
    rows = parse_data(options.infile)
    save(sort(keys, rows), options.outfile)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment