Last active
February 26, 2021 03:56
-
-
Save rodrigoddc/7e6169ad3dc72218e24492347b47b121 to your computer and use it in GitHub Desktop.
wordcount
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import itertools | |
| from collections import Counter | |
| from operator import itemgetter | |
def file_to_string_list(filename) -> list:
    """Read a text file and return its whitespace-separated tokens as a flat list.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of ``str`` tokens in the order they appear in the file. The
        list is empty when the file holds nothing but whitespace.

    Raises:
        FileNotFoundError: If ``filename`` does not exist. The original
            error is chained as the cause.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(filename, 'r', encoding='utf-8') as f:
            # str.split() with no separator splits on any run of whitespace,
            # newlines included, so this yields the same flat list as
            # splitting each line and flattening the nested result.
            return f.read().split()
    except FileNotFoundError as e:
        raise FileNotFoundError("Arquivo nao encontrado") from e
def print_words(filename):
    """Print every distinct word of a file with its count, sorted by word.

    Tokens come from file_to_string_list() and are lower-cased before
    counting, so words differing only in case are counted together.

    Args:
        filename: Path of the file to analyse.
    """
    tally = Counter()
    for token in file_to_string_list(filename):
        tally[token.lower()] += 1

    # Iterating a sorted view of the keys gives ascending word order.
    for word in sorted(tally):
        # NOTE: the label says "letra" (letter) although whole words are
        # counted; the text is kept as-is to preserve the program's output.
        print(f"letra: '{word}' ocorrencia: {tally[word]}x")
def print_top(filename, limit=20):
    """Print the most frequent words of a file, most common first.

    Tokens come from file_to_string_list() and are lower-cased before
    counting. Each output line is prefixed with its 1-based rank.

    Args:
        filename: Path of the file to analyse.
        limit: Maximum number of entries to print. Defaults to 20;
            ``None`` prints every distinct word.
    """
    words = file_to_string_list(filename)
    # most_common(limit) selects the top entries directly instead of sorting
    # the whole table and slicing it afterwards.
    ranking = Counter(word.lower() for word in words).most_common(limit)
    for position, (word, count) in enumerate(ranking, start=1):
        # NOTE: the label says "letra" (letter) although whole words are
        # counted; the text is kept as-is to preserve the program's output.
        print(f"{position}º letra: '{word}' ocorrencia: {count}x")
def main():
    """Call print_words() or print_top() according to the program arguments.

    Expects exactly two command-line arguments: an option (``--count`` or
    ``--topcount``) followed by a file name. On a wrong argument count or an
    unknown option, prints a message and exits with status 1.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        print('Utilização: ./13_wordcount.py {--count | --topcount} file')
        sys.exit(1)

    option, filename = args
    # Reject unknown options up front so only valid ones reach the dispatch.
    if option not in ('--count', '--topcount'):
        print('opção desconhecida: ' + option)
        sys.exit(1)

    if option == '--count':
        print_words(filename)
    else:
        print_top(filename)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment