Created
November 19, 2023 16:14
-
-
Save tos-kamiya/ee8499f35cd0b4e0d2f830720a194cbc to your computer and use it in GitHub Desktop.
A tool for aggregating file extensions and languages in a directory, compatible with the tokei project (https://github.com/XAMPPRocky/tokei).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The languages.json is obtained from the tokei project
# Source: https://github.com/XAMPPRocky/tokei/blob/c8e4d0703252c87b1df45382b365c6bb00769dbe/languages.json
import json
import os
import sys
from collections import Counter
from typing import Dict, Optional, Counter as CounterType
def load_ext_to_lang_name(languages_json_path: Optional[str] = None) -> Dict[str, str]:
    """
    Load a dictionary mapping file extensions to language names.

    :param languages_json_path: Path of a tokei-style languages.json file.
        When omitted, the languages.json located next to this script is used.
    :return: Dictionary with file extensions (always with a leading ".") as keys
        and language names as values.
    """
    if languages_json_path is None:
        # Default: the configuration file that sits beside this script.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        languages_json_path = os.path.join(script_dir, "languages.json")

    # Decode explicitly as UTF-8 instead of relying on the platform's locale encoding.
    with open(languages_json_path, "r", encoding="utf-8") as inp:
        lang_data = json.load(inp)

    # Build the extension -> language name table.
    ext_to_name: Dict[str, str] = {}
    for lang_id, info in lang_data["languages"].items():
        # An entry without an explicit "name" is reported under its identifier.
        name = info.get("name", lang_id)
        for ext in info.get("extensions") or ():
            # Normalize to the ".ext" form produced by os.path.splitext.
            if not ext.startswith("."):
                ext = "." + ext
            ext_to_name[ext] = name
    return ext_to_name
def count_file_extensions(directory: str) -> CounterType[str]:
    """
    Tally the files found below a directory by their extension.

    :param directory: Root of the directory tree to walk.
    :return: A Counter keyed by extension as returned by os.path.splitext
        (leading dot included, "" for names without an extension).
    """
    return Counter(
        os.path.splitext(filename)[1]
        for _dirpath, _dirnames, filenames in os.walk(directory)
        for filename in filenames
    )
# Module help text. It is assigned explicitly because it appears after the
# imports and function definitions, where a bare string literal would not be
# picked up as the module docstring.
__doc__ = """
lang_ext_counter.py is a tool for aggregating file extensions and languages in a directory, compatible with the tokei project (https://github.com/XAMPPRocky/tokei).
It analyzes all files in a specified directory, counts the occurrences of each file extension,
and maps them to their corresponding programming languages using data from tokei's languages.json.
"""
def main() -> None:
    """
    Print a tab-separated table of extension, language name and file count.

    The directory to scan is taken from the first command-line argument and
    defaults to the current directory. Extensions that have no entry in the
    language table are reported with "-" as their type.
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else os.curdir
    if not os.path.isdir(directory):
        # Without this check a mistyped path would just print an empty table.
        sys.exit(f"error: not a directory: {directory}")

    ext_to_lang_name = load_ext_to_lang_name()
    counts = count_file_extensions(directory)

    ext_name_count_list = []
    for ext, count in counts.items():
        # Prefer the exact spelling; fall back to lowercase so that e.g. ".PY"
        # is still recognized when the table only lists ".py".
        name = ext_to_lang_name.get(ext) or ext_to_lang_name.get(ext.lower()) or "-"
        ext_name_count_list.append((ext, name, count))

    print("extension\ttype\tcount")
    for ext, name, count in sorted(ext_name_count_list):
        print(f"{ext}\t{name}\t{count}")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment