Last active
November 9, 2025 11:06
-
-
Save lubosz/5e166a1fd758ac2583b847a53fc7a36c to your computer and use it in GitHub Desktop.
Python script to count string occurrences in a git repo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import argparse
import subprocess
from collections import Counter
from pathlib import Path

from tabulate import tabulate
def count_by_project(grep_output):
    """Aggregate ``git grep -c`` output into per-project match totals.

    Each non-empty line of *grep_output* is expected to look like
    ``<path>:<count>``. The "project" of a path is its first two path
    components joined with ``/`` (a top-level file is its own project).

    Args:
        grep_output: Decoded standard output of ``git grep -c``.

    Returns:
        A ``Counter`` mapping project string to the summed match count,
        in order of first appearance.

    Raises:
        ValueError: If a line does not end in ``:<digits>``.
    """
    totals = Counter()
    for line in grep_output.split("\n"):
        if not line:
            continue
        # The count is always the last field, so split from the right;
        # the path itself may legitimately contain ':' characters.
        path_str, sep, count_str = line.rpartition(":")
        if not sep or not count_str.isdigit():
            raise ValueError(f"unexpected `git grep -c` output line: {line!r}")
        project = "/".join(Path(path_str).parts[:2])
        totals[project] += int(count_str)
    return totals


def main():
    """Count occurrences of a string in a git repo and print them per project.

    Reads ``repo_path`` and ``needle`` from the command line, runs
    ``git grep -c`` inside the repository, and prints a table of match
    counts grouped by the first two path components of each file.

    Raises:
        subprocess.CalledProcessError: If ``git grep`` fails for a reason
            other than "no matches found".
        ValueError: If the ``git grep`` output cannot be parsed.
    """
    parser = argparse.ArgumentParser(prog='count-occurrences', description='Count string occurrences in git repo.')
    parser.add_argument('repo_path', type=Path)
    parser.add_argument('needle', type=str)
    args = parser.parse_args()

    # Pass the needle via -e so a pattern starting with "-" is not
    # mistaken for a command-line option.
    proc = subprocess.run(
        ["git", "grep", "-c", "-e", args.needle],
        cwd=args.repo_path,
        stdout=subprocess.PIPE,
        check=False,
    )
    # Like grep, git grep exits with status 1 when nothing matched; that is
    # a valid (empty) result, not a failure. Anything else is a real error.
    if proc.returncode not in (0, 1):
        raise subprocess.CalledProcessError(
            proc.returncode, proc.args, output=proc.stdout
        )

    # File names are arbitrary bytes; do not crash on non-UTF-8 paths.
    text = proc.stdout.decode(errors="replace")
    print(tabulate(count_by_project(text).items()))
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment