Last active
February 9, 2025 22:27
-
-
Save imaurer/a7c2a556bd3cce4a1ef7a75d90c3cf19 to your computer and use it in GitHub Desktop.
Configurable script for filtering files found in a git repo and copying content to clipboard
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S uv --quiet run --script | |
# /// script | |
# requires-python = ">=3.11" | |
# dependencies = [ | |
# "click", | |
# "pyperclip", | |
# "pathspec", | |
# "rich", | |
# ] | |
# /// | |
""" | |
pb.py | |
Behavior Summary: | |
1) If the first leftover arg is a known GROUP in the config: | |
- Use that group's includes/excludes | |
- Any leftover arguments after the group are added as *extra* includes (union). | |
2) If the first leftover arg is *not* a known GROUP (or there are no leftover args): | |
- If no leftover args at all and we have a "default" group, use it. | |
Else fallback to includes=["**"], excludes=[]. | |
- If leftover args are present (and not recognized as a group), | |
we ignore the config and treat all leftover args as the complete | |
includes set. (We do not union with "default" or anything.) | |
3) For each leftover arg used as an include: | |
- If it's a real file or directory on disk (relative to CWD or absolute), | |
convert it to a path relative to the Git repo root. | |
If it's a directory, append "/" so we match everything inside it. | |
- Otherwise, treat it as a wildcard pattern. | |
4) -x / --exclude always appends to excludes. | |
Hence: | |
pb . # => If '.' is not a group, includes = [that directory only] | |
pb nvim # => If 'nvim' is a group, use config's includes | |
pb nvim coverage.sh # => group 'nvim', plus an extra include for coverage.sh | |
pb foo bar # => no recognized group 'foo', so includes=[foo, bar] only | |
""" | |
import subprocess | |
import sys | |
from pathlib import Path | |
from typing import Any, Dict, List, Set, Tuple | |
import click | |
import pathspec | |
import pyperclip | |
import tomllib | |
from rich import box | |
from rich.console import Console | |
from rich.table import Table | |
# Config file names probed in the Git root, in priority order: the first
# one that exists as a regular file is loaded and the rest are ignored.
CONFIG_FILENAMES = [
    ".pbconfig",
    ".pbconf",
    "pbconf.toml",
    "pbconfig.toml",
]

# Single rich console shared by every function that prints to the terminal.
console = Console()
@click.command(
    context_settings={"ignore_unknown_options": True, "allow_extra_args": True}
)
@click.option("-x", "--exclude", multiple=True, help="Exclude pattern(s).")
@click.option("-q", "--quiet", is_flag=True, default=False, help="Suppress all output.")
@click.option("-v", "--verbose", is_flag=True, default=False, help="Show detailed output.")
@click.pass_context
def main(ctx: click.Context, exclude: Tuple[str, ...], quiet: bool, verbose: bool):
    """Copy the contents of selected Git-tracked files to the clipboard.

    Leftover arguments select a config group and/or include patterns;
    -x/--exclude patterns are always appended to the exclude list.
    Exits with status 1 when git is unavailable or fails, and with
    status 0 when no tracked file matches the final filters.
    """
    # 1) Grab leftover arguments (everything click did not parse as an option).
    leftover_args = ctx.args

    # 2) Determine Git root. git's own stderr is discarded so that --quiet
    #    really is quiet; our own message is printed instead.
    try:
        git_root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            text=True,
            stderr=subprocess.DEVNULL,
        ).strip()
    except FileNotFoundError:
        if not quiet:
            console.print("[bold red]Error:[/bold red] The git executable was not found.")
        sys.exit(1)
    except subprocess.CalledProcessError:
        if not quiet:
            console.print("[bold red]Error:[/bold red] Not in a Git repository.")
        sys.exit(1)
    root_path = Path(git_root)

    # 3) Load config file if any (first existing candidate wins).
    config_path = next(
        (
            root_path / fname
            for fname in CONFIG_FILENAMES
            if (root_path / fname).is_file()
        ),
        None,
    )
    config_data: Dict[str, Any] = {}
    if config_path is not None:
        if verbose and not quiet:
            console.print(f"⚙ Loading config file: {config_path}")
        try:
            with config_path.open("rb") as f:
                config_data = tomllib.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers tomllib.TOMLDecodeError and invalid UTF-8.
            # A broken config changes which files get copied, so warn even
            # without --verbose; only --quiet suppresses it.
            if not quiet:
                console.print(f"[bold red]Warning:[/bold red] Could not parse {config_path}: {e}")
    elif verbose and not quiet:
        console.print("[yellow]No config file found.[/yellow]")

    # 4) Decide how to compute includes/excludes.
    chosen_group = None
    extra_args = list(leftover_args)
    if leftover_args:
        if leftover_args[0] in config_data:
            # First arg names a group; the rest are extra includes.
            chosen_group = leftover_args[0]
            extra_args = list(leftover_args[1:])
    elif "default" in config_data:
        chosen_group = "default"

    chain_info: List[str] = []
    if chosen_group is not None:
        (group_includes, group_excludes), chain_info, _ = get_group_patterns(
            config_data, chosen_group
        )
        # Union of the group's includes and any extra CLI includes.
        final_includes = list(group_includes) + [
            path_or_pattern(arg, root_path) for arg in extra_args
        ]
        final_excludes = list(group_excludes)
    elif leftover_args:
        # First arg is not a group => ignore config, args are the full include set.
        final_includes = [path_or_pattern(arg, root_path) for arg in leftover_args]
        final_excludes = []
    else:
        # No args and no "default" group => everything.
        final_includes = ["**"]
        final_excludes = []

    # 5) Append excludes from -x.
    final_excludes.extend(interpret_pattern(xpat) for xpat in exclude)

    # 6) Optional verbose info.
    if verbose and not quiet:
        if chosen_group is not None:
            console.print(f"Using group [magenta]{chosen_group}[/magenta].")
            if chain_info:
                console.print(f"Inheritance chain: {' -> '.join(chain_info)}\n")
        else:
            console.print("No group used (direct leftover args).")
        console.print(f"[bold]Final includes[/bold]: {final_includes}")
        console.print(f"[bold]Final excludes[/bold]: {final_excludes}")
        console.print()

    # 7) Run git ls-files. -z emits NUL-terminated, unquoted paths; without
    #    it git C-quotes names containing non-ASCII or special characters,
    #    and those quoted names cannot be opened later.
    try:
        ls_files_out = subprocess.check_output(
            ["git", "ls-files", "-z"], text=True, cwd=git_root
        )
    except subprocess.CalledProcessError as e:
        if not quiet:
            console.print(f"[bold red]Error[/bold red] running git ls-files: {e}")
        sys.exit(1)
    all_files = [name for name in ls_files_out.split("\0") if name]

    # 8) PathSpec filtering.
    inc_spec = pathspec.PathSpec.from_lines("gitwildmatch", final_includes)
    exc_spec = pathspec.PathSpec.from_lines("gitwildmatch", final_excludes)
    selected = [
        f for f in all_files if inc_spec.match_file(f) and not exc_spec.match_file(f)
    ]
    if not selected:
        if not quiet:
            console.print("[bold yellow]No files matched the final filters.[/bold yellow]")
        sys.exit(0)

    # 9) Read content, copy to clipboard.
    clipboard_chunks = []
    results = []
    total_lines = 0
    total_words = 0
    total_bytes = 0
    for path_str in selected:
        full_path = root_path / path_str
        try:
            content = full_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best effort: a tracked file that is missing or unreadable in
            # the working tree is still listed, with empty content.
            content = ""
        # Count lines like `wc -l`, plus one for a final unterminated line;
        # a trailing newline must not be counted as an extra line.
        lines = content.count("\n")
        if content and not content.endswith("\n"):
            lines += 1
        words = len(content.split())
        bcount = len(content.encode("utf-8", errors="replace"))
        total_lines += lines
        total_words += words
        total_bytes += bcount
        results.append((lines, words, bcount, path_str))
        clipboard_chunks.append(f"==> {path_str} <==\n{content}")
    pyperclip.copy("\n".join(clipboard_chunks))

    # 10) If quiet => done.
    if quiet:
        return

    # 11) Print table.
    table = Table(box=box.MINIMAL, show_header=True, header_style="bold cyan")
    table.add_column("LINES", justify="right", style="bold cyan", no_wrap=True)
    table.add_column("WORDS", justify="right", style="bold magenta", no_wrap=True)
    table.add_column("BYTES", justify="right", style="bold green", no_wrap=True)
    table.add_column("FILE", style="yellow")
    for lcount, wcount, bcount, fname in results:
        table.add_row(str(lcount), str(wcount), str(bcount), fname)
    table.add_row(str(total_lines), str(total_words), str(total_bytes), "total", style="bold")
    console.print(table)
    console.print(
        f"{len(selected)} files, {total_lines} lines, {total_words} words, {total_bytes} bytes"
    )
    console.print("✅ Copied all file contents to clipboard.")
# -------------------------------------------------------------------- | |
# Helpers | |
# -------------------------------------------------------------------- | |
def path_or_pattern(arg: str, root_path: Path) -> str:
    """
    Convert a leftover CLI argument into a pathspec (gitwildmatch) pattern.

    - If arg names an existing file/directory inside the repo, return a
      pattern that matches exactly that path, anchored at the Git root
      with a leading '/'. Directories get a trailing '/' so everything
      inside them matches. The Git root itself becomes '**'.
      Absolute paths are used as given; relative paths are tried against
      the CWD first and then against the Git root.
    - If the path does not exist, or exists but lies outside the repo,
      arg is treated as a wildcard pattern via interpret_pattern().
    """
    repo_root = root_path.resolve()
    given = Path(arg)
    if given.is_absolute():
        candidates = [given]
    else:
        candidates = [Path.cwd() / given, root_path / given]

    for candidate in candidates:
        resolved = candidate.resolve()
        if not resolved.exists():
            continue
        try:
            rel = resolved.relative_to(repo_root)
        except ValueError:
            # Exists but is not inside the repo => treat as pattern.
            return interpret_pattern(arg)
        return _repo_path_to_pattern(rel, resolved.is_dir())

    # Nothing on disk matched => pattern.
    return interpret_pattern(arg)


def _repo_path_to_pattern(rel: Path, is_dir: bool) -> str:
    """Turn a repo-relative path into an anchored, escaped gitwildmatch pattern."""
    if not rel.parts:
        # The repo root itself: './' would match nothing, so select everything.
        return "**"
    # as_posix() keeps '/' separators on Windows, where a backslash would
    # otherwise be read as a gitwildmatch escape character. Glob
    # metacharacters that occur in real file names are escaped so the name
    # is matched literally.
    escaped = "".join(
        "\\" + ch if ch in "\\*?[]" else ch for ch in rel.as_posix()
    )
    # The leading '/' anchors the pattern at the Git root; without it a
    # single-segment name such as 'src/' would match at any depth.
    return f"/{escaped}/" if is_dir else f"/{escaped}"
def get_group_patterns( | |
config_data: Dict[str, Any], | |
group_name: str, | |
visited: Set[str] = None | |
) -> Tuple[Tuple[List[str], List[str]], List[str], Dict[str, Dict[str, List[str]]]]: | |
"""Compute includes/excludes for group_name with inheritance.""" | |
if visited is None: | |
visited = set() | |
if group_name in visited: | |
console.print( | |
f"[bold red]Error[/bold red]: cycle in inherits for group '{group_name}'" | |
) | |
return (["**"], []), [group_name], {} | |
visited.add(group_name) | |
gtable = config_data.get(group_name, {}) | |
parent_name = gtable.get("inherits") | |
if isinstance(parent_name, str) and parent_name.strip() in config_data: | |
(pats_parent, parent_chain, parent_details) = get_group_patterns( | |
config_data, parent_name.strip(), visited | |
) | |
(inc_parent, exc_parent) = pats_parent | |
chain_info = parent_chain | |
group_details = parent_details | |
else: | |
inc_parent, exc_parent = (["**"], []) | |
chain_info = [] | |
group_details = {} | |
declared_inc = gtable.get("include") | |
declared_exc = gtable.get("exclude") | |
if declared_inc is not None: | |
inc_self = [interpret_pattern(s) for s in declared_inc] | |
else: | |
inc_self = inc_parent | |
if declared_exc is not None: | |
exc_self = [interpret_pattern(s) for s in declared_exc] | |
else: | |
exc_self = exc_parent | |
chain_info.append(group_name) | |
group_details[group_name] = { | |
"include": declared_inc, | |
"exclude": declared_exc, | |
} | |
return ((inc_self, exc_self), chain_info, group_details) | |
def interpret_pattern(raw: str) -> str:
    """
    Normalize a user-supplied pattern string for pathspec.

    After stripping surrounding whitespace:
    - empty text                         => '**'
    - contains any of * ? [ ] ! /        => returned unchanged
    - '.' followed by more characters    => extension shorthand, '.py' -> '*.py'
    - anything else                      => returned as a literal name
    """
    pattern = raw.strip()
    if pattern == "":
        return "**"

    # Any glob/path character means the user already wrote a real pattern.
    if set(pattern) & set("*?[]!/"):
        return pattern

    looks_like_extension = len(pattern) > 1 and pattern[0] == "."
    return "*" + pattern if looks_like_extension else pattern
# Script entry point: run the click command only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment