Skip to content

Instantly share code, notes, and snippets.

@imaurer
Last active February 9, 2025 22:27
Show Gist options
  • Save imaurer/a7c2a556bd3cce4a1ef7a75d90c3cf19 to your computer and use it in GitHub Desktop.
Save imaurer/a7c2a556bd3cce4a1ef7a75d90c3cf19 to your computer and use it in GitHub Desktop.
Configurable script for filtering files found in a git repo and copying content to clipboard
#!/usr/bin/env -S uv --quiet run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "click",
# "pyperclip",
# "pathspec",
# "rich",
# ]
# ///
"""
pb.py
Behavior Summary:
1) If the first leftover arg is a known GROUP in the config:
- Use that group's includes/excludes
- Any leftover arguments after the group are added as *extra* includes (union).
2) If the first leftover arg is *not* a known GROUP (or there are no leftover args):
   - If there are no leftover args at all and the config defines a "default" group,
     use it; otherwise fall back to includes=["**"], excludes=[].
   - If leftover args are present (and the first one is not recognized as a group),
     the config is ignored and all leftover args are treated as the complete
     includes set. (They are not unioned with "default" or any other group.)
3) For each leftover arg used as an include:
- If it's a real file or directory on disk (relative to CWD or absolute),
convert it to a path relative to the Git repo root.
If it's a directory, append "/" so we match everything inside it.
- Otherwise, treat it as a wildcard pattern.
4) -x / --exclude always appends to excludes.
Hence:
pb . # => If '.' is not a group, includes = [that directory only]
pb nvim # => If 'nvim' is a group, use config's includes
pb nvim coverage.sh # => group 'nvim', plus an extra include for coverage.sh
pb foo bar # => no recognized group 'foo', so includes=[foo, bar] only
"""
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple
import click
import pathspec
import pyperclip
import tomllib
from rich import box
from rich.console import Console
from rich.table import Table
# Config file names probed at the Git repository root, in priority order
# (the first one that exists as a file is used).
CONFIG_FILENAMES = [".pbconfig", ".pbconf", "pbconf.toml", "pbconfig.toml"]
# Shared rich console used for all terminal output in this script.
console = Console()
@click.command(
    context_settings={"ignore_unknown_options": True, "allow_extra_args": True}
)
@click.option("-x", "--exclude", multiple=True, help="Exclude pattern(s).")
@click.option("-q", "--quiet", is_flag=True, default=False, help="Suppress all output.")
@click.option("-v", "--verbose", is_flag=True, default=False, help="Show detailed output.")
@click.pass_context
def main(ctx: click.Context, exclude: Tuple[str, ...], quiet: bool, verbose: bool):
    """Copy the contents of git-tracked files to the clipboard.

    Positional arguments are an optional GROUP name from the config file,
    followed by paths or wildcard patterns to include.
    """
    # 1) Grab leftover arguments. Positional args are not declared as click
    #    arguments; click leaves them in ctx.args because allow_extra_args is set.
    leftover_args = ctx.args

    # 2) Determine Git root; all patterns and file paths are relative to it.
    try:
        git_root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"], text=True
        ).strip()
    except FileNotFoundError:
        # git is not installed / not on PATH.
        if not quiet:
            console.print("[bold red]Error:[/bold red] The git executable was not found.")
        sys.exit(1)
    except subprocess.CalledProcessError:
        if not quiet:
            console.print("[bold red]Error:[/bold red] Not in a Git repository.")
        sys.exit(1)
    root_path = Path(git_root)

    # 3) Load config file if any (first existing name in CONFIG_FILENAMES wins).
    config_path = None
    for fname in CONFIG_FILENAMES:
        candidate = root_path / fname
        if candidate.is_file():
            config_path = candidate
            break

    config_data: Dict[str, Any] = {}
    if config_path:
        if verbose and not quiet:
            console.print(f"⚙ Loading config file: {config_path}")
        try:
            with config_path.open("rb") as f:
                config_data = tomllib.load(f)
        except (OSError, ValueError) as e:
            # tomllib.TOMLDecodeError and UnicodeDecodeError are both ValueError
            # subclasses. A broken config changes which files get selected, so
            # warn unless --quiet was requested (not only in verbose mode).
            if not quiet:
                console.print(f"[bold red]Warning:[/bold red] Could not parse {config_path}: {e}")
    elif verbose and not quiet:
        console.print("[yellow]No config file found.[/yellow]")

    # 4) Decide how to compute includes/excludes.
    chain_info: List[str] = []
    if not leftover_args:
        # No leftover args -> use "default" if present, else match everything.
        if "default" in config_data:
            chosen_group = "default"
            (final_includes, final_excludes), chain_info, _ = get_group_patterns(
                config_data, chosen_group
            )
        else:
            chosen_group = None
            final_includes = ["**"]
            final_excludes = []
    elif leftover_args[0] in config_data:
        # First arg is a recognized group; the remaining args are extra
        # includes that are unioned with the group's own includes.
        chosen_group = leftover_args[0]
        (group_includes, group_excludes), chain_info, _ = get_group_patterns(
            config_data, chosen_group
        )
        extra_includes = [path_or_pattern(a, root_path) for a in leftover_args[1:]]
        final_includes = group_includes + extra_includes
        final_excludes = group_excludes
    else:
        # First arg is NOT a recognized group => ignore the config entirely and
        # treat every leftover arg as an include.
        chosen_group = None
        final_includes = [path_or_pattern(a, root_path) for a in leftover_args]
        final_excludes = []

    # 5) Append excludes from -x (on a copy, so the group's list is not mutated).
    final_excludes = list(final_excludes) + [interpret_pattern(xpat) for xpat in exclude]

    # 6) Optional verbose info.
    if verbose and not quiet:
        if chosen_group:
            console.print(f"Using group [magenta]{chosen_group}[/magenta].")
            if chain_info:
                console.print(f"Inheritance chain: {' -> '.join(chain_info)}\n")
        else:
            console.print("No group used (direct leftover args).")
        console.print(f"[bold]Final includes[/bold]: {final_includes}")
        console.print(f"[bold]Final excludes[/bold]: {final_excludes}")
        console.print()

    # 7) Run git ls-files. -z emits NUL-terminated, unquoted paths; without it
    #    git C-quotes names containing non-ASCII or special characters, and
    #    those quoted names would neither match patterns nor open from disk.
    try:
        ls_files_out = subprocess.check_output(
            ["git", "ls-files", "-z"], text=True, cwd=git_root
        )
    except subprocess.CalledProcessError as e:
        if not quiet:
            console.print(f"[bold red]Error[/bold red] running git ls-files: {e}")
        sys.exit(1)
    all_files = [name for name in ls_files_out.split("\0") if name]

    # 8) PathSpec filtering: keep files matching an include and no exclude.
    inc_spec = pathspec.PathSpec.from_lines("gitwildmatch", final_includes)
    exc_spec = pathspec.PathSpec.from_lines("gitwildmatch", final_excludes)
    selected = [
        f for f in all_files
        if inc_spec.match_file(f) and not exc_spec.match_file(f)
    ]
    if not selected:
        if not quiet:
            console.print("[bold yellow]No files matched the final filters.[/bold yellow]")
        sys.exit(0)

    # 9) Read content, copy to clipboard.
    clipboard_chunks = []
    results = []
    total_lines = 0
    total_words = 0
    total_bytes = 0
    for path_str in selected:
        full_path = root_path / path_str
        try:
            content = full_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            # Best effort: an unreadable entry (e.g. a tracked path that is a
            # directory or is missing on disk) still gets a header with an
            # empty body.
            content = ""
            if verbose and not quiet:
                console.print(f"[bold red]Warning:[/bold red] Could not read {path_str}: {e}")
        # Count newline-terminated lines, plus a trailing line that has no
        # final newline, so "a\nb\n" is 2 lines rather than 3.
        lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
        words = len(content.split())
        bcount = len(content.encode("utf-8", errors="replace"))
        total_lines += lines
        total_words += words
        total_bytes += bcount
        results.append((lines, words, bcount, path_str))
        clipboard_chunks.append(f"==> {path_str} <==\n{content}")

    try:
        pyperclip.copy("\n".join(clipboard_chunks))
    except pyperclip.PyperclipException as e:
        # Raised when no clipboard mechanism is available (e.g. headless Linux).
        if not quiet:
            console.print(f"[bold red]Error:[/bold red] Could not copy to clipboard: {e}")
        sys.exit(1)

    # 10) If quiet => done.
    if quiet:
        return

    # 11) Print a wc-style summary table with a totals row.
    table = Table(box=box.MINIMAL, show_header=True, header_style="bold cyan")
    table.add_column("LINES", justify="right", style="bold cyan", no_wrap=True)
    table.add_column("WORDS", justify="right", style="bold magenta", no_wrap=True)
    table.add_column("BYTES", justify="right", style="bold green", no_wrap=True)
    table.add_column("FILE", style="yellow")
    for lcount, wcount, bcount, fname in results:
        table.add_row(str(lcount), str(wcount), str(bcount), fname)
    table.add_row(str(total_lines), str(total_words), str(total_bytes), "total", style="bold")
    console.print(table)
    console.print(
        f"{len(selected)} files, {total_lines} lines, {total_words} words, {total_bytes} bytes"
    )
    console.print("✅ Copied all file contents to clipboard.")
# --------------------------------------------------------------------
# Helpers
# --------------------------------------------------------------------
def path_or_pattern(arg: str, root_path: Path) -> str:
"""
Convert a leftover CLI argument into a pathspec pattern.
- If arg is an existing file/directory (absolute or relative to CWD),
we return the path *relative to git root*.
If it's a directory, we append '/' so that PathSpec matches all contents.
- Otherwise, we treat arg as a wildcard pattern.
"""
p = Path(arg)
# 1) If absolute path
if p.is_absolute():
if p.exists():
try:
rel = p.resolve().relative_to(root_path.resolve())
# If it's a directory, append slash
if (root_path / rel).is_dir():
return f"{rel}/"
return str(rel)
except ValueError:
# not inside repo => treat as pattern
return interpret_pattern(arg)
else:
# doesn't exist => pattern
return interpret_pattern(arg)
else:
# 2) If relative path
# check if (CWD / p) exists
actual = (Path.cwd() / p).resolve()
if actual.exists():
try:
rel = actual.relative_to(root_path.resolve())
if (root_path / rel).is_dir():
return f"{rel}/"
return str(rel)
except ValueError:
# not inside the repo => treat as pattern
return interpret_pattern(arg)
else:
# maybe it exists if we interpret as (root_path / p)?
r2 = (root_path / p).resolve()
if r2.exists():
try:
rel2 = r2.relative_to(root_path.resolve())
if (root_path / rel2).is_dir():
return f"{rel2}/"
return str(rel2)
except ValueError:
return interpret_pattern(arg)
# else => pattern
return interpret_pattern(arg)
def get_group_patterns(
config_data: Dict[str, Any],
group_name: str,
visited: Set[str] = None
) -> Tuple[Tuple[List[str], List[str]], List[str], Dict[str, Dict[str, List[str]]]]:
"""Compute includes/excludes for group_name with inheritance."""
if visited is None:
visited = set()
if group_name in visited:
console.print(
f"[bold red]Error[/bold red]: cycle in inherits for group '{group_name}'"
)
return (["**"], []), [group_name], {}
visited.add(group_name)
gtable = config_data.get(group_name, {})
parent_name = gtable.get("inherits")
if isinstance(parent_name, str) and parent_name.strip() in config_data:
(pats_parent, parent_chain, parent_details) = get_group_patterns(
config_data, parent_name.strip(), visited
)
(inc_parent, exc_parent) = pats_parent
chain_info = parent_chain
group_details = parent_details
else:
inc_parent, exc_parent = (["**"], [])
chain_info = []
group_details = {}
declared_inc = gtable.get("include")
declared_exc = gtable.get("exclude")
if declared_inc is not None:
inc_self = [interpret_pattern(s) for s in declared_inc]
else:
inc_self = inc_parent
if declared_exc is not None:
exc_self = [interpret_pattern(s) for s in declared_exc]
else:
exc_self = exc_parent
chain_info.append(group_name)
group_details[group_name] = {
"include": declared_inc,
"exclude": declared_exc,
}
return ((inc_self, exc_self), chain_info, group_details)
def interpret_pattern(raw: str) -> str:
    """
    Turn a raw user/config string into a pathspec pattern.

    Rules, applied to the whitespace-stripped input in this order:
    - empty                                   => '**' (match everything)
    - contains any of * ? [ ] ! /             => returned unchanged
    - starts with '.' and is longer than '.'  => extension, e.g. '.py' -> '*.py'
    - anything else                           => returned as a literal name
    """
    text = raw.strip()
    if text == "":
        return "**"

    # Any glob metacharacter or path separator means the user wrote a real pattern.
    has_glob_syntax = not set(text).isdisjoint("*?[]!/")
    if has_glob_syntax:
        return text

    looks_like_extension = len(text) > 1 and text[0] == "."
    return f"*{text}" if looks_like_extension else text
# Script entry point: invoke the click command only when run directly.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment