Created
June 17, 2025 15:36
-
-
Save huangsam/3dda4e91e6c94b3446a3847b12a3012c to your computer and use it in GitHub Desktop.
Finding oldest Git lines with Go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "bufio" | |
| "fmt" | |
| "io" | |
| "log" | |
| "os" | |
| "os/exec" | |
| "path/filepath" | |
| "regexp" | |
| "sort" | |
| "strings" | |
| "sync" | |
| ) | |
// PrettyLineBlame holds the fields extracted from a single line of
// `git blame` output, ready for sorting and tabular printing.
//
// All fields are kept as strings exactly as captured from the blame line:
//   - DateTime:    commit timestamp formatted as "YYYYMMDD HH:MM:SS"
//   - CommitHash:  (possibly abbreviated) commit hash
//   - Author:      author name
//   - LineNum:     line number within the file
//   - FilePath:    file path as reported by git blame
//   - CodeContent: text of the blamed line
type PrettyLineBlame struct {
	// Commit metadata.
	DateTime, CommitHash, Author string
	// Location of the line and its content.
	LineNum, FilePath, CodeContent string
}
// blameLineRegex parses one line of `git blame -f` output produced with
// --date=format:%Y%m%d %H:%M:%S.
//
// Example line:
//
//	2c4fbdf9 README.md (Samuel Huang 20200928 08:15:30 1) # GitHub Pages for huangsam
//
// Lines attributed to a boundary commit (by default this includes the root
// commit, i.e. usually the oldest lines in the repository) are printed by git
// with a leading caret and a hash that is one character shorter:
//
//	^2c4fbdf README.md (Samuel Huang 20200928 08:15:30 1) # GitHub Pages for huangsam
//
// The caret is matched but kept outside capture group 1, so the group always
// holds a plain hexadecimal hash of at least 7 characters.
//
// Capture groups: 1 commit hash, 2 file path, 3 author, 4 date and time,
// 5 line number, 6 line content.
var blameLineRegex = regexp.MustCompile(
	`^\^?([0-9a-f]{7,40})\s+` + // 1: Commit hash (e.g., 2c4fbdf9), optional boundary caret in front
		`(\S+)\s+` + // 2: FilePath (e.g., README.md); paths containing whitespace do not match
		`\((.*?)\s+` + // 3: Author name (e.g., Samuel Huang) - non-greedy match
		`(\d{8}\s+\d{2}:\d{2}:\d{2})\s+` + // 4: Date and Time (YYYYMMDD HH:MM:SS)
		`\s*(\d+)\)\s*` + // 5: Line Number (e.g., 1) - with optional spaces before digit
		`(.*)$`) // 6: The actual code content
// workerWg tracks the `git blame` worker goroutines started by main: it is
// incremented before each worker is launched, each worker calls Done when it
// finishes, and main waits on it before closing the results channel.
var workerWg sync.WaitGroup
| func main() { | |
| // Set log output to stderr to keep stdout clean for the results | |
| log.SetOutput(os.Stderr) | |
| if len(os.Args) < 2 { | |
| fmt.Println("Usage: go run main.go <directory>") | |
| os.Exit(1) | |
| } | |
| rootDir := os.Args[1] | |
| blameResults := make(chan PrettyLineBlame, 1000) | |
| var allBlames []PrettyLineBlame | |
| var collectorWg sync.WaitGroup | |
| collectorWg.Add(1) | |
| go func() { | |
| defer collectorWg.Done() | |
| for lb := range blameResults { | |
| allBlames = append(allBlames, lb) | |
| } | |
| }() | |
| err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error { | |
| if err != nil { | |
| log.Printf("ERROR: β Accessing path %q: %v\n", path, err) | |
| return nil | |
| } | |
| if info.IsDir() { | |
| if info.Name() == ".git" || info.Name() == "node_modules" { | |
| return filepath.SkipDir | |
| } | |
| return nil | |
| } | |
| file, err := os.Open(path) | |
| if err != nil { | |
| log.Printf("ERROR: π Opening file %q for binary check: %v\n", path, err) | |
| return nil | |
| } | |
| defer file.Close() | |
| buffer := make([]byte, 512) | |
| n, err := file.Read(buffer) | |
| if err != nil && err != io.EOF { | |
| log.Printf("ERROR: π Reading file %q for binary check: %v\n", path, err) | |
| return nil | |
| } | |
| isBinary := false | |
| for i := 0; i < n; i++ { | |
| if buffer[i] == 0 { | |
| isBinary = true | |
| break | |
| } | |
| } | |
| if isBinary { | |
| return nil // Skip binary files silently | |
| } | |
| // Check if the file is tracked by Git | |
| cmdLsFiles := exec.Command("git", "ls-files", "--error-unmatch", path) | |
| cmdLsFiles.Stderr = nil // Suppress stderr for this command | |
| if err := cmdLsFiles.Run(); err != nil { | |
| // File is not tracked by Git, skip it | |
| return nil | |
| } | |
| workerWg.Add(1) | |
| go func(filePath string, outputChan chan<- PrettyLineBlame) { | |
| defer workerWg.Done() | |
| performGitBlame(filePath, outputChan) | |
| }(path, blameResults) | |
| return nil | |
| }) | |
| if err != nil { | |
| log.Fatalf("FATAL: π₯ Error walking the file system: %v\n", err) | |
| } | |
| workerWg.Wait() // Wait for all blame operations to complete | |
| close(blameResults) // Close the channel when all workers are done | |
| collectorWg.Wait() // Wait for the collector to finish processing | |
| // Sort by DateTime (lexicographical sort works for YYYYMMDD HH:MM:SS) | |
| sort.Slice(allBlames, func(i, j int) bool { | |
| return allBlames[i].DateTime < allBlames[j].DateTime | |
| }) | |
| tailSize := 10 | |
| if len(allBlames) < tailSize { | |
| tailSize = len(allBlames) | |
| } | |
| fmt.Println("\n--- π°οΈ Oldest Lines ---") | |
| if tailSize == 0 { | |
| fmt.Println("No blameable lines found matching criteria. π€") | |
| } else { | |
| // Calculate max widths for each column for character alignment, | |
| // *only considering the lines that will be printed (the tailSize)* | |
| maxDateTimeLen := len("YYYYMMDD HH:MM:SS") // New fixed length for DateTime | |
| maxCommitHashLen := len("0391ba3") // Fixed length for short hash (7 chars) | |
| maxFilePathLen := 0 | |
| maxAuthorLen := 0 | |
| maxLineNumLen := 0 | |
| // Iterate only over the 'tailSize' (the lines to be printed) for width calculation | |
| for i := 0; i < tailSize; i++ { | |
| lb := allBlames[i] | |
| if len(lb.FilePath) > maxFilePathLen { | |
| maxFilePathLen = len(lb.FilePath) | |
| } | |
| if len(lb.Author) > maxAuthorLen { | |
| maxAuthorLen = len(lb.Author) | |
| } | |
| if len(lb.LineNum) > maxLineNumLen { | |
| maxLineNumLen = len(lb.LineNum) | |
| } | |
| } | |
| // Add some padding to each max length for better readability | |
| padding := 2 // Adjust this value to control spacing between columns | |
| // maxDateTimeLen += 0 // DateTime is fixed length, emoji handles left pad | |
| maxCommitHashLen += padding | |
| maxFilePathLen += padding | |
| maxAuthorLen += padding | |
| maxLineNumLen += padding | |
| // Print formatted output | |
| for i := 0; i < tailSize; i++ { | |
| lb := allBlames[i] | |
| // Using fmt.Sprintf to format each field to a fixed width | |
| // %-Ns means left-align, pad with spaces to N characters | |
| fmt.Printf("ποΈ %-*s π %-*s π %-*s π€ %-*s:L%-*s \"%s\"\n", | |
| maxDateTimeLen, lb.DateTime, | |
| maxCommitHashLen, lb.CommitHash[:7], // Using short hash here | |
| maxFilePathLen, lb.FilePath, | |
| maxAuthorLen, strings.TrimSpace(lb.Author), | |
| maxLineNumLen, lb.LineNum, | |
| strings.TrimSpace(lb.CodeContent), | |
| ) | |
| } | |
| } | |
| } | |
| func performGitBlame(filePath string, outputChan chan<- PrettyLineBlame) { | |
| // Changed --date=format to include HH:MM:SS | |
| cmd := exec.Command("git", "blame", "--date=format:%Y%m%d %H:%M:%S", "-f", filePath) | |
| stdout, err := cmd.StdoutPipe() | |
| if err != nil { | |
| log.Printf("ERROR: π« %s: Failed to create stdout pipe: %v\n", filePath, err) | |
| return | |
| } | |
| stderr, err := cmd.StderrPipe() | |
| if err != nil { | |
| log.Printf("ERROR: π« %s: Failed to create stderr pipe: %v\n", filePath, err) | |
| return | |
| } | |
| if err := cmd.Start(); err != nil { | |
| log.Printf("ERROR: β %s: Failed to start git blame: %v\n", filePath, err) | |
| return | |
| } | |
| scanner := bufio.NewScanner(stdout) | |
| for scanner.Scan() { | |
| line := scanner.Text() | |
| matches := blameLineRegex.FindStringSubmatch(line) | |
| if matches == nil || len(matches) != 7 { // Full match + 6 captured groups | |
| continue // Skip lines that don't match the expected blame format | |
| } | |
| outputChan <- PrettyLineBlame{ | |
| DateTime: matches[4], // Date and Time (YYYYMMDD HH:MM:SS) - NEW | |
| CommitHash: matches[1], // Commit hash | |
| Author: strings.TrimSpace(matches[3]), // Author name | |
| LineNum: matches[5], // Line Number | |
| FilePath: matches[2], // File Path (e.g., README.md) | |
| CodeContent: strings.TrimLeft(matches[6], "\t "), // Actual code content | |
| } | |
| } | |
| slurp, _ := io.ReadAll(stderr) | |
| if len(slurp) > 0 { | |
| log.Printf("WARNING: β οΈ git blame stderr for %s: %s", filePath, strings.TrimSpace(string(slurp))) | |
| } | |
| if err := cmd.Wait(); err != nil { | |
| log.Printf("ERROR: π %s: git blame finished with error: %v\n", filePath, err) | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment