Skip to content

Instantly share code, notes, and snippets.

@gbraad
Last active May 12, 2025 12:47
Show Gist options
  • Save gbraad/551eabc8a79de04a1370e6ce1333284f to your computer and use it in GitHub Desktop.
Save gbraad/551eabc8a79de04a1370e6ce1333284f to your computer and use it in GitHub Desktop.
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"strings"
"unicode"
)
// allowedNonASCII is the allowlist of non-ASCII code points.
// isAllowedNonASCII returns true only for runes listed here; checkFile
// reports every other non-ASCII rune it finds in a file as an issue.
var allowedNonASCII = []rune{
'\u00E1', // LATIN SMALL LETTER A WITH ACUTE
'\u00E5', // LATIN SMALL LETTER A WITH RING ABOVE
'\u00E4', // LATIN SMALL LETTER A WITH DIAERESIS
'\u00F6', // LATIN SMALL LETTER O WITH DIAERESIS
'\u00B1', // PLUS-MINUS SIGN
'\u00A7', // SECTION SIGN
'\u00DF', // LATIN SMALL LETTER SHARP S
'\U0001F64F', // PERSON WITH FOLDED HANDS
}
// main checks every file returned by getGitFiles with checkFile and prints
// each finding to standard output as "<file>: <issue>".
//
// The process exits with status 1 when the file list cannot be obtained or
// when at least one file has issues; otherwise it returns normally.
func main() {
	gitFiles, err := getGitFiles()
	if err != nil {
		// A failure of the tool itself is a diagnostic, not a lint finding:
		// send it to stderr so it never mixes with the findings on stdout.
		fmt.Fprintln(os.Stderr, "Error running git ls-files:", err)
		os.Exit(1)
	}

	// Number of files that produced at least one finding.
	failedFiles := 0
	for _, file := range gitFiles {
		errs := checkFile(file)
		if len(errs) == 0 {
			continue
		}
		failedFiles++
		for _, e := range errs {
			fmt.Printf("%s: %s\n", file, e)
		}
	}

	if failedFiles > 0 {
		os.Exit(1)
	}
}
// getGitFiles runs "git ls-files -z" in the current working directory and
// returns the listed paths.
//
// The -z flag makes Git terminate each path with a NUL byte and emit it
// verbatim. Without it, paths containing non-ASCII bytes, quotes or control
// characters are printed in a quoted, escaped form that cannot be passed to
// os.ReadFile. Empty fields are dropped, so a repository with no tracked
// files yields an empty list instead of a single empty path.
//
// The error from running the command is returned unchanged.
func getGitFiles() ([]string, error) {
	cmd := exec.Command("git", "ls-files", "-z")
	output, err := cmd.Output()
	if err != nil {
		return nil, err
	}

	var files []string
	for _, name := range strings.Split(string(output), "\x00") {
		// The output ends with a terminator, so the last field is empty;
		// an entirely empty output likewise produces one empty field.
		if name == "" {
			continue
		}
		files = append(files, name)
	}
	return files, nil
}
// checkFile reads the file at path and returns one message per issue found.
//
// The checks, in reporting order, are:
//   - trailing space or tab at the end of a line (reported with line number)
//   - mixed end-of-line types, as classified by detectEOL
//   - non-ASCII characters that isAllowedNonASCII rejects
//   - a non-empty file whose last byte is not '\n'
//   - binary content, as classified by isBinary
//
// If the file cannot be read, the only message returned describes that
// error. A nil slice means no issues were found.
func checkFile(path string) []string {
	content, err := os.ReadFile(path)
	if err != nil {
		return []string{fmt.Sprintf("cannot open file: %v", err)}
	}

	var issues []string

	// Check for trailing whitespace. Lines are split on '\n', so a line from
	// a CRLF file still ends in '\r'; strip it first, otherwise "foo \r\n"
	// would hide the trailing space from the suffix test.
	for i, line := range bytes.Split(content, []byte("\n")) {
		line = bytes.TrimSuffix(line, []byte("\r"))
		if bytes.HasSuffix(line, []byte(" ")) || bytes.HasSuffix(line, []byte("\t")) {
			issues = append(issues, fmt.Sprintf("line %d: trailing whitespace detected", i+1))
		}
	}

	// Check for mixed EOL types.
	if detectEOL(content) == "mixed" {
		issues = append(issues, "mixed EOL types detected")
	}

	// Check for non-ASCII characters outside the allowlist. Each occurrence
	// is reported separately.
	for _, char := range findNonASCII(content) {
		if !isAllowedNonASCII(char) {
			issues = append(issues, fmt.Sprintf("non-ASCII character detected: %q", char))
		}
	}

	// Check for a missing newline at EOF; an empty file is not reported.
	if len(content) > 0 && content[len(content)-1] != '\n' {
		issues = append(issues, "no EOL at EOF")
	}

	// Check for binary content.
	if isBinary(content) {
		issues = append(issues, "binary content detected")
	}

	return issues
}
// detectEOL classifies the line endings used in content.
//
// It returns:
//   - "none"  when content contains neither '\r' nor '\n'
//   - "LF"    when every line ending is a lone "\n"
//   - "CRLF"  when every line ending is the pair "\r\n"
//   - "CR"    when every line ending is a lone "\r"
//   - "mixed" when more than one of those styles is present
//
// A "\r\n" pair counts as a single CRLF ending, not as one CR plus one LF,
// so a file that uses CRLF consistently is not reported as mixed.
func detectEOL(content []byte) string {
	crlf := bytes.Count(content, []byte("\r\n"))
	// Every CRLF pair contributes one '\r' and one '\n' to the raw counts;
	// subtracting the pairs leaves only the lone occurrences.
	loneCR := bytes.Count(content, []byte("\r")) - crlf
	loneLF := bytes.Count(content, []byte("\n")) - crlf

	switch {
	case crlf == 0 && loneCR == 0 && loneLF == 0:
		return "none"
	case loneCR == 0 && loneLF == 0:
		return "CRLF"
	case crlf == 0 && loneLF == 0:
		return "CR"
	case crlf == 0 && loneCR == 0:
		return "LF"
	default:
		return "mixed"
	}
}
// findNonASCII decodes content as UTF-8 and returns, in order of appearance
// and with repeats, every rune whose value is above unicode.MaxASCII.
// The result is nil when there are none.
func findNonASCII(content []byte) []rune {
	var found []rune
	text := string(content)
	for _, r := range text {
		if r <= unicode.MaxASCII {
			continue
		}
		found = append(found, r)
	}
	return found
}
// isAllowedNonASCII reports whether char appears in the allowedNonASCII
// allowlist.
func isAllowedNonASCII(char rune) bool {
	for i := range allowedNonASCII {
		if allowedNonASCII[i] == char {
			return true
		}
	}
	return false
}
// isBinary reports whether content contains at least one byte that is
// below 0x09 or in the range 0x0E-0x1F.
//
// Bytes 0x09-0x0D (tab, LF, VT, FF, CR) and every byte from 0x20 upward,
// including 0x7F and bytes >= 0x80, do not count as binary. Empty content
// is not binary.
func isBinary(content []byte) bool {
	for _, c := range content {
		switch {
		case c < 0x09, c > 0x0D && c < 0x20:
			return true
		}
	}
	return false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment