|
package main |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"os" |
|
"os/exec" |
|
"strings" |
|
"unicode" |
|
) |
|
|
|
// allowedNonASCII is the whitelist of code points above U+007F that checked
// files may contain. Entries are written as Unicode code point values; the
// order of the list carries no meaning for membership tests.
var allowedNonASCII = []rune{
	0x00E1,  // U+00E1 LATIN SMALL LETTER A WITH ACUTE
	0x00E5,  // U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
	0x00E4,  // U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
	0x00F6,  // U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
	0x00B1,  // U+00B1 PLUS-MINUS SIGN
	0x00A7,  // U+00A7 SECTION SIGN
	0x00DF,  // U+00DF LATIN SMALL LETTER SHARP S
	0x1F64F, // U+1F64F PERSON WITH FOLDED HANDS
}
|
|
|
func main() { |
|
// Run "git ls-files" to get a list of all files in the repository |
|
gitFiles, err := getGitFiles() |
|
if err != nil { |
|
fmt.Println("Error running git ls-files:", err) |
|
os.Exit(1) |
|
} |
|
|
|
issues := 0 |
|
|
|
for _, file := range gitFiles { |
|
errs := checkFile(file) |
|
if len(errs) > 0 { |
|
issues++ |
|
for _, e := range errs { |
|
fmt.Printf("%s: %s\n", file, e) |
|
} |
|
} |
|
} |
|
|
|
if issues > 0 { |
|
os.Exit(1) |
|
} |
|
} |
|
|
|
// getGitFiles runs "git ls-files -z" in the current directory and returns the
// listed paths.
//
// The -z form is used because its output is NUL-separated and unquoted, so
// names containing non-ASCII characters, quotes, newlines or leading/trailing
// whitespace come back exactly as stored. An empty listing yields a nil slice
// rather than a single empty path. Any error from running git is returned
// unchanged.
func getGitFiles() ([]string, error) {
	out, err := exec.Command("git", "ls-files", "-z").Output()
	if err != nil {
		return nil, err
	}

	var files []string
	for _, name := range strings.Split(string(out), "\x00") {
		// The output ends with a NUL, which produces a final empty field.
		if name != "" {
			files = append(files, name)
		}
	}
	return files, nil
}
|
|
|
// checkFile performs checks for a single file and returns any issues found |
|
func checkFile(path string) []string { |
|
var issues []string |
|
|
|
// Read the entire file content |
|
content, err := os.ReadFile(path) |
|
if err != nil { |
|
issues = append(issues, fmt.Sprintf("cannot open file: %v", err)) |
|
return issues |
|
} |
|
|
|
lines := bytes.Split(content, []byte("\n")) |
|
|
|
// Check for trailing whitespace |
|
for i, line := range lines { |
|
if bytes.HasSuffix(line, []byte(" ")) || bytes.HasSuffix(line, []byte("\t")) { |
|
issues = append(issues, fmt.Sprintf("line %d: trailing whitespace detected", i+1)) |
|
} |
|
} |
|
|
|
// Check for mixed EOL types |
|
eolType := detectEOL(content) |
|
if eolType == "mixed" { |
|
issues = append(issues, "mixed EOL types detected") |
|
} |
|
|
|
// Check for non-ASCII characters |
|
nonAscii := findNonASCII(content) |
|
for _, char := range nonAscii { |
|
if !isAllowedNonASCII(char) { |
|
issues = append(issues, fmt.Sprintf("non-ASCII character detected: %q", char)) |
|
} |
|
} |
|
|
|
// Check for missing newline at EOF |
|
if len(content) > 0 && content[len(content)-1] != '\n' { |
|
issues = append(issues, "no EOL at EOF") |
|
} |
|
|
|
// Check for binary content |
|
if isBinary(content) { |
|
issues = append(issues, "binary content detected") |
|
} |
|
|
|
return issues |
|
} |
|
|
|
// detectEOL classifies the line terminators found in content.
//
// It returns "LF", "CRLF" or "CR" when exactly one kind of terminator occurs,
// "mixed" when more than one kind occurs, and "none" when content contains
// neither '\r' nor '\n'. A "\r\n" pair counts as a single CRLF terminator, so
// a file that uses CRLF consistently is not reported as mixed.
func detectEOL(content []byte) string {
	crlf := bytes.Count(content, []byte("\r\n"))
	// Each CRLF pair contains one CR and one LF; subtracting the pairs leaves
	// only the bare terminators.
	cr := bytes.Count(content, []byte("\r")) - crlf
	lf := bytes.Count(content, []byte("\n")) - crlf

	kinds := 0
	style := "none"
	if crlf > 0 {
		kinds++
		style = "CRLF"
	}
	if cr > 0 {
		kinds++
		style = "CR"
	}
	if lf > 0 {
		kinds++
		style = "LF"
	}

	if kinds > 1 {
		return "mixed"
	}
	return style
}
|
|
|
// findNonASCII decodes content as a string and collects, in order of
// appearance, every rune whose value is above unicode.MaxASCII. Repeated
// characters are returned once per occurrence; the result is nil when no
// such rune exists.
func findNonASCII(content []byte) []rune {
	var found []rune
	text := string(content)
	for _, r := range text {
		if r <= unicode.MaxASCII {
			continue
		}
		found = append(found, r)
	}
	return found
}
|
|
|
// isAllowedNonASCII checks if a non-ASCII character is in the allowed list |
|
func isAllowedNonASCII(char rune) bool { |
|
for _, allowed := range allowedNonASCII { |
|
if char == allowed { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
// isBinary reports whether content contains a byte treated as binary data.
//
// A byte counts as binary when it is a control character below 0x20 other
// than 0x09-0x0D (tab, line feed, vertical tab, form feed, carriage return).
// DEL (0x7F) and bytes at or above 0x80 are not treated as binary. Empty or
// nil content is not binary.
func isBinary(content []byte) bool {
	for _, b := range content {
		if b < 0x09 || (b > 0x0D && b < 0x20) {
			return true
		}
	}
	return false
}