Skip to content

Instantly share code, notes, and snippets.

@yosignals
Created March 22, 2023 21:50
Show Gist options
  • Save yosignals/e6d1cfe889308b793bffe6fc0a07be79 to your computer and use it in GitHub Desktop.
Save yosignals/e6d1cfe889308b793bffe6fc0a07be79 to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"flag"
"fmt"
"os"
"sort"
)
// HashFrequency records how many times a single hash value was seen in the
// input file.
type HashFrequency struct {
	// Hash is the input line exactly as it was read from the file.
	Hash string
	// Frequency is the number of lines equal to Hash; Users is incremented
	// in lockstep with it by main, one per line.
	Frequency, Users int
}
// main reads a file containing one hash per line, counts how often each hash
// occurs, and prints the hashes that occur more than once in descending order
// of frequency, followed by summary statistics.
//
// Flags:
//
//	-d  number of top duplicates to display (values <= 0 show all)
//	-p  path to the hashes file (default "hashes.txt")
//	-h  display the help message and exit
//
// The process exits with status 1 when the file cannot be opened or read.
func main() {
	var numDuplicates int
	var filePath string
	var showHelp bool
	flag.IntVar(&numDuplicates, "d", -1, "Number of top duplicates to display. By default, show all hashes.")
	flag.StringVar(&filePath, "p", "hashes.txt", "Path to the hashes file.")
	flag.BoolVar(&showHelp, "h", false, "Display help message.")
	flag.Parse()

	if showHelp {
		displayHelp()
		return
	}

	file, err := os.Open(filePath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error reading file:", err)
		fmt.Fprintln(os.Stderr, "Please provide a valid file path using the '-p' flag, e.g., '-p /path/to/your/hashes/file'")
		os.Exit(1)
	}

	counts := make(map[string]*HashFrequency)
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		// Blank lines are not hashes; counting them would report the empty
		// string as a duplicate whenever the file has two or more of them.
		if line == "" {
			continue
		}
		entry, ok := counts[line]
		if !ok {
			entry = &HashFrequency{Hash: line}
			counts[line] = entry
		}
		entry.Frequency++
		entry.Users++
	}
	// Scan returns false both at EOF and on failure (I/O error, over-long
	// line); only Err distinguishes the two, so check it before trusting the
	// counts.
	scanErr := scanner.Err()
	// The handle is read-only and reading is finished, so a Close error is
	// not actionable here.
	file.Close()
	if scanErr != nil {
		fmt.Fprintln(os.Stderr, "Error reading file:", scanErr)
		os.Exit(1)
	}

	var duplicates []HashFrequency
	for _, value := range counts {
		if value.Frequency > 1 {
			duplicates = append(duplicates, *value)
		}
	}
	// Map iteration order is random, so break frequency ties by hash to keep
	// the output (and the top-N cut) stable between runs.
	sort.Slice(duplicates, func(i, j int) bool {
		if duplicates[i].Frequency != duplicates[j].Frequency {
			return duplicates[i].Frequency > duplicates[j].Frequency
		}
		return duplicates[i].Hash < duplicates[j].Hash
	})

	limited := numDuplicates > 0
	if limited {
		fmt.Println("_______________________________")
		fmt.Printf("Top %d highest duplicates:\n", numDuplicates)
		fmt.Println("_______________________________")
	} else {
		fmt.Println("Duplicate Hashes:")
	}
	for i, dup := range duplicates {
		if limited && i >= numDuplicates {
			break
		}
		fmt.Printf("%s: %d Users\n", dup.Hash, dup.Frequency)
	}
	if limited {
		fmt.Println("_______________________________")
	}

	printHashStatistics(counts, duplicates)
	printHashDuplicateVolumes(counts, duplicates)
}
// displayHelp prints the usage text to standard output: the available flags,
// the expected layout of the hashes file, and a short description of the tool.
//
// Blank separator lines are emitted with an explicit fmt.Println() rather than
// a trailing "\n" inside a Println argument; the latter is rejected by the
// go vet printf check that "go test" runs by default. The printed bytes are
// unchanged.
func displayHelp() {
	fmt.Println("Usage: go run hashcounter.go [flags]")
	fmt.Println()
	fmt.Println("Available flags:")
	fmt.Println(" -d\tNumber of top duplicates to display. By default, show all hashes.")
	fmt.Println(" -p\tPath to the hashes file.")
	fmt.Println(" -h\tDisplay help message.")
	fmt.Println()
	fmt.Println("Considerations for preparing a hashes.txt file:")
	fmt.Println(" 1. Each hash should be on a separate line.")
	fmt.Println(" 2. Ensure the file contains no additional whitespace or special characters.")
	fmt.Println()
	fmt.Println("This script is useful for identifying duplicate hashes in a given file. It reads in a file of hashes,")
	fmt.Println("counts the frequency of each hash, and outputs a list of the duplicate hashes and their frequency in")
	fmt.Println("descending order. You can use this script to easily identify and remove any duplicate hashes in your file.")
}
// printHashStatistics prints summary figures for the counted hashes to
// standard output.
//
// counts maps each distinct hash to its tally; duplicates holds the entries
// that occurred more than once. The report contains:
//   - Total Hashes: the number of distinct hashes (len(counts)).
//   - Total Duplicates: the number of duplicated hashes, as a share of the
//     distinct hashes.
//   - Total Users with Duplicate Passwords: the sum of Users over entries with
//     Frequency > 1, as a share of the sum of Users over all entries.
//
// Percentages are reported as 0.00 when their denominator is zero, so empty
// input does not print NaN.
func printHashStatistics(counts map[string]*HashFrequency, duplicates []HashFrequency) {
	fmt.Println("_______________________________")
	fmt.Println("Hash Statistics:")
	fmt.Println("_______________________________")

	totalHashes := len(counts)
	totalDuplicates := len(duplicates)

	allUsers := 0   // every user (line) that was counted
	totalUsers := 0 // users whose hash is shared with at least one other user
	for _, value := range counts {
		allUsers += value.Users
		if value.Frequency > 1 {
			totalUsers += value.Users
		}
	}

	// percent guards the 0/0 case, which would otherwise format as NaN.
	percent := func(part, whole int) float64 {
		if whole == 0 {
			return 0
		}
		return float64(part) / float64(whole) * 100
	}

	fmt.Printf("Total Hashes: %d\n", totalHashes)
	fmt.Printf("Total Duplicates: %d (%.2f%%)\n", totalDuplicates, percent(totalDuplicates, totalHashes))
	// The share is taken over all users, not over len(duplicates)*2, which is
	// unrelated to the user population and produced values of 100% or more.
	fmt.Printf("Total Users with Duplicate Passwords: %d (%.2f%%)\n", totalUsers, percent(totalUsers, allUsers))
}
// printHashDuplicateVolumes prints, for every frequency greater than one, how
// many distinct hashes in counts occur exactly that many times. Rows are
// written to standard output from the highest frequency to the lowest.
//
// The duplicates argument is accepted but not read.
func printHashDuplicateVolumes(counts map[string]*HashFrequency, duplicates []HashFrequency) {
	fmt.Println("_______________________________")
	fmt.Println("Hash Duplicate Volumes:")
	fmt.Println("_______________________________")

	// hashesPerVolume[f] is the number of hashes seen exactly f times.
	hashesPerVolume := map[int]int{}
	for _, entry := range counts {
		if entry.Frequency <= 1 {
			continue
		}
		hashesPerVolume[entry.Frequency]++
	}

	volumes := make([]int, 0, len(hashesPerVolume))
	for v := range hashesPerVolume {
		volumes = append(volumes, v)
	}
	// Map keys are distinct, so a descending integer sort fully fixes the order.
	sort.Sort(sort.Reverse(sort.IntSlice(volumes)))

	for _, v := range volumes {
		fmt.Printf("%d duplicates: %d\n", v, hashesPerVolume[v])
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment