Last active
July 22, 2017 09:59
-
-
Save sylr/c214a1630fa44ff70ddf9d695fe7aeb3 to your computer and use it in GitHub Desktop.
Generate array of hashes and compute levenshtein distances of hashes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "fmt" | |
| "sync" | |
| "time" | |
| "strconv" | |
| "math/rand" | |
| "crypto/md5" | |
| "encoding/hex" | |
| "github.com/arbovm/levenshtein" | |
| ) | |
| const THREADS = 2 | |
| const HASH_COUNT = 1000 | |
| const MAX_LEV_DISTANCE = 22 | |
| type lev struct { | |
| str1 string | |
| str2 string | |
| distance int | |
| } | |
| func ceil(ceiling int, i int) int { | |
| if i <= ceiling { | |
| return i | |
| } else { | |
| return ceiling | |
| } | |
| } | |
| func distance(wg *sync.WaitGroup, c chan lev, hashes *[HASH_COUNT]string, start int, end int) { | |
| fmt.Printf("distance array len=%d start=%d end=%d \n", len(*hashes), start, end) | |
| // defer signal | |
| defer wg.Done() | |
| length := len(*hashes) | |
| //ceiling := ceil(length, end) | |
| // iterate over part of the array assigned to the thread | |
| for i := start; i <= end; i++ { | |
| // iterate over array except for indexes from $start to $end | |
| // to avoid computing distances twice | |
| for j := 0; j < length; j++ { | |
| if j >= start && j <= end { | |
| continue | |
| } | |
| // distance | |
| dist := levenshtein.Distance(hashes[i], hashes[j]) | |
| if dist < MAX_LEV_DISTANCE { | |
| c <- lev{hashes[i], hashes[j], dist} | |
| } | |
| } | |
| } | |
| } | |
| func main() { | |
| var hashes [HASH_COUNT]string | |
| var wg sync.WaitGroup | |
| c := make(chan lev) | |
| r := rand.NewSource(time.Now().UnixNano()) | |
| r1 := rand.New(r) | |
| hasher := md5.New() | |
| // Generate array of random hashes | |
| for i := 0; i < HASH_COUNT; i++ { | |
| str := strconv.Itoa(r1.Intn(100000000)) + string(time.Now().UnixNano()) | |
| byt := []byte(str) | |
| hasher.Reset() | |
| hasher.Write(byt) | |
| hash := hasher.Sum(nil) | |
| hashes[i] = hex.EncodeToString(hash) | |
| } | |
| wg.Add(THREADS) | |
| // Launch theads | |
| for i := 0; i < THREADS; i++ { | |
| var start, end int | |
| start = i * int(len(hashes) / THREADS) | |
| if (i != THREADS - 1) { | |
| end = (i + 1) * int(len(hashes) / THREADS) - 1 | |
| } else { | |
| end = (i + 1) * int(len(hashes) / THREADS) - 1 | |
| } | |
| go distance( | |
| &wg, | |
| c, | |
| &hashes, | |
| start, | |
| end, | |
| ) | |
| } | |
| // print lev distances | |
| go func() { | |
| for response := range c { | |
| fmt.Println(response) | |
| } | |
| }() | |
| wg.Wait() | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment