Skip to content

Instantly share code, notes, and snippets.

@sylr
Last active July 22, 2017 09:59
Show Gist options
  • Select an option

  • Save sylr/c214a1630fa44ff70ddf9d695fe7aeb3 to your computer and use it in GitHub Desktop.

Select an option

Save sylr/c214a1630fa44ff70ddf9d695fe7aeb3 to your computer and use it in GitHub Desktop.
Generate array of hashes and compute levenshtein distances of hashes
package main
import (
"fmt"
"sync"
"time"
"strconv"
"math/rand"
"crypto/md5"
"encoding/hex"
"github.com/arbovm/levenshtein"
)
const THREADS = 2
const HASH_COUNT = 1000
const MAX_LEV_DISTANCE = 22
type lev struct {
str1 string
str2 string
distance int
}
func ceil(ceiling int, i int) int {
if i <= ceiling {
return i
} else {
return ceiling
}
}
func distance(wg *sync.WaitGroup, c chan lev, hashes *[HASH_COUNT]string, start int, end int) {
fmt.Printf("distance array len=%d start=%d end=%d \n", len(*hashes), start, end)
// defer signal
defer wg.Done()
length := len(*hashes)
//ceiling := ceil(length, end)
// iterate over part of the array assigned to the thread
for i := start; i <= end; i++ {
// iterate over array except for indexes from $start to $end
// to avoid computing distances twice
for j := 0; j < length; j++ {
if j >= start && j <= end {
continue
}
// distance
dist := levenshtein.Distance(hashes[i], hashes[j])
if dist < MAX_LEV_DISTANCE {
c <- lev{hashes[i], hashes[j], dist}
}
}
}
}
func main() {
var hashes [HASH_COUNT]string
var wg sync.WaitGroup
c := make(chan lev)
r := rand.NewSource(time.Now().UnixNano())
r1 := rand.New(r)
hasher := md5.New()
// Generate array of random hashes
for i := 0; i < HASH_COUNT; i++ {
str := strconv.Itoa(r1.Intn(100000000)) + string(time.Now().UnixNano())
byt := []byte(str)
hasher.Reset()
hasher.Write(byt)
hash := hasher.Sum(nil)
hashes[i] = hex.EncodeToString(hash)
}
wg.Add(THREADS)
// Launch theads
for i := 0; i < THREADS; i++ {
var start, end int
start = i * int(len(hashes) / THREADS)
if (i != THREADS - 1) {
end = (i + 1) * int(len(hashes) / THREADS) - 1
} else {
end = (i + 1) * int(len(hashes) / THREADS) - 1
}
go distance(
&wg,
c,
&hashes,
start,
end,
)
}
// print lev distances
go func() {
for response := range c {
fmt.Println(response)
}
}()
wg.Wait()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment