Skip to content

Instantly share code, notes, and snippets.

@przmv
Created April 6, 2016 19:31
Show Gist options
  • Save przmv/44b5f8fc284372176cb8ad1f07eb6ffb to your computer and use it in GitHub Desktop.
Save przmv/44b5f8fc284372176cb8ad1f07eb6ffb to your computer and use it in GitHub Desktop.
gonx vs glow
package main
import (
"crypto/md5"
"flag"
"fmt"
"strings"
"github.com/chrislusf/glow/flow"
)
// main reads top-1m.csv through a glow flow, computes the MD5 digest of the
// second comma-separated column of every line, and prints each digest as a
// lowercase hex string on its own line.
func main() {
	// Parse command-line flags before the flow is built and run.
	flag.Parse()
	flow.New().TextFile(
		// NOTE(review): the second argument is presumably the number of
		// shards the file is split into — confirm against the glow docs.
		"top-1m.csv", 3,
	).Map(func(line string, ch chan string) {
		parts := strings.Split(line, ",")
		// A line without a comma (blank or malformed) has no second column;
		// skip it instead of panicking on parts[1].
		if len(parts) < 2 {
			return
		}
		ch <- fmt.Sprintf("%x", md5.Sum([]byte(parts[1])))
	}).Map(func(s string) {
		fmt.Println(s)
	}).Run()
}
package main
import (
"crypto/md5"
"fmt"
"os"
"strings"
"github.com/satyrius/gonx"
)
// SplitParser converts a comma-separated line into a gonx.Entry.
type SplitParser struct{}

// ParseString splits line on commas and stores the second column in the
// "domain" field of a freshly created entry.
//
// It returns a nil entry and a non-nil error when line has fewer than two
// comma-separated columns, rather than panicking on the missing column.
func (p *SplitParser) ParseString(line string) (*gonx.Entry, error) {
	parts := strings.Split(line, ",")
	if len(parts) < 2 {
		return nil, fmt.Errorf("parsing line %q: want at least 2 comma-separated fields, got %d", line, len(parts))
	}
	entry := gonx.NewEmptyEntry()
	entry.SetField("domain", parts[1])
	return entry, nil
}
// Hash holds the configuration for the Reduce method, which rewrites the
// listed entry fields with the MD5 digest of their values.
type Hash struct {
// Fields lists the names of the entry fields to hash.
Fields []string
}
// Reduce consumes every entry from input and, for each name in r.Fields,
// overwrites that field with the lowercase hex MD5 digest of its current
// value. Fields whose lookup returns an error are left as they are. Every
// entry is forwarded to output, which is closed once input has been drained.
func (r *Hash) Reduce(input chan *gonx.Entry, output chan *gonx.Entry) {
	for e := range input {
		for _, field := range r.Fields {
			raw, err := e.Field(field)
			if err != nil {
				// Lookup failed for this field; move on to the next one.
				continue
			}
			digest := md5.Sum([]byte(raw))
			e.SetField(field, fmt.Sprintf("%x", digest))
		}
		output <- e
	}
	close(output)
}
// main runs top-1m.csv through gonx.MapReduce with a SplitParser and a Hash
// reducer over the "domain" field, then prints the resulting "domain" value
// of every entry on its own line.
//
// It exits with status 1 and a message on stderr if the input file cannot be
// opened.
func main() {
	reader, err := os.Open("top-1m.csv")
	if err != nil {
		// Fail before the pipeline starts instead of passing an unusable
		// file handle on to gonx.MapReduce.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer reader.Close()

	parser := new(SplitParser)
	reducer := &Hash{Fields: []string{"domain"}}
	output := gonx.MapReduce(reader, parser, reducer)
	for res := range output {
		// Entries whose "domain" lookup returns an error are skipped.
		val, err := res.Field("domain")
		if err == nil {
			fmt.Println(val)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment