Skip to content

Instantly share code, notes, and snippets.

@methane
Created December 8, 2014 02:07
Show Gist options
  • Save methane/3d98a8c10cdde4aad065 to your computer and use it in GitHub Desktop.
Save methane/3d98a8c10cdde4aad065 to your computer and use it in GitHub Desktop.
regex-dna.go
// Initial implementation
/* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* contributed by The Go Authors.
* modified by Tylor Arndt.
* modified by Chandra Sekar S to use optimized PCRE binding.
*/
package main
import (
"fmt"
"io/ioutil"
"os"
"runtime"
"github.com/tuxychandru/golang-pkg-pcre/src/pkg/pcre"
)
// variants lists the patterns whose occurrences are counted in the cleaned
// input. Each entry is an alternation of two 8-character patterns; main
// prints one "<pattern> <count>" line per entry, in this order.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}
// Subst describes one search-and-replace step applied to the sequence.
type Subst struct {
	// pat is the pattern to search for; main compiles it as a regular
	// expression.
	pat string
	// repl is the text substituted for every match of pat.
	repl string
}
// substs is the ordered list of replacements main applies to the cleaned
// sequence. Each single upper-case code is expanded into an alternation of
// lower-case bases (the codes appear to be IUPAC nucleotide ambiguity
// codes). The steps are applied one after another, in slice order.
var substs = []Subst{
	{pat: "B", repl: "(c|g|t)"},
	{pat: "D", repl: "(a|g|t)"},
	{pat: "H", repl: "(a|c|t)"},
	{pat: "K", repl: "(g|t)"},
	{pat: "M", repl: "(a|c)"},
	{pat: "N", repl: "(a|c|g|t)"},
	{pat: "R", repl: "(a|g)"},
	{pat: "S", repl: "(c|g)"},
	{pat: "V", repl: "(a|c|g)"},
	{pat: "W", repl: "(a|t)"},
	{pat: "Y", repl: "(c|t)"},
}
// countMatches returns how many non-overlapping matches of the regular
// expression pat occur in bytes, scanning left to right.
//
// pat is compiled with pcre.MustCompile, so an invalid pattern panics.
// After every hit the search window is re-sliced to begin at the end of
// that hit, which is why the matcher is re-run on the shrinking remainder
// rather than on the original slice.
func countMatches(pat string, bytes []byte) int {
	matcher := pcre.MustCompile(pat, 0).Matcher(bytes, 0)

	count := 0
	found := matcher.Matches()
	for found {
		count++
		// Index()[1] is the end offset of the current match, relative to
		// the slice that was just searched.
		end := matcher.Index()[1]
		bytes = bytes[end:]
		found = matcher.Match(bytes, 0)
	}
	return count
}
// main reads the whole of standard input, strips header lines and newlines,
// and then concurrently
//
//   - counts the matches of every pattern in variants, and
//   - applies every replacement in substs in order.
//
// It prints one "<pattern> <count>" line per variant, a blank line, and then
// three lengths: the raw input, the cleaned sequence, and the sequence after
// all substitutions. If reading stdin fails it reports the error on stderr
// and exits with status 2.
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())

	input, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	inputLen := len(input)

	// Drop the ">..." description lines and every newline character.
	seq := pcre.MustCompile("(>[^\n]+)?\n", 0).ReplaceAll(input, []byte{}, 0)
	seqLen := len(seq)

	// One goroutine and one result channel per variant, so the counts can be
	// collected later in the original variant order.
	counts := make([]chan int, len(variants))
	for idx := range variants {
		counts[idx] = make(chan int)
		go func(out chan<- int, pattern string) {
			out <- countMatches(pattern, seq)
		}(counts[idx], variants[idx])
	}

	// The substitutions run one after another in their own goroutine; only
	// the final length is reported back.
	substituted := make(chan int)
	go func() {
		buf := seq
		for _, sub := range substs {
			buf = pcre.MustCompile(sub.pat, 0).ReplaceAll(buf, []byte(sub.repl), 0)
		}
		substituted <- len(buf)
	}()

	for idx, pattern := range variants {
		fmt.Printf("%s %d\n", pattern, <-counts[idx])
	}
	fmt.Printf("\n%d\n%d\n%d\n", inputLen, seqLen, <-substituted)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment