Created
December 8, 2014 02:07
-
-
Save methane/3d98a8c10cdde4aad065 to your computer and use it in GitHub Desktop.
regex-dna.go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Initial implementation
/* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * contributed by The Go Authors.
 * modified by Tylor Arndt.
 * modified by Chandra Sekar S to use optimized PCRE binding.
 */
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"runtime"

	"github.com/tuxychandru/golang-pkg-pcre/src/pkg/pcre"
)
// variants holds the patterns whose match counts are reported by main.
// Each entry is an alternation of two 8-character forms; main counts
// them against the cleaned input and prints the results in this order.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}
// Subst describes one search-and-replace step: every match of pat is
// replaced with repl.
type Subst struct {
	pat  string // pattern to search for
	repl string // replacement text inserted for each match
}
var substs = []Subst{ | |
{"B", "(c|g|t)"}, | |
{"D", "(a|g|t)"}, | |
{"H", "(a|c|t)"}, | |
{"K", "(g|t)"}, | |
{"M", "(a|c)"}, | |
{"N", "(a|c|g|t)"}, | |
{"R", "(a|g)"}, | |
{"S", "(c|g)"}, | |
{"V", "(a|c|g)"}, | |
{"W", "(a|t)"}, | |
{"Y", "(c|t)"}, | |
} | |
func countMatches(pat string, bytes []byte) int { | |
m := pcre.MustCompile(pat, 0).Matcher(bytes, 0) | |
n := 0 | |
for f := m.Matches(); f; f = m.Match(bytes, 0) { | |
n++ | |
bytes = bytes[m.Index()[1]:] | |
} | |
return n | |
} | |
func main() { | |
runtime.GOMAXPROCS(runtime.NumCPU()) | |
bytes, err := ioutil.ReadAll(os.Stdin) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "can't read input: %s\n", err) | |
os.Exit(2) | |
} | |
ilen := len(bytes) | |
// Delete the comment lines and newlines | |
bytes = pcre.MustCompile("(>[^\n]+)?\n", 0).ReplaceAll(bytes, []byte{}, 0) | |
clen := len(bytes) | |
mresults := make([]chan int, len(variants)) | |
var i int | |
var s string | |
for i, s = range variants { | |
ch := make(chan int) | |
mresults[i] = ch | |
go func(intch chan int, ss string) { | |
intch <- countMatches(ss, bytes) | |
}(ch, s) | |
} | |
lenresult := make(chan int) | |
bb := bytes | |
go func() { | |
for _, sub := range substs { | |
bb = pcre.MustCompile(sub.pat, 0).ReplaceAll(bb, []byte(sub.repl), 0) | |
} | |
lenresult <- len(bb) | |
}() | |
for i, s = range variants { | |
fmt.Printf("%s %d\n", s, <-mresults[i]) | |
} | |
fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, <-lenresult) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment