Created
August 6, 2016 05:22
-
-
Save billhathaway/bba09c9ed4ad736dbf6e7679ec1aa7de to your computer and use it in GitHub Desktop.
Added my solution algorithmFive to elvis problem. Similar to Tyler's algorithmFour, but slightly slower.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
go test -run none -bench . -benchtime 3s -benchmem | |
testing: warning: no tests to run | |
PASS | |
BenchmarkAlgorithmOne-4 1000000 4719 ns/op 53 B/op 2 allocs/op | |
BenchmarkAlgorithmTwo-4 3000000 1645 ns/op 0 B/op 0 allocs/op | |
BenchmarkAlgorithmThree-4 2000000 2572 ns/op 16 B/op 1 allocs/op | |
BenchmarkAlgorithmFour-4 1000000 3368 ns/op 1 B/op 1 allocs/op | |
BenchmarkAlgorithmFive-4 1000000 3423 ns/op 1 B/op 1 allocs/op |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// All material is licensed under the Apache License Version 2.0, January 2004 | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// Sample program that takes a stream of bytes and looks for the bytes | |
// “elvis” and when they are found, replace them with “Elvis”. The code | |
// cannot assume that there are any line feeds or other delimiters in the | |
// stream and the code must assume that the stream is of any arbitrary length. | |
// The solution cannot meaningfully buffer to the end of the stream and | |
// then process the replacement. | |
package main | |
import ( | |
"bufio" | |
"bytes" | |
"fmt" | |
"io" | |
) | |
// data represents a table of input and expected output. | |
var data = []struct { | |
input []byte | |
output []byte | |
}{ | |
{[]byte("abc"), []byte("abc")}, | |
{[]byte("elvis"), []byte("Elvis")}, | |
{[]byte("aElvis"), []byte("aElvis")}, | |
{[]byte("abcelvis"), []byte("abcElvis")}, | |
{[]byte("eelvis"), []byte("eElvis")}, | |
{[]byte("aelvis"), []byte("aElvis")}, | |
{[]byte("aabeeeelvis"), []byte("aabeeeElvis")}, | |
{[]byte("e l v i s"), []byte("e l v i s")}, | |
{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")}, | |
{[]byte(" elvi s"), []byte(" elvi s")}, | |
{[]byte("elvielvis"), []byte("elviElvis")}, | |
{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")}, | |
{[]byte("elvielviselvis"), []byte("elviElvisElvis")}, | |
} | |
// Declare what needs to be found and its replacement. | |
var find = []byte("elvis") | |
var repl = []byte("Elvis") | |
// Calculate the number of bytes we need to locate. | |
var size = len(find) | |
func main() { | |
var output bytes.Buffer | |
fmt.Println("=======================================\nRunning Algorithm One") | |
for _, d := range data { | |
output.Reset() | |
algorithmOne(d.input, &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Two") | |
for _, d := range data { | |
output.Reset() | |
algorithmTwo(d.input, &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Three") | |
for _, d := range data { | |
output.Reset() | |
algorithmThree(bytes.NewReader(d.input), &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Four") | |
for _, d := range data { | |
output.Reset() | |
algorithmFour(bytes.NewReader(d.input), &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Five") | |
for _, d := range data { | |
output.Reset() | |
algorithmFive(bytes.NewReader(d.input), &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
} | |
// algorithmOne is one way to solve the problem. | |
func algorithmOne(data []byte, output *bytes.Buffer) { | |
// Use a bytes Reader to provide a stream to process. | |
input := bytes.NewReader(data) | |
// Declare the buffer we need to process the stream. | |
buf := make([]byte, size) | |
end := size - 1 | |
// Read in an initial number of bytes we need to get started. | |
if n, err := io.ReadFull(input, buf[:end]); err != nil { | |
output.Write(buf[:n]) | |
return | |
} | |
for { | |
// Read in one byte from the input stream. | |
b, err := input.ReadByte() | |
if err != nil { | |
// Flush the reset of the bytes we have. | |
output.Write(buf[:end]) | |
break | |
} | |
// Add this byte to the end of the buffer. | |
buf[end] = b | |
// If we have a match, replace the bytes. | |
if bytes.Compare(buf, find) == 0 { | |
copy(buf, repl) | |
} | |
// Write the front byte since it has been compared. | |
output.WriteByte(buf[0]) | |
// Slice that front byte out. | |
copy(buf, buf[1:]) | |
} | |
} | |
// algorithmTwo is a second way to solve the problem. | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmTwo(data []byte, output *bytes.Buffer) { | |
// Use the bytes Reader to provide a stream to process. | |
input := bytes.NewReader(data) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Read a single byte from our input. | |
b, err := input.ReadByte() | |
if err != nil { | |
break | |
} | |
// Does this byte match the byte at this offset? | |
if b == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// Unread the unmatched byte so it can be processed again. | |
input.UnreadByte() | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b) | |
idx = 0 | |
} | |
} | |
// algorithmThree is a second way to solve the problem. This approach takes an | |
// io.Reader to represent an infinite stream. This allows for the algorithm to | |
// accept input from just about anywhere, thanks to the beauty of Go | |
// interfaces. | |
// | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmThree(data io.Reader, output *bytes.Buffer) { | |
// Use bufio.NewReaderSize to provide a stream from which we can read and | |
// unread single bytes. | |
input := bufio.NewReaderSize(data, 1) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Read a single byte from our input. | |
b, err := input.ReadByte() | |
if err != nil { | |
break | |
} | |
// Does this byte match the byte at this offset? | |
if b == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// Unread the unmatched byte so it can be processed again. | |
input.UnreadByte() | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b) | |
idx = 0 | |
} | |
} | |
// algorithmFour is a fourth way to solve the problem. This approach takes an | |
// io.Reader to represent an infinite stream. This allows for the algorithm to | |
// accept input from just about anywhere, thanks to the beauty of Go | |
// interfaces. | |
// | |
// Additionally, it has been optimized for minimal allocations by reading | |
// directly from the stream onto the stack. This results in 1 allocation of 1 | |
// byte at the time of writing. | |
// | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmFour(data io.Reader, output *bytes.Buffer) { | |
// Create a byte slice of length 1 into which our byte will be read. | |
b := make([]byte, 1) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Are we re-using the byte from a previous call? | |
if b[0] == 0 { | |
// Read a single byte from our input. | |
n, err := data.Read(b) | |
if n == 0 || err != nil { | |
break | |
} | |
} | |
// Does this byte match the byte at this offset? | |
if b[0] == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
// Reset the reader byte to 0 so another byte will be read. | |
b[0] = 0 | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// NOTE: we are NOT resetting the reader byte to 0 here because we need | |
// to re-use it on the next call. This is equivalent to the UnreadByte() | |
// call in the other functions. | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b[0]) | |
// Reset the reader byte to 0 so another byte will be read. | |
b[0] = 0 | |
} | |
} | |
// io.Reader and io.Writer to model an infinite stream | |
// Provided by Bill Hathaway @billhathaway | |
func algorithmFive(r io.Reader, w *bytes.Buffer) { | |
var idx int // where we are in the match | |
var buf = make([]byte, 1) | |
for { | |
n, err := r.Read(buf) | |
if err != nil || n == 0 { | |
break | |
} | |
// does newest byte match next byte of find pattern? | |
if buf[0] == find[idx] { | |
idx++ | |
// if a full match, write out the replacement pattern | |
if idx == len(find) { | |
w.Write(repl) | |
idx = 0 | |
} | |
continue | |
} | |
// if we have matched anything earlier, write it | |
if idx > 0 { | |
w.Write(find[:idx]) | |
idx = 0 | |
} | |
// match start of pattern? | |
if buf[0] == find[0] { | |
idx = 1 | |
continue | |
} | |
// write out what we read since no match | |
w.Write(buf) | |
} | |
// write out any partial match before returning | |
if idx > 0 { | |
w.Write(find[:idx]) | |
} | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// All material is licensed under the Apache License Version 2.0, January 2004 | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// go test -run none -bench . -benchtime 3s -benchmem | |
// Tests to see how each algorithm compare. | |
package main | |
import ( | |
"bytes" | |
"testing" | |
) | |
// assembleInputStream appends all the input slices together to allow for | |
// consistent testing across all benchmarks | |
func assembleInputStream() []byte { | |
var out []byte | |
for _, d := range data { | |
out = append(out, d.input...) | |
} | |
return out | |
} | |
// Capture the time it takes to execute algorithm one. | |
func BenchmarkAlgorithmOne(b *testing.B) { | |
var output bytes.Buffer | |
in := assembleInputStream() | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
algorithmOne(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm two. | |
func BenchmarkAlgorithmTwo(b *testing.B) { | |
var output bytes.Buffer | |
in := assembleInputStream() | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
algorithmTwo(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm three. | |
func BenchmarkAlgorithmThree(b *testing.B) { | |
var output bytes.Buffer | |
in := bytes.NewReader(assembleInputStream()) | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
// Seek our reader to the beginning of the byte array | |
in.Seek(0, 0) | |
algorithmThree(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm four. | |
func BenchmarkAlgorithmFour(b *testing.B) { | |
var output bytes.Buffer | |
in := bytes.NewReader(assembleInputStream()) | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
// Seek our reader to the beginning of the byte array | |
in.Seek(0, 0) | |
algorithmFour(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm five. | |
func BenchmarkAlgorithmFive(b *testing.B) { | |
var output bytes.Buffer | |
in := bytes.NewReader(assembleInputStream()) | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
// Seek our reader to the beginning of the byte array | |
in.Seek(0, 0) | |
algorithmFive(in, &output) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment