Last active
August 5, 2016 16:57
-
-
Save tylerstillwater/a37ac5444afe4492b2a75a2cd0be3238 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// All material is licensed under the Apache License Version 2.0, January 2004 | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// Sample program that takes a stream of bytes and looks for the bytes | |
// “elvis” and when they are found, replace them with “Elvis”. The code | |
// cannot assume that there are any line feeds or other delimiters in the | |
// stream and the code must assume that the stream is of any arbitrary length. | |
// The solution cannot meaningfully buffer to the end of the stream and | |
// then process the replacement. | |
package main | |
import ( | |
"bufio" | |
"bytes" | |
"fmt" | |
"io" | |
) | |
// data represents a table of input and expected output. | |
var data = []struct { | |
input []byte | |
output []byte | |
}{ | |
{[]byte("abc"), []byte("abc")}, | |
{[]byte("elvis"), []byte("Elvis")}, | |
{[]byte("aElvis"), []byte("aElvis")}, | |
{[]byte("abcelvis"), []byte("abcElvis")}, | |
{[]byte("eelvis"), []byte("eElvis")}, | |
{[]byte("aelvis"), []byte("aElvis")}, | |
{[]byte("aabeeeelvis"), []byte("aabeeeElvis")}, | |
{[]byte("e l v i s"), []byte("e l v i s")}, | |
{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")}, | |
{[]byte(" elvi s"), []byte(" elvi s")}, | |
{[]byte("elvielvis"), []byte("elviElvis")}, | |
{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")}, | |
{[]byte("elvielviselvis"), []byte("elviElvisElvis")}, | |
} | |
// Declare what needs to be found and its replacement. | |
var find = []byte("elvis") | |
var repl = []byte("Elvis") | |
// Calculate the number of bytes we need to locate. | |
var size = len(find) | |
func main() { | |
var output bytes.Buffer | |
fmt.Println("=======================================\nRunning Algorithm One") | |
for _, d := range data { | |
output.Reset() | |
algorithmOne(d.input, &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Two") | |
for _, d := range data { | |
output.Reset() | |
algorithmTwo(d.input, &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Three") | |
for _, d := range data { | |
output.Reset() | |
algorithmThree(bytes.NewReader(d.input), &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
fmt.Println("=======================================\nRunning Algorithm Four") | |
for _, d := range data { | |
output.Reset() | |
algorithmFour(bytes.NewReader(d.input), &output) | |
matched := bytes.Compare(d.output, output.Bytes()) | |
fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes()) | |
} | |
} | |
// algorithmOne is one way to solve the problem. | |
func algorithmOne(data []byte, output *bytes.Buffer) { | |
// Use a bytes Reader to provide a stream to process. | |
input := bytes.NewReader(data) | |
// Declare the buffer we need to process the stream. | |
buf := make([]byte, size) | |
end := size - 1 | |
// Read in an initial number of bytes we need to get started. | |
if n, err := io.ReadFull(input, buf[:end]); err != nil { | |
output.Write(buf[:n]) | |
return | |
} | |
for { | |
// Read in one byte from the input stream. | |
b, err := input.ReadByte() | |
if err != nil { | |
// Flush the reset of the bytes we have. | |
output.Write(buf[:end]) | |
break | |
} | |
// Add this byte to the end of the buffer. | |
buf[end] = b | |
// If we have a match, replace the bytes. | |
if bytes.Compare(buf, find) == 0 { | |
copy(buf, repl) | |
} | |
// Write the front byte since it has been compared. | |
output.WriteByte(buf[0]) | |
// Slice that front byte out. | |
copy(buf, buf[1:]) | |
} | |
} | |
// algorithmTwo is a second way to solve the problem. | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmTwo(data []byte, output *bytes.Buffer) { | |
// Use the bytes Reader to provide a stream to process. | |
input := bytes.NewReader(data) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Read a single byte from our input. | |
b, err := input.ReadByte() | |
if err != nil { | |
break | |
} | |
// Does this byte match the byte at this offset? | |
if b == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// Unread the unmatched byte so it can be processed again. | |
input.UnreadByte() | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b) | |
idx = 0 | |
} | |
} | |
// algorithmThree is a second way to solve the problem. This approach takes an | |
// io.Reader to represent an infinite stream. This allows for the algorithm to | |
// accept input from just about anywhere, thanks to the beauty of Go | |
// interfaces. | |
// | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmThree(data io.Reader, output *bytes.Buffer) { | |
// Use bufio.NewReaderSize to provide a stream from which we can read and | |
// unread single bytes. | |
input := bufio.NewReaderSize(data, 1) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Read a single byte from our input. | |
b, err := input.ReadByte() | |
if err != nil { | |
break | |
} | |
// Does this byte match the byte at this offset? | |
if b == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// Unread the unmatched byte so it can be processed again. | |
input.UnreadByte() | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b) | |
idx = 0 | |
} | |
} | |
// algorithmFour is a fourth way to solve the problem. This approach takes an | |
// io.Reader to represent an infinite stream. This allows for the algorithm to | |
// accept input from just about anywhere, thanks to the beauty of Go | |
// interfaces. | |
// | |
// Additionally, it has been optimized for minimal allocations by reading | |
// directly from the stream onto the stack. This results in 1 allocation of 1 | |
// byte at the time of writing. | |
// | |
// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell | |
func algorithmFour(data io.Reader, output *bytes.Buffer) { | |
// Create a byte slice of length 1 into which our byte will be read. | |
b := make([]byte, 1) | |
// Create an index variable to match bytes. | |
idx := 0 | |
for { | |
// Are we re-using the byte from a previous call? | |
if b[0] == 0 { | |
// Read a single byte from our input. | |
n, err := data.Read(b) | |
if n == 0 || err != nil { | |
break | |
} | |
} | |
// Does this byte match the byte at this offset? | |
if b[0] == find[idx] { | |
// It matches so increment the index position. | |
idx++ | |
// If every byte has been matched, write | |
// out the replacement. | |
if idx == size { | |
output.Write(repl) | |
idx = 0 | |
} | |
// Reset the reader byte to 0 so another byte will be read. | |
b[0] = 0 | |
continue | |
} | |
// Did we have any sort of match on any given byte? | |
if idx != 0 { | |
// Write what we've matched up to this point. | |
output.Write(find[:idx]) | |
// NOTE: we are NOT resetting the reader byte to 0 here because we need | |
// to re-use it on the next call. This is equivalent to the UnreadByte() | |
// call in the other functions. | |
// Reset the offset to start matching from the beginning. | |
idx = 0 | |
continue | |
} | |
// There was no previous match. Write byte and reset. | |
output.WriteByte(b[0]) | |
// Reset the reader byte to 0 so another byte will be read. | |
b[0] = 0 | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// All material is licensed under the Apache License Version 2.0, January 2004 | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// go test -run none -bench . -benchtime 3s -benchmem | |
// Tests to see how each algorithm compare. | |
package main | |
import ( | |
"bytes" | |
"testing" | |
) | |
// assembleInputStream appends all the input slices together to allow for | |
// consistent testing across all benchmarks | |
func assembleInputStream() []byte { | |
var out []byte | |
for _, d := range data { | |
out = append(out, d.input...) | |
} | |
return out | |
} | |
// Capture the time it takes to execute algorithm one. | |
func BenchmarkAlgorithmOne(b *testing.B) { | |
var output bytes.Buffer | |
in := assembleInputStream() | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
algorithmOne(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm two. | |
func BenchmarkAlgorithmTwo(b *testing.B) { | |
var output bytes.Buffer | |
in := assembleInputStream() | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
algorithmTwo(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm three. | |
func BenchmarkAlgorithmThree(b *testing.B) { | |
var output bytes.Buffer | |
in := bytes.NewReader(assembleInputStream()) | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
// Seek our reader to the beginning of the byte array | |
in.Seek(0, 0) | |
algorithmThree(in, &output) | |
} | |
} | |
// Capture the time it takes to execute algorithm four. | |
func BenchmarkAlgorithmFour(b *testing.B) { | |
var output bytes.Buffer | |
in := bytes.NewReader(assembleInputStream()) | |
for i := 0; i < b.N; i++ { | |
output.Reset() | |
// Seek our reader to the beginning of the byte array | |
in.Seek(0, 0) | |
algorithmFour(in, &output) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BenchmarkAlgorithmOne-8 2000000 2803 ns/op 53 B/op 2 allocs/op | |
BenchmarkAlgorithmTwo-8 5000000 1042 ns/op 0 B/op 0 allocs/op | |
BenchmarkAlgorithmThree-8 3000000 1583 ns/op 16 B/op 1 allocs/op | |
BenchmarkAlgorithmFour-8 2000000 2030 ns/op 1 B/op 1 allocs/op |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment