tylerstillwater · August 5, 2016 16:57
diff --git a/elvis.go b/elvis.go
 // All material is licensed under the Apache License Version 2.0, January 2004
 // http://www.apache.org/licenses/LICENSE-2.0

 // Sample program that takes a stream of bytes and looks for the bytes
 // “elvis” and when they are found, replace them with “Elvis”. The code
 // cannot assume that there are any line feeds or other delimiters in the
 // stream and the code must assume that the stream is of any arbitrary length.
 // The solution cannot meaningfully buffer to the end of the stream and
 // then process the replacement.
 package main

 import (
 	"bufio"
 	"bytes"
 	"fmt"
 	"io"
 )

 // data represents a table of input and expected output.
 var data = []struct {
 	input  []byte
 	output []byte
 }{
 	{[]byte("abc"), []byte("abc")},
 	{[]byte("elvis"), []byte("Elvis")},
 	{[]byte("aElvis"), []byte("aElvis")},
 	{[]byte("abcelvis"), []byte("abcElvis")},
 	{[]byte("eelvis"), []byte("eElvis")},
 	{[]byte("aelvis"), []byte("aElvis")},
 	{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
 	{[]byte("e l v i s"), []byte("e l v i s")},
 	{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
 	{[]byte(" elvi s"), []byte(" elvi s")},
 	{[]byte("elvielvis"), []byte("elviElvis")},
 	{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
 	{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
 }

 // Declare what needs to be found and its replacement.
 var find = []byte("elvis")
 var repl = []byte("Elvis")

 // Calculate the number of bytes we need to locate.
 var size = len(find)

 func main() {
 	var output bytes.Buffer

 	fmt.Println("=======================================\nRunning Algorithm One")
 	for _, d := range data {
 		output.Reset()
 		algorithmOne(d.input, &output)
 		matched := bytes.Compare(d.output, output.Bytes())
 		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
 	}

 	fmt.Println("=======================================\nRunning Algorithm Two")
 	for _, d := range data {
 		output.Reset()
 		algorithmTwo(d.input, &output)
 		matched := bytes.Compare(d.output, output.Bytes())
 		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
 	}

 	fmt.Println("=======================================\nRunning Algorithm Three")
 	for _, d := range data {
 		output.Reset()
 		algorithmThree(bytes.NewReader(d.input), &output)
 		matched := bytes.Compare(d.output, output.Bytes())
 		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
 	}

 	fmt.Println("=======================================\nRunning Algorithm Four")
 	for _, d := range data {
 		output.Reset()
 		algorithmFour(bytes.NewReader(d.input), &output)
 		matched := bytes.Compare(d.output, output.Bytes())
 		fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
 	}
 }

 // algorithmOne is one way to solve the problem.
 func algorithmOne(data []byte, output *bytes.Buffer) {

 	// Use a bytes Reader to provide a stream to process.
 	input := bytes.NewReader(data)

 	// Declare the buffer we need to process the stream.
 	buf := make([]byte, size)
 	end := size - 1

 	// Read in an initial number of bytes we need to get started.
 	if n, err := io.ReadFull(input, buf[:end]); err != nil {
 		output.Write(buf[:n])
 		return
 	}

 	for {

 		// Read in one byte from the input stream.
 		b, err := input.ReadByte()
 		if err != nil {

 			// Flush the reset of the bytes we have.
 			output.Write(buf[:end])
 			break
 		}

 		// Add this byte to the end of the buffer.
 		buf[end] = b

 		// If we have a match, replace the bytes.
 		if bytes.Compare(buf, find) == 0 {
 			copy(buf, repl)
 		}

 		// Write the front byte since it has been compared.
 		output.WriteByte(buf[0])

 		// Slice that front byte out.
 		copy(buf, buf[1:])
 	}
 }

 // algorithmTwo is a second way to solve the problem.
 // Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
 func algorithmTwo(data []byte, output *bytes.Buffer) {

 	// Use the bytes Reader to provide a stream to process.
 	input := bytes.NewReader(data)

 	// Create an index variable to match bytes.
 	idx := 0

 	for {

 		// Read a single byte from our input.
 		b, err := input.ReadByte()
 		if err != nil {
 			break
 		}

 		// Does this byte match the byte at this offset?
 		if b == find[idx] {

 			// It matches so increment the index position.
 			idx++

 			// If every byte has been matched, write
 			// out the replacement.
 			if idx == size {
 				output.Write(repl)
 				idx = 0
 			}

 			continue
 		}

 		// Did we have any sort of match on any given byte?
 		if idx != 0 {

 			// Write what we've matched up to this point.
 			output.Write(find[:idx])

 			// Unread the unmatched byte so it can be processed again.
 			input.UnreadByte()

 			// Reset the offset to start matching from the beginning.
 			idx = 0

 			continue
 		}

 		// There was no previous match. Write byte and reset.
 		output.WriteByte(b)
 		idx = 0
 	}
 }

 // algorithmThree is a second way to solve the problem. This approach takes an
 // io.Reader to represent an infinite stream. This allows for the algorithm to
 // accept input from just about anywhere, thanks to the beauty of Go
 // interfaces.
 //
 // Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
 func algorithmThree(data io.Reader, output *bytes.Buffer) {

 	// Use bufio.NewReaderSize to provide a stream from which we can read and
 	// unread single bytes.
 	input := bufio.NewReaderSize(data, 1)

 	// Create an index variable to match bytes.
 	idx := 0

 	for {

 		// Read a single byte from our input.
 		b, err := input.ReadByte()
 		if err != nil {
 			break
 		}

 		// Does this byte match the byte at this offset?
 		if b == find[idx] {

 			// It matches so increment the index position.
 			idx++

 			// If every byte has been matched, write
 			// out the replacement.
 			if idx == size {
 				output.Write(repl)
 				idx = 0
 			}

 			continue
 		}

 		// Did we have any sort of match on any given byte?
 		if idx != 0 {

 			// Write what we've matched up to this point.
 			output.Write(find[:idx])

 			// Unread the unmatched byte so it can be processed again.
 			input.UnreadByte()

 			// Reset the offset to start matching from the beginning.
 			idx = 0

 			continue
 		}

 		// There was no previous match. Write byte and reset.
 		output.WriteByte(b)
 		idx = 0
 	}
 }

 // algorithmFour is a fourth way to solve the problem. This approach takes an
 // io.Reader to represent an infinite stream. This allows for the algorithm to
 // accept input from just about anywhere, thanks to the beauty of Go
 // interfaces.
 //
 // Additionally, it has been optimized for minimal allocations by reading
 // directly from the stream onto the stack. This results in 1 allocation of 1
 // byte at the time of writing.
 //
 // Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
 func algorithmFour(data io.Reader, output *bytes.Buffer) {

 	// Create a byte slice of length 1 into which our byte will be read.
 	b := make([]byte, 1)

 	// Create an index variable to match bytes.
 	idx := 0

 	for {

 		// Are we re-using the byte from a previous call?
 		if b[0] == 0 {
 			// Read a single byte from our input.
 			n, err := data.Read(b)
 			if n == 0 || err != nil {
 				break
 			}
 		}

 		// Does this byte match the byte at this offset?
 		if b[0] == find[idx] {

 			// It matches so increment the index position.
 			idx++

 			// If every byte has been matched, write
 			// out the replacement.
 			if idx == size {
 				output.Write(repl)
 				idx = 0
 			}

 			// Reset the reader byte to 0 so another byte will be read.
 			b[0] = 0
 			continue
 		}

 		// Did we have any sort of match on any given byte?
 		if idx != 0 {

 			// Write what we've matched up to this point.
 			output.Write(find[:idx])

 			// NOTE: we are NOT resetting the reader byte to 0 here because we need
 			// to re-use it on the next call. This is equivalent to the UnreadByte()
 			// call in the other functions.

 			// Reset the offset to start matching from the beginning.
 			idx = 0

 			continue
 		}

 		// There was no previous match. Write byte and reset.
 		output.WriteByte(b[0])
 		// Reset the reader byte to 0 so another byte will be read.
 		b[0] = 0
 	}
 }
diff --git a/elvis_test.go b/elvis_test.go
 // All material is licensed under the Apache License Version 2.0, January 2004
 // http://www.apache.org/licenses/LICENSE-2.0

 // go test -run none -bench . -benchtime 3s -benchmem

 // Tests to see how each algorithm compare.
 package main

 import (
 	"bytes"
 	"testing"
 )

 // assembleInputStream appends all the input slices together to allow for
 // consistent testing across all benchmarks
 func assembleInputStream() []byte {
 	var out []byte
 	for _, d := range data {
 		out = append(out, d.input...)
 	}
 	return out
 }

 // Capture the time it takes to execute algorithm one.
 func BenchmarkAlgorithmOne(b *testing.B) {
 	var output bytes.Buffer
 	in := assembleInputStream()

 	for i := 0; i < b.N; i++ {
 		output.Reset()
 		algorithmOne(in, &output)
 	}
 }

 // Capture the time it takes to execute algorithm two.
 func BenchmarkAlgorithmTwo(b *testing.B) {
 	var output bytes.Buffer
 	in := assembleInputStream()

 	for i := 0; i < b.N; i++ {
 		output.Reset()
 		algorithmTwo(in, &output)
 	}
 }

 // Capture the time it takes to execute algorithm three.
 func BenchmarkAlgorithmThree(b *testing.B) {
 	var output bytes.Buffer
 	in := bytes.NewReader(assembleInputStream())

 	for i := 0; i < b.N; i++ {
 		output.Reset()
 		// Seek our reader to the beginning of the byte array
 		in.Seek(0, 0)
 		algorithmThree(in, &output)
 	}
 }

 // Capture the time it takes to execute algorithm four.
 func BenchmarkAlgorithmFour(b *testing.B) {
 	var output bytes.Buffer
 	in := bytes.NewReader(assembleInputStream())

 	for i := 0; i < b.N; i++ {
 		output.Reset()
 		// Seek our reader to the beginning of the byte array
 		in.Seek(0, 0)
 		algorithmFour(in, &output)
 	}
 }
diff --git a/elvis_test_results.txt b/elvis_test_results.txt
 BenchmarkAlgorithmOne-8        	 2000000       	      2803 ns/op       	      53 B/op  	       2 allocs/op
 BenchmarkAlgorithmTwo-8        	 5000000       	      1042 ns/op       	       0 B/op  	       0 allocs/op
 BenchmarkAlgorithmThree-8      	 3000000       	      1583 ns/op       	      16 B/op  	       1 allocs/op
 BenchmarkAlgorithmFour-8       	 2000000       	      2030 ns/op       	       1 B/op  	       1 allocs/op
	// All material is licensed under the Apache License Version 2.0, January 2004
	// http://www.apache.org/licenses/LICENSE-2.0

	// Sample program that takes a stream of bytes and looks for the bytes
	// “elvis” and when they are found, replace them with “Elvis”. The code
	// cannot assume that there are any line feeds or other delimiters in the
	// stream and the code must assume that the stream is of any arbitrary length.
	// The solution cannot meaningfully buffer to the end of the stream and
	// then process the replacement.
	package main

	import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	)

	// data represents a table of input and expected output.
	var data = []struct {
	input []byte
	output []byte
	}{
	{[]byte("abc"), []byte("abc")},
	{[]byte("elvis"), []byte("Elvis")},
	{[]byte("aElvis"), []byte("aElvis")},
	{[]byte("abcelvis"), []byte("abcElvis")},
	{[]byte("eelvis"), []byte("eElvis")},
	{[]byte("aelvis"), []byte("aElvis")},
	{[]byte("aabeeeelvis"), []byte("aabeeeElvis")},
	{[]byte("e l v i s"), []byte("e l v i s")},
	{[]byte("aa bb e l v i saa"), []byte("aa bb e l v i saa")},
	{[]byte(" elvi s"), []byte(" elvi s")},
	{[]byte("elvielvis"), []byte("elviElvis")},
	{[]byte("elvielvielviselvi1"), []byte("elvielviElviselvi1")},
	{[]byte("elvielviselvis"), []byte("elviElvisElvis")},
	}

	// Declare what needs to be found and its replacement.
	var find = []byte("elvis")
	var repl = []byte("Elvis")

	// Calculate the number of bytes we need to locate.
	var size = len(find)

	func main() {
	var output bytes.Buffer

	fmt.Println("=======================================\nRunning Algorithm One")
	for _, d := range data {
	output.Reset()
	algorithmOne(d.input, &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Two")
	for _, d := range data {
	output.Reset()
	algorithmTwo(d.input, &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Three")
	for _, d := range data {
	output.Reset()
	algorithmThree(bytes.NewReader(d.input), &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}

	fmt.Println("=======================================\nRunning Algorithm Four")
	for _, d := range data {
	output.Reset()
	algorithmFour(bytes.NewReader(d.input), &output)
	matched := bytes.Compare(d.output, output.Bytes())
	fmt.Printf("Matched: %v Inp: [%s] Exp: [%s] Got: [%s]\n", matched == 0, d.input, d.output, output.Bytes())
	}
	}

	// algorithmOne is one way to solve the problem.
	func algorithmOne(data []byte, output *bytes.Buffer) {

	// Use a bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Declare the buffer we need to process the stream.
	buf := make([]byte, size)
	end := size - 1

	// Read in an initial number of bytes we need to get started.
	if n, err := io.ReadFull(input, buf[:end]); err != nil {
	output.Write(buf[:n])
	return
	}

	for {

	// Read in one byte from the input stream.
	b, err := input.ReadByte()
	if err != nil {

	// Flush the reset of the bytes we have.
	output.Write(buf[:end])
	break
	}

	// Add this byte to the end of the buffer.
	buf[end] = b

	// If we have a match, replace the bytes.
	if bytes.Compare(buf, find) == 0 {
	copy(buf, repl)
	}

	// Write the front byte since it has been compared.
	output.WriteByte(buf[0])

	// Slice that front byte out.
	copy(buf, buf[1:])
	}
	}

	// algorithmTwo is a second way to solve the problem.
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmTwo(data []byte, output *bytes.Buffer) {

	// Use the bytes Reader to provide a stream to process.
	input := bytes.NewReader(data)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Read a single byte from our input.
	b, err := input.ReadByte()
	if err != nil {
	break
	}

	// Does this byte match the byte at this offset?
	if b == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// Unread the unmatched byte so it can be processed again.
	input.UnreadByte()

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b)
	idx = 0
	}
	}

	// algorithmThree is a second way to solve the problem. This approach takes an
	// io.Reader to represent an infinite stream. This allows for the algorithm to
	// accept input from just about anywhere, thanks to the beauty of Go
	// interfaces.
	//
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmThree(data io.Reader, output *bytes.Buffer) {

	// Use bufio.NewReaderSize to provide a stream from which we can read and
	// unread single bytes.
	input := bufio.NewReaderSize(data, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Read a single byte from our input.
	b, err := input.ReadByte()
	if err != nil {
	break
	}

	// Does this byte match the byte at this offset?
	if b == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// Unread the unmatched byte so it can be processed again.
	input.UnreadByte()

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b)
	idx = 0
	}
	}

	// algorithmFour is a fourth way to solve the problem. This approach takes an
	// io.Reader to represent an infinite stream. This allows for the algorithm to
	// accept input from just about anywhere, thanks to the beauty of Go
	// interfaces.
	//
	// Additionally, it has been optimized for minimal allocations by reading
	// directly from the stream onto the stack. This results in 1 allocation of 1
	// byte at the time of writing.
	//
	// Provided by Tyler Bunnell https://twitter.com/TylerJBunnell
	func algorithmFour(data io.Reader, output *bytes.Buffer) {

	// Create a byte slice of length 1 into which our byte will be read.
	b := make([]byte, 1)

	// Create an index variable to match bytes.
	idx := 0

	for {

	// Are we re-using the byte from a previous call?
	if b[0] == 0 {
	// Read a single byte from our input.
	n, err := data.Read(b)
	if n == 0 \|\| err != nil {
	break
	}
	}

	// Does this byte match the byte at this offset?
	if b[0] == find[idx] {

	// It matches so increment the index position.
	idx++

	// If every byte has been matched, write
	// out the replacement.
	if idx == size {
	output.Write(repl)
	idx = 0
	}

	// Reset the reader byte to 0 so another byte will be read.
	b[0] = 0
	continue
	}

	// Did we have any sort of match on any given byte?
	if idx != 0 {

	// Write what we've matched up to this point.
	output.Write(find[:idx])

	// NOTE: we are NOT resetting the reader byte to 0 here because we need
	// to re-use it on the next call. This is equivalent to the UnreadByte()
	// call in the other functions.

	// Reset the offset to start matching from the beginning.
	idx = 0

	continue
	}

	// There was no previous match. Write byte and reset.
	output.WriteByte(b[0])
	// Reset the reader byte to 0 so another byte will be read.
	b[0] = 0
	}
	}
	// All material is licensed under the Apache License Version 2.0, January 2004
	// http://www.apache.org/licenses/LICENSE-2.0

	// go test -run none -bench . -benchtime 3s -benchmem

	// Tests to see how each algorithm compare.
	package main

	import (
	"bytes"
	"testing"
	)

	// assembleInputStream appends all the input slices together to allow for
	// consistent testing across all benchmarks
	func assembleInputStream() []byte {
	var out []byte
	for _, d := range data {
	out = append(out, d.input...)
	}
	return out
	}

	// Capture the time it takes to execute algorithm one.
	func BenchmarkAlgorithmOne(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
	output.Reset()
	algorithmOne(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm two.
	func BenchmarkAlgorithmTwo(b *testing.B) {
	var output bytes.Buffer
	in := assembleInputStream()

	for i := 0; i < b.N; i++ {
	output.Reset()
	algorithmTwo(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm three.
	func BenchmarkAlgorithmThree(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
	output.Reset()
	// Seek our reader to the beginning of the byte array
	in.Seek(0, 0)
	algorithmThree(in, &output)
	}
	}

	// Capture the time it takes to execute algorithm four.
	func BenchmarkAlgorithmFour(b *testing.B) {
	var output bytes.Buffer
	in := bytes.NewReader(assembleInputStream())

	for i := 0; i < b.N; i++ {
	output.Reset()
	// Seek our reader to the beginning of the byte array
	in.Seek(0, 0)
	algorithmFour(in, &output)
	}
	}
	BenchmarkAlgorithmOne-8 2000000 2803 ns/op 53 B/op 2 allocs/op
	BenchmarkAlgorithmTwo-8 5000000 1042 ns/op 0 B/op 0 allocs/op
	BenchmarkAlgorithmThree-8 3000000 1583 ns/op 16 B/op 1 allocs/op
	BenchmarkAlgorithmFour-8 2000000 2030 ns/op 1 B/op 1 allocs/op