Skip to content

Instantly share code, notes, and snippets.

@idispatch
Created September 3, 2021 13:02
Show Gist options
  • Save idispatch/b9f10c99399f97c7973722f57a03f5d6 to your computer and use it in GitHub Desktop.
Save idispatch/b9f10c99399f97c7973722f57a03f5d6 to your computer and use it in GitHub Desktop.
Preprocessor
module preprocessor
go 1.16
require github.com/stretchr/testify v1.7.0 // indirect
package main
import (
"fmt"
"os"
)
// readBytesFromFileNamed returns the complete contents of the file named
// fileName.
//
// It delegates to os.ReadFile (available since Go 1.16, the version this module
// targets) instead of a hand-written Stat/Read loop. The previous loop failed
// with an uninformative error whenever the number of readable bytes differed
// from the size reported by Stat; os.ReadFile reads until end of file and
// returns the underlying *os.PathError on failure.
func readBytesFromFileNamed(fileName string) ([]byte, error) {
	return os.ReadFile(fileName)
}
// preprocessorState identifies what the scanner inside Preprocessor.Read has
// seen so far. The value is kept between Read calls.
type preprocessorState int8

const (
	// stateNormal: outside of any literal or comment.
	stateNormal preprocessorState = iota
	// stateSawBackSlash: a '\' was seen outside literals and comments.
	stateSawBackSlash
	// stateSawForwardSlash: a '/' was seen; it may start a comment.
	stateSawForwardSlash
	// stateSawSingleQuote: just after the opening ' of a character literal.
	stateSawSingleQuote
	// stateSawDoubleQuote: inside a "..." string literal.
	stateSawDoubleQuote
	// stateSawSingleQuoteEscape: a '\' was seen inside a character literal.
	stateSawSingleQuoteEscape
	// stateSawDoubleQuoteEscape: a '\' was seen inside a string literal.
	stateSawDoubleQuoteEscape
	// stateExpectsSingleQuote: the character literal's content was read and
	// only the closing ' is acceptable.
	stateExpectsSingleQuote
	// stateSingleLineComment: inside a // comment.
	stateSingleLineComment
	// stateMultiLineComment: inside a /* */ comment.
	stateMultiLineComment
	// stateMultiLineCommentSawStar: inside a /* */ comment, just after a '*'.
	stateMultiLineCommentSawStar
	// stateInvalid: a syntax error was reported; scanning cannot continue.
	stateInvalid
)
// LineInfo is a position inside a text: a line number and a column number.
// Lines and columns are counted from 1.
type LineInfo struct {
	line   int
	column int
}

// String formats the position as "line:column".
func (i LineInfo) String() string {
	return fmt.Sprintf("%d:%d", i.line, i.column)
}
// FileLineInfo is a LineInfo qualified with the name of the file it refers to.
type FileLineInfo struct {
	LineInfo
	fileName string
}

// String formats the position as "fileName:" followed by the embedded
// LineInfo's own string form.
func (f FileLineInfo) String() string {
	return fmt.Sprintf("%s:%s", f.fileName, f.LineInfo)
}
// Preprocessor scans an in-memory source text and hands out a filtered copy
// of it through Read. All scanner state lives in the struct, so one text can
// be read with several consecutive Read calls.
type Preprocessor struct {
	// buffer is the complete input text.
	buffer []byte
	// pos is the index in buffer of the next byte to scan.
	pos int
	// info is the file name and line/column used in String and in error
	// messages.
	info FileLineInfo
	// state is the scanner state carried from one Read call to the next.
	state preprocessorState
	// TODO: stash

	// RemoveMultiLineCommentEmptyLines, when true, makes Read leave out the
	// newlines it finds inside /* */ comments instead of copying them.
	RemoveMultiLineCommentEmptyLines bool
}
// NewPreprocessorForBuffer returns a Preprocessor that scans buffer from its
// first byte. The slice is used as is, not copied. The returned error is
// always nil.
func NewPreprocessorForBuffer(buffer []byte) (*Preprocessor, error) {
	pre := new(Preprocessor)
	pre.buffer = buffer
	pre.pos = 0
	pre.state = stateNormal
	pre.RemoveMultiLineCommentEmptyLines = false

	// Start on line 1 with column 0 and no file name.
	pre.info.fileName = ""
	pre.info.line = 1
	pre.info.column = 0
	return pre, nil
}
// NewPreprocessorForFileNamed loads the whole file fileName into memory and
// returns a Preprocessor for its contents. fileName is recorded so that it
// shows up in positions. Any error from reading the file or from creating the
// Preprocessor is returned unchanged.
func NewPreprocessorForFileNamed(fileName string) (*Preprocessor, error) {
	contents, readErr := readBytesFromFileNamed(fileName)
	if readErr != nil {
		return nil, readErr
	}

	pre, newErr := NewPreprocessorForBuffer(contents)
	if newErr != nil {
		return nil, newErr
	}

	pre.info.fileName = fileName
	return pre, nil
}
// String describes the scanner for debugging: the current position, the
// offset into the buffer and the numeric scanner state.
func (p *Preprocessor) String() string {
	return fmt.Sprint(p.info, ", pos=", p.pos, ", state=", p.state)
}
// IsEOF reports whether the scan position has reached or passed the end of
// the buffer.
func (p *Preprocessor) IsEOF() bool {
	return len(p.buffer) <= p.pos
}
// FileSize returns the length in bytes of the input buffer.
func (p *Preprocessor) FileSize() int {
	size := len(p.buffer)
	return size
}
// Reset rewinds the preprocessor to the start of its buffer so the same text
// can be read again. The buffer, the file name and
// RemoveMultiLineCommentEmptyLines are left untouched.
func (p *Preprocessor) Reset() {
	p.state = stateNormal
	p.pos = 0
	p.info.line = 1
	// Column 0 means "no byte consumed yet on this line": Read increments the
	// column before it handles each byte, so the first byte is column 1. This
	// matches the initial position set by NewPreprocessorForBuffer; starting
	// at 1 here would shift every reported column by one after a Reset.
	p.info.column = 0
}
// Read copies the next part of the preprocessed text into target and returns
// the number of bytes written.
//
// While copying, Read
//   - removes // comments but keeps the newline that ends them,
//   - removes /* */ comments; newlines inside them are kept unless
//     RemoveMultiLineCommentEmptyLines is set,
//   - joins lines: a backslash directly followed by a newline produces no
//     output, both outside literals and inside "..." strings,
//   - copies string literals, character literals and all other bytes
//     unchanged. A character literal must be one byte or a backslash plus one
//     byte; anything else is reported as an error.
//
// The scanner state is kept between calls, so the text can be drained with
// repeated calls. A backslash and the byte after it are always written
// together; when only one byte of room is left Read returns early without
// consuming them, and when target is too short to ever hold the pair (length
// 1) it returns an error and leaves the input position untouched.
//
// At the end of the input Read returns 0, nil; it does not return io.EOF.
// When a syntax error is found, Read returns the number of bytes written
// before the error together with the error, and the preprocessor becomes
// unusable until Reset is called.
//
// The recorded position (p.info) always refers to the input: every newline
// consumed advances the line and restarts the column, including newlines
// removed by line joining or hidden inside comments.
func (p *Preprocessor) Read(target []byte) (n int, err error) {
	const (
		star         = '*'
		forwardSlash = '/'
		backSlash    = '\\'
		singleQuote  = '\''
		doubleQuote  = '"'
		newline      = '\n'
	)

	dst := 0
	for dst < len(target) && p.pos < len(p.buffer) {
		c := p.buffer[p.pos]

		// A pending backslash followed by anything but a newline yields two
		// output bytes. Check for room before consuming c so that a short
		// target never loses input or emits half a pair.
		pair := (p.state == stateSawBackSlash || p.state == stateSawDoubleQuoteEscape) && c != newline
		if pair && len(target)-dst < 2 {
			if dst == 0 {
				return 0, fmt.Errorf("target buffer too small: need at least 2 bytes, have %d", len(target))
			}
			break
		}

		p.pos++
		p.info.column++

		switch p.state {
		case stateNormal:
			switch c {
			case singleQuote: // start of a 'c' character literal
				target[dst] = c
				dst++
				p.state = stateSawSingleQuote
			case doubleQuote: // start of a "..." string literal
				target[dst] = c
				dst++
				p.state = stateSawDoubleQuote
			case forwardSlash: // may start a comment; hold the '/' back
				p.state = stateSawForwardSlash
			case backSlash: // line continuation or escaped byte; hold it back
				p.state = stateSawBackSlash
			default: // ordinary byte, newline included
				target[dst] = c
				dst++
			}

		case stateSawBackSlash:
			// Backslash + newline joins two lines and produces no output;
			// anything else is copied as the two bytes that were read.
			if c != newline {
				target[dst] = backSlash
				target[dst+1] = c
				dst += 2
			}
			p.state = stateNormal

		case stateSawForwardSlash:
			switch c {
			case forwardSlash: // "//"
				p.state = stateSingleLineComment
			case star: // "/*"
				p.state = stateMultiLineComment
			default:
				// The '/' was an ordinary byte. Emit it and let the next
				// iteration handle c in the normal state, so that a quote,
				// backslash or newline right after a '/' is still recognised.
				target[dst] = forwardSlash
				dst++
				p.state = stateNormal
				p.pos--
				p.info.column--
				continue
			}

		case stateSawSingleQuote:
			switch c {
			case singleQuote: // '' is not a valid character literal
				p.state = stateInvalid
				return dst, fmt.Errorf("unexpected \"'\" at %v", p.info)
			case backSlash: // escaped character follows
				target[dst] = c
				dst++
				p.state = stateSawSingleQuoteEscape
			default: // the literal's single character
				target[dst] = c
				dst++
				p.state = stateExpectsSingleQuote
			}

		case stateSawDoubleQuote:
			switch c {
			case doubleQuote: // end of the string
				target[dst] = c
				dst++
				p.state = stateNormal
			case backSlash: // escape or line continuation; hold it back
				p.state = stateSawDoubleQuoteEscape
			default:
				target[dst] = c
				dst++
			}

		case stateSawSingleQuoteEscape:
			// Any byte is accepted as the escaped character.
			target[dst] = c
			dst++
			p.state = stateExpectsSingleQuote

		case stateSawDoubleQuoteEscape:
			// Backslash + newline continues the string on the next line and
			// produces no output; any other escape is copied unchanged.
			if c != newline {
				target[dst] = backSlash
				target[dst+1] = c
				dst += 2
			}
			p.state = stateSawDoubleQuote

		case stateExpectsSingleQuote:
			if c != singleQuote {
				p.state = stateInvalid
				return dst, fmt.Errorf("expected \"'\" but found %q at %v", c, p.info)
			}
			target[dst] = c
			dst++
			p.state = stateNormal

		case stateSingleLineComment:
			// Skip everything up to the newline, which is kept so that the
			// output has the same number of lines.
			if c == newline {
				target[dst] = c
				dst++
				p.state = stateNormal
			}

		case stateMultiLineComment, stateMultiLineCommentSawStar:
			switch {
			case c == forwardSlash && p.state == stateMultiLineCommentSawStar:
				p.state = stateNormal // "*/" closes the comment
			case c == star:
				p.state = stateMultiLineCommentSawStar
			default:
				p.state = stateMultiLineComment
				// Keep newlines (also one that directly follows a '*') so
				// the output line count is preserved, unless told otherwise.
				if c == newline && !p.RemoveMultiLineCommentEmptyLines {
					target[dst] = c
					dst++
				}
			}

		default: // stateInvalid or an unknown state
			return dst, fmt.Errorf("invalid parser state %v at %v", p.state, p.info)
		}

		if c == newline {
			p.info.line++
			p.info.column = 0
		}
	}

	// A '/' or '\' held back as the very last input byte can no longer start
	// a comment or an escape; emit it instead of dropping it.
	if p.pos >= len(p.buffer) && dst < len(target) {
		switch p.state {
		case stateSawForwardSlash:
			target[dst] = forwardSlash
			dst++
			p.state = stateNormal
		case stateSawBackSlash:
			target[dst] = backSlash
			dst++
			p.state = stateNormal
		case stateSawDoubleQuoteEscape:
			target[dst] = backSlash
			dst++
			p.state = stateSawDoubleQuote
		}
	}
	return dst, nil
}
// main preprocesses the file named by the first command-line argument and
// writes the result to standard output.
//
// Exit status: 0 on success, 1 when the argument is missing or the file cannot
// be loaded, 2 when preprocessing or writing the output fails. Every failure
// is reported on standard error.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: preprocessor <file>")
		os.Exit(1)
	}

	preprocessor, err := NewPreprocessorForFileNamed(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// The output is never longer than the input, so a buffer of the input's
	// size always has room for whatever a single Read produces.
	b := make([]byte, preprocessor.FileSize())
	for {
		n, err := preprocessor.Read(b)
		if n > 0 {
			if _, werr := os.Stdout.Write(b[:n]); werr != nil {
				fmt.Fprintln(os.Stderr, werr)
				os.Exit(2)
			}
		}
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(2)
		}
		if n == 0 {
			break
		}
	}
}
package main
import (
"testing"
)
import "github.com/stretchr/testify/require"
// TestLineContinuation checks that a backslash at the end of a line joins
// that line with the next one.
func TestLineContinuation(t *testing.T) {
	const source = `#define CALC ( a , b ) \
( ( a * b ) + ( a - 2 ) - \
( b * 2 ) )
`
	const want = `#define CALC ( a , b ) ( ( a * b ) + ( a - 2 ) - ( b * 2 ) )
`

	pre, err := NewPreprocessorForBuffer([]byte(source))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, pre.IsEOF())

	out := make([]byte, 1024)
	count, err := pre.Read(out)
	require.Nil(t, err, "Failed to read from buffer")
	require.Equal(t, len(want), count)

	got := string(out[:count])
	require.Equal(t, want, got)
}
// TestSingleLineComment checks that a // comment is removed up to, but not
// including, the newline that ends it.
//
// The fixtures use escaped strings on purpose: the text in front of the
// comment ("def ") keeps its trailing space, which cannot be seen, and is
// easily lost, in a multi-line raw string.
func TestSingleLineComment(t *testing.T) {
	input := "abc\ndef // hello\nghi\n"
	expected := "abc\ndef \nghi\n"

	p, err := NewPreprocessorForBuffer([]byte(input))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, p.IsEOF())

	result := make([]byte, 1024)
	n, err := p.Read(result)
	require.Nil(t, err, "Failed to read from buffer")
	require.Equal(t, len(expected), n)
	actual := string(result[:n])
	require.Equal(t, expected, actual)
}
// TestMultiLineComment checks that a /* */ comment is removed while the
// newline inside it is kept, so the output has as many lines as the input.
//
// The fixtures use escaped strings on purpose: the expected text contains a
// trailing space ("def ") and an empty line, neither of which is visible in a
// multi-line raw string.
func TestMultiLineComment(t *testing.T) {
	input := "abc\ndef /* hello\nghi***/\ntst\n"
	expected := "abc\ndef \n\ntst\n"

	p, err := NewPreprocessorForBuffer([]byte(input))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, p.IsEOF())

	result := make([]byte, 1024)
	n, err := p.Read(result)
	require.Nil(t, err, "Failed to read from buffer")
	require.Equal(t, len(expected), n)
	actual := string(result[:n])
	require.Equal(t, expected, actual)
}
// TestEscapeSequences checks escapes inside literals: an escaped quote in a
// string, an escaped character in a character literal, and a string that is
// continued on the next line with a backslash.
func TestEscapeSequences(t *testing.T) {
	out := make([]byte, 1024)

	// An escaped double quote does not end the string.
	source := `hello "world \" where"`
	pre, err := NewPreprocessorForBuffer([]byte(source))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, pre.IsEOF())
	count, err := pre.Read(out)
	require.Nil(t, err, "Failed to read from buffer")
	require.Equal(t, len(source), count)
	got := string(out[:count])
	require.Equal(t, source, got)

	// An escaped character inside a character literal is copied unchanged.
	source = `hello '\t'`
	pre, err = NewPreprocessorForBuffer([]byte(source))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, pre.IsEOF())
	count, err = pre.Read(out)
	require.Nil(t, err, "Failed to read from buffer")
	require.Equal(t, len(source), count)
	got = string(out[:count])
	require.Equal(t, source, got)

	// A backslash at the end of a line inside a string joins the two lines.
	source = `"hello \
world"`
	pre, err = NewPreprocessorForBuffer([]byte(source))
	require.Nil(t, err, "Failed to create parser")
	require.False(t, pre.IsEOF())
	count, err = pre.Read(out)
	require.Nil(t, err, "Failed to read from buffer")
	got = string(out[:count])
	want := `"hello world"`
	require.Equal(t, want, got)
}
// TestVarious runs a table of small inputs through a single Read call.
//
// All fixtures are escaped strings so that the significant whitespace is
// visible: text on both sides of a removed comment is kept as is (two spaces
// remain around a comment that had one space on each side), and the space in
// front of a // comment stays in front of the newline that ends it.
func TestVarious(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"", ""},
		{"abc", "abc"},
		{"abc\n", "abc\n"},
		{"\nabc\n", "\nabc\n"},
		{"/* empty */", ""},
		{" /* not empty */", " "},
		{"/* not empty */ ", " "},
		{" /* not empty */ ", "  "},
		{" // anything", " "},
		{" // anything\n", " \n"},
	}
	for i, test := range tests {
		p, err := NewPreprocessorForBuffer([]byte(test.input))
		require.Nil(t, err, "Failed to create parser (test %v)", i)
		require.Equal(t, len(test.input) == 0, p.IsEOF())

		result := make([]byte, 1024)
		n, err := p.Read(result)
		require.Nil(t, err, "Failed to read from buffer (test %v)", i)
		require.Equal(t, len(test.expected), n)
		actual := string(result[:n])
		require.Equal(t, test.expected, actual)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment