Created
September 3, 2021 13:02
-
-
Save idispatch/b9f10c99399f97c7973722f57a03f5d6 to your computer and use it in GitHub Desktop.
Preprocessor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module preprocessor
go 1.16
require github.com/stretchr/testify v1.7.0 // indirect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"os" | |
) | |
// readBytesFromFileNamed returns the entire contents of the file at fileName.
//
// It delegates to os.ReadFile, which sizes its buffer from the file's reported
// size (so regular files are not over-allocated) and keeps reading until EOF,
// so a file whose length changes between the size query and the read is still
// read completely. Errors from opening or reading the file are returned
// unchanged; the byte slice is nil in that case.
func readBytesFromFileNamed(fileName string) ([]byte, error) {
	return os.ReadFile(fileName)
}
// preprocessorState identifies the lexical context the preprocessor is in.
// It is carried between Read calls so input can be consumed in pieces.
type preprocessorState int8

const (
	stateNormal                  preprocessorState = iota // ordinary text, outside literals and comments
	stateSawBackSlash                                     // '\' seen in ordinary text; next byte decides continuation or literal pair
	stateSawForwardSlash                                  // '/' seen in ordinary text; next byte decides whether a comment starts
	stateSawSingleQuote                                   // opening ' of a character literal seen
	stateSawDoubleQuote                                   // inside a "..." string literal
	stateSawSingleQuoteEscape                             // '\' seen inside a character literal
	stateSawDoubleQuoteEscape                             // '\' seen inside a string literal
	stateExpectsSingleQuote                               // character literal body consumed; closing ' must follow
	stateSingleLineComment                                // inside a // comment
	stateMultiLineComment                                 // inside a /* ... */ comment
	stateMultiLineCommentSawStar                          // '*' seen inside a /* ... */ comment; '/' would close it
	stateInvalid                                          // malformed input was seen; further reads report an error
)

// preprocessorStateNames maps each state to its identifier for diagnostics.
var preprocessorStateNames = [...]string{
	stateNormal:                  "stateNormal",
	stateSawBackSlash:            "stateSawBackSlash",
	stateSawForwardSlash:         "stateSawForwardSlash",
	stateSawSingleQuote:          "stateSawSingleQuote",
	stateSawDoubleQuote:          "stateSawDoubleQuote",
	stateSawSingleQuoteEscape:    "stateSawSingleQuoteEscape",
	stateSawDoubleQuoteEscape:    "stateSawDoubleQuoteEscape",
	stateExpectsSingleQuote:      "stateExpectsSingleQuote",
	stateSingleLineComment:       "stateSingleLineComment",
	stateMultiLineComment:        "stateMultiLineComment",
	stateMultiLineCommentSawStar: "stateMultiLineCommentSawStar",
	stateInvalid:                 "stateInvalid",
}

// String returns the state's identifier so that %v in error messages shows a
// name instead of a bare number. Values outside the declared range are
// rendered as "preprocessorState(N)".
func (s preprocessorState) String() string {
	if s < 0 || int(s) >= len(preprocessorStateNames) {
		return fmt.Sprintf("preprocessorState(%d)", int8(s))
	}
	return preprocessorStateNames[s]
}
// LineInfo is a position inside a text, expressed as a line and a column.
type LineInfo struct {
	line, column int // Lines and columns are counted from 1
}

// String formats the position as "line:column".
func (i LineInfo) String() string {
	return fmt.Sprintf("%v:%v", i.line, i.column)
}
type FileLineInfo struct { | |
LineInfo | |
fileName string | |
} | |
func (f FileLineInfo) String() string { | |
return fmt.Sprintf("%v:%v", f.fileName, f.LineInfo) | |
} | |
type Preprocessor struct { | |
buffer []byte | |
pos int | |
info FileLineInfo | |
state preprocessorState | |
// TODO: stash | |
RemoveMultiLineCommentEmptyLines bool | |
} | |
func NewPreprocessorForBuffer(buffer []byte) (*Preprocessor, error) { | |
return &Preprocessor{ | |
info: FileLineInfo{LineInfo{line: 1, column: 0}, ""}, | |
pos: 0, | |
RemoveMultiLineCommentEmptyLines: false, | |
buffer: buffer, | |
state: stateNormal, | |
}, nil | |
} | |
func NewPreprocessorForFileNamed(fileName string) (*Preprocessor, error) { | |
buffer, err := readBytesFromFileNamed(fileName) | |
if err != nil { | |
return nil, err | |
} | |
p, err := NewPreprocessorForBuffer(buffer) | |
if err != nil { | |
return nil, err | |
} | |
p.info.fileName = fileName | |
return p, nil | |
} | |
func (p *Preprocessor) String() string { | |
return fmt.Sprintf("%v, pos=%v, state=%v", p.info, p.pos, p.state) | |
} | |
func (p *Preprocessor) IsEOF() bool { | |
return p.pos >= len(p.buffer) | |
} | |
func (p *Preprocessor) FileSize() int { | |
return len(p.buffer) | |
} | |
func (p *Preprocessor) Reset() { | |
p.state = stateNormal | |
p.pos = 0 | |
p.info.line = 1 | |
p.info.column = 1 | |
} | |
func (p *Preprocessor) Read(target []byte) (n int, err error) { | |
const ( | |
star = '*' | |
forwardSlash = '/' | |
backSlash = '\\' | |
singleQuote = '\'' | |
doubleQuote = '"' | |
carriageReturn = '\n' | |
) | |
dst := 0 | |
for dst < len(target) && p.pos < len(p.buffer) { | |
// On every iteration this code reads one byte | |
// and produces one or two bytes. | |
c := p.buffer[p.pos] | |
p.pos += 1 | |
p.info.column += 1 | |
switch p.state { | |
case stateNormal: | |
switch c { | |
case singleQuote: // handle 'a' char | |
target[dst] = c | |
dst++ | |
p.state = stateSawSingleQuote | |
continue | |
case doubleQuote: // handle "string" | |
target[dst] = c | |
dst++ | |
p.state = stateSawDoubleQuote | |
continue | |
case forwardSlash: // prepare for single- or multi- line comment | |
p.state = stateSawForwardSlash | |
continue | |
case backSlash: // possible next char state is '\' (escape) or '\n' (line concatenate) | |
p.state = stateSawBackSlash | |
continue | |
case carriageReturn: // handle next line | |
p.info.line += 1 | |
target[dst] = c | |
dst++ | |
continue | |
default: // handle simple character | |
target[dst] = c | |
dst++ | |
continue | |
} | |
case stateSawBackSlash: | |
switch c { | |
case carriageReturn: // not incrementing line counter and not updating column here | |
p.state = stateNormal // not producing output here - concatenate lines | |
continue | |
case backSlash: // handle "\\" | |
target[dst] = c | |
dst++ | |
if dst < len(target) { | |
target[dst] = c | |
dst++ | |
} else { | |
return 0, fmt.Errorf("??? - not enough space") | |
} | |
p.state = stateNormal | |
continue | |
default: // handle "\" escape | |
target[dst] = backSlash | |
dst++ | |
if dst < len(target) { | |
target[dst] = c | |
dst++ | |
} else { | |
return 0, fmt.Errorf("??? - not enough space") | |
} | |
p.state = stateNormal | |
continue | |
} | |
case stateSawForwardSlash: | |
switch c { | |
case forwardSlash: // handle start of // single line comment | |
p.state = stateSingleLineComment | |
continue | |
case star: // handle start of /* multi line comment | |
p.state = stateMultiLineComment | |
continue | |
default: // handle other cases | |
target[dst] = forwardSlash | |
dst++ | |
if dst < len(target) { | |
target[dst] = c | |
dst++ | |
} else { | |
return 0, fmt.Errorf("??? - not enough space") | |
} | |
p.state = stateNormal | |
continue | |
} | |
case stateSawSingleQuote: | |
switch c { | |
case singleQuote: // invalid '' sequence | |
p.state = stateInvalid | |
return 0, fmt.Errorf("unexpected \"'\" at %v", p.info) | |
case backSlash: // handle char escape | |
target[dst] = c | |
dst++ | |
p.state = stateSawSingleQuoteEscape | |
continue | |
default: // handle one character | |
target[dst] = c | |
dst++ | |
p.state = stateExpectsSingleQuote | |
continue | |
} | |
case stateSawDoubleQuote: | |
switch c { | |
case doubleQuote: // handle end of a string | |
target[dst] = c | |
dst++ | |
p.state = stateNormal | |
continue | |
case backSlash: // prepare to handle escape sequence or string continuation | |
p.state = stateSawDoubleQuoteEscape | |
continue | |
default: // handle regular character | |
target[dst] = c | |
dst++ | |
continue | |
} | |
case stateSawSingleQuoteEscape: | |
switch c { | |
default: // handle any character | |
target[dst] = c | |
dst++ | |
p.state = stateExpectsSingleQuote // proceed to "'" | |
continue | |
} | |
case stateSawDoubleQuoteEscape: | |
switch c { | |
case carriageReturn: | |
p.info.line += 1 | |
p.state = stateSawDoubleQuote // proceed to "" string | |
continue | |
default: // handle any character | |
target[dst] = backSlash | |
dst++ | |
if dst < len(target) { | |
target[dst] = c | |
dst++ | |
} else { | |
return 0, fmt.Errorf("??? - not enough space") | |
} | |
p.state = stateSawDoubleQuote // proceed to "" string | |
continue | |
} | |
case stateExpectsSingleQuote: | |
switch c { | |
case singleQuote: // done with character parsing | |
target[dst] = c | |
dst++ | |
p.state = stateNormal | |
continue | |
default: // nothing else allowed | |
p.state = stateInvalid | |
return 0, fmt.Errorf("invalid parser state %v at %v", p.state, p.info) | |
} | |
case stateSingleLineComment: | |
switch c { | |
case carriageReturn: // end of a single line comment | |
p.info.line += 1 | |
target[dst] = c // preserve line count | |
dst++ | |
p.state = stateNormal | |
continue | |
default: // proceed the single line comment | |
// should handle '\' ? | |
continue | |
} | |
case stateMultiLineComment: | |
switch c { | |
case star: // prepare to handle closing of multi-line comment | |
p.state = stateMultiLineCommentSawStar | |
continue | |
case carriageReturn: // handle next line | |
p.info.line += 1 | |
if !p.RemoveMultiLineCommentEmptyLines { | |
target[dst] = c // preserve line count | |
dst++ | |
} | |
continue | |
default: // proceed skipping characters in multi-line comments | |
continue | |
} | |
case stateMultiLineCommentSawStar: | |
switch c { | |
case forwardSlash: // process multi-line comment close | |
p.state = stateNormal | |
continue | |
case star: // prepare to handle closing of multi-line comment | |
p.state = stateMultiLineCommentSawStar | |
continue | |
default: // not closing multi-line comment | |
p.state = stateMultiLineComment | |
continue | |
} | |
case stateInvalid: | |
return 0, fmt.Errorf("invalid parser state %v at %v", p.state, p.info) | |
default: | |
return 0, fmt.Errorf("invalid parser state %v at %v", p.state, p.info) | |
} | |
} | |
return dst, nil | |
} | |
func main() { | |
preprocessor, err := NewPreprocessorForFileNamed(os.Args[1]) | |
if err != nil { | |
os.Exit(1) | |
} | |
b := make([]byte, preprocessor.FileSize()) | |
for { | |
n, err := preprocessor.Read(b) | |
if err != nil { | |
return | |
} | |
if n == 0 { | |
break | |
} | |
t := string(b[:n]) | |
fmt.Print(t) | |
} | |
if err != nil { | |
os.Exit(2) | |
} | |
os.Exit(0) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"testing" | |
) | |
import "github.com/stretchr/testify/require" | |
func TestLineContinuation(t *testing.T) { | |
input := `#define CALC ( a , b ) \ | |
( ( a * b ) + ( a - 2 ) - \ | |
( b * 2 ) ) | |
` | |
p, err := NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
expected := `#define CALC ( a , b ) ( ( a * b ) + ( a - 2 ) - ( b * 2 ) ) | |
` | |
result := make([]byte, 1024) | |
n, err := p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
require.Equal(t, len(expected), n) | |
actual := string(result[:n]) | |
require.Equal(t, expected, actual) | |
} | |
func TestSingleLineComment(t *testing.T) { | |
input := `abc | |
def // hello | |
ghi | |
` | |
p, err := NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
expected := `abc | |
def | |
ghi | |
` | |
result := make([]byte, 1024) | |
n, err := p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
require.Equal(t, len(expected), n) | |
actual := string(result[:n]) | |
require.Equal(t, expected, actual) | |
} | |
func TestMultiLineComment(t *testing.T) { | |
input := `abc | |
def /* hello | |
ghi***/ | |
tst | |
` | |
p, err := NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
expected := `abc | |
def | |
tst | |
` | |
result := make([]byte, 1024) | |
n, err := p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
require.Equal(t, len(expected), n) | |
actual := string(result[:n]) | |
require.Equal(t, expected, actual) | |
} | |
func TestEscapeSequences(t *testing.T) { | |
input := `hello "world \" where"` | |
p, err := NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
result := make([]byte, 1024) | |
n, err := p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
require.Equal(t, len(input), n) | |
actual := string(result[:n]) | |
require.Equal(t, input, actual) | |
input = `hello '\t'` | |
p, err = NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
n, err = p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
require.Equal(t, len(input), n) | |
actual = string(result[:n]) | |
require.Equal(t, input, actual) | |
input = `"hello \ | |
world"` | |
p, err = NewPreprocessorForBuffer([]byte(input)) | |
require.Nil(t, err, "Failed to create parser") | |
require.False(t, p.IsEOF()) | |
n, err = p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer") | |
actual = string(result[:n]) | |
expected := `"hello world"` | |
require.Equal(t, expected, actual) | |
} | |
func TestVarious(t *testing.T) { | |
type testData struct { | |
input string | |
expected string} | |
tests := []testData{ | |
testData{``, ``}, | |
testData{`abc`, `abc`}, | |
testData{`abc | |
`, `abc | |
`}, | |
testData{` | |
abc | |
`, ` | |
abc | |
`}, | |
testData{`/* empty */`, ``}, | |
testData{` /* not empty */`, ` `}, | |
testData{`/* not empty */ `, ` `}, | |
testData{` /* not empty */ `, ` `}, | |
testData{` // anything`, ` `}, | |
testData{` // anything | |
`, ` | |
`}} | |
for i, test := range tests { | |
p, err := NewPreprocessorForBuffer([]byte(test.input)) | |
require.Nil(t, err, "Failed to create parser (test %v)", i) | |
require.Equal(t, len(test.input) == 0, p.IsEOF()) | |
result := make([]byte, 1024) | |
n, err := p.Read(result) | |
require.Nil(t, err, "Failed to read from buffer (test %v)", i) | |
require.Equal(t, len(test.expected), n) | |
actual := string(result[:n]) | |
require.Equal(t, test.expected, actual) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment