Created
October 1, 2019 22:55
-
-
Save lelandbatey/f8342ee5358efe61db1db11519e1176c to your computer and use it in GitHub Desktop.
Sane text formatter. Formats text in a 'nice' way, though very basic.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "bufio" | |
| "bytes" | |
| "fmt" | |
| "io" | |
| "io/ioutil" | |
| "os" | |
| "strings" | |
| "unicode" | |
| ) | |
| type RuneReader struct { | |
| Contents []rune | |
| ContentLen int | |
| RunePos int | |
| LineNo int | |
| } | |
| func (self *RuneReader) ReadRune() (rune, error) { | |
| var toret rune = 0 | |
| var err error | |
| if self.RunePos < self.ContentLen { | |
| toret = self.Contents[self.RunePos] | |
| if toret == '\n' { | |
| self.LineNo += 1 | |
| } | |
| self.RunePos += 1 | |
| } else { | |
| err = io.EOF | |
| } | |
| return toret, err | |
| } | |
| func (self *RuneReader) UnreadRune() error { | |
| if self.RunePos == 0 { | |
| return bufio.ErrInvalidUnreadRune | |
| } | |
| self.RunePos -= 1 | |
| switch self.Contents[self.RunePos] { | |
| case '\n': | |
| self.LineNo -= 1 | |
| } | |
| return nil | |
| } | |
| func NewRuneReader(r io.Reader) *RuneReader { | |
| b, _ := ioutil.ReadAll(r) | |
| contents := bytes.Runes(b) | |
| return &RuneReader{ | |
| Contents: contents, | |
| ContentLen: len(contents), | |
| RunePos: 0, | |
| LineNo: 1, | |
| } | |
| } | |
// isIdent reports whether r may appear in an identifier-like word: any
// Unicode letter, any Unicode digit, or an underscore.
func isIdent(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_'
}
// ScanUnit is one group of runes produced by the scanner, together with the
// brace depth and line number recorded when the group was completed.
type ScanUnit struct {
	BraceLevel int
	LineNo     int
	Value      []rune
}

// scanUnitEscaper rewrites newlines, tabs and double quotes into their
// backslash-escaped spellings in a single pass.
var scanUnitEscaper = strings.NewReplacer(
	"\n", "\\n",
	"\t", "\\t",
	"\"", "\\\"",
)

// String renders the unit as a JSON-like object followed by a comma. Newlines,
// tabs and double quotes inside Value are escaped; other runes are emitted
// unchanged.
func (u ScanUnit) String() string {
	escaped := scanUnitEscaper.Replace(string(u.Value))
	return fmt.Sprintf(`{"value": "%v", "BraceLevel": %v, "LineNo": %v},`, escaped, u.BraceLevel, u.LineNo)
}
| var ( | |
| braceLev int = 0 | |
| ) | |
| func BuildScanUnit(rr *RuneReader) (*ScanUnit, error) { | |
| rv := &ScanUnit{ | |
| 0, | |
| 1, | |
| []rune{}, | |
| } | |
| var ch rune | |
| buf := make([]rune, 0) | |
| setReturn := func() *ScanUnit { | |
| rv.BraceLevel = braceLev | |
| rv.LineNo = rr.LineNo | |
| rv.Value = buf | |
| return rv | |
| } | |
| // Populate the buffer with at least one rune so even if it's an unknown | |
| // character it will at least return this | |
| ch, err := rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } | |
| buf = append(buf, ch) | |
| switch { | |
| case ch == '/': | |
| // Searching for comments beginning with '/' | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '/' { | |
| // Handle single line comments of the form '//' | |
| buf = append(buf, ch) | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '\n' { | |
| buf = append(buf, ch) | |
| return setReturn(), nil | |
| } | |
| buf = append(buf, ch) | |
| } | |
| } else if ch == '*' { | |
| // Handle (potentially) multi-line comments of the form '/**/' | |
| buf = append(buf, ch) | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '*' { | |
| buf = append(buf, ch) | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '/' { | |
| buf = append(buf, ch) | |
| return setReturn(), nil | |
| } | |
| } else { | |
| // Add the body of the comment to the buffer | |
| buf = append(buf, ch) | |
| } | |
| } | |
| } else { | |
| // Not a comment, so unread the last Rune and return this '/' only | |
| rr.UnreadRune() | |
| return setReturn(), nil | |
| } | |
| case ch == '"': | |
| // Handle strings | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '\\' { | |
| // Handle escape sequences within strings | |
| buf = append(buf, ch) | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else { | |
| buf = append(buf, ch) | |
| } | |
| } else if ch == '"' { | |
| // Closing quotation | |
| buf = append(buf, ch) | |
| return setReturn(), nil | |
| } else { | |
| buf = append(buf, ch) | |
| } | |
| } | |
| case ch == '\'': | |
| // Handle single-quoted strings | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else if ch == '\\' { | |
| // Handle escape sequences within strings | |
| buf = append(buf, ch) | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| return setReturn(), err | |
| } else { | |
| buf = append(buf, ch) | |
| } | |
| } else if ch == '\'' { | |
| // Closing quotation | |
| buf = append(buf, ch) | |
| return setReturn(), nil | |
| } else { | |
| buf = append(buf, ch) | |
| } | |
| } | |
| case unicode.IsSpace(ch): | |
| // Group consecutive white space characters | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| // Don't pass along this EOF since we did find a valid 'Unit' | |
| // to return. This way, the next call of this function will | |
| // return EOF and nothing else, a more clear behavior. | |
| if err == io.EOF { | |
| return setReturn(), nil | |
| } | |
| return setReturn(), err | |
| } else if !unicode.IsSpace(ch) { | |
| rr.UnreadRune() | |
| break | |
| } | |
| buf = append(buf, ch) | |
| } | |
| case isIdent(ch): | |
| // Group consecutive letters | |
| for { | |
| ch, err = rr.ReadRune() | |
| if err != nil { | |
| if err == io.EOF { | |
| return setReturn(), nil | |
| } | |
| return setReturn(), err | |
| } else if !isIdent(ch) { | |
| rr.UnreadRune() | |
| break | |
| } | |
| buf = append(buf, ch) | |
| } | |
| case ch == '{': | |
| braceLev += 1 | |
| case ch == '}': | |
| braceLev -= 1 | |
| } | |
| // Implicitly, everything that's not a group of letters, not a group of | |
| // whitespace, not a comment, and not a string-literal, (common examples of | |
| // runes in this category are numbers and symbols like '&' or '9') will be | |
| // returned one rune at a time. | |
| return setReturn(), nil | |
| } | |
| // Service scanner conducts many of the basic scanning operatiions of a Lexer, | |
| // with some additional service-specific behavior. | |
| // | |
| // Since this scanners is specifically for scanning the Protobuf service | |
| // definitions, it will only scan the sections of the input from the reader | |
| // that it believes are part of a service definition. This means that it will | |
| // "fast forward" through its input reader until it finds the start of a | |
| // service definition. It will keep track of braces (the "{}" characters) till | |
| // it finds the final closing brace marking the end of the service definition. | |
| type SvcScanner struct { | |
| R *RuneReader | |
| BraceLevel int | |
| Buf []*ScanUnit | |
| UnitPos int | |
| lineNo int | |
| } | |
| func NewSvcScanner(r io.Reader) *SvcScanner { | |
| b := make([]*ScanUnit, 0) | |
| rr := NewRuneReader(r) | |
| for { | |
| unit, err := BuildScanUnit(rr) | |
| if err == nil { | |
| b = append(b, unit) | |
| } else { | |
| break | |
| } | |
| } | |
| return &SvcScanner{ | |
| R: NewRuneReader(r), | |
| BraceLevel: 0, | |
| Buf: b, | |
| UnitPos: 0, | |
| } | |
| } | |
| // ReadUnit returns the next "group" of runes found in the input stream. If the | |
| // end of the stream is reached, io.EOF will be returned as error. No other | |
| // errors will be returned. | |
| func (self *SvcScanner) ReadUnit() ([]rune, error) { | |
| var rv []rune | |
| var err error | |
| if self.UnitPos < len(self.Buf) { | |
| unit := self.Buf[self.UnitPos] | |
| self.BraceLevel = unit.BraceLevel | |
| self.lineNo = unit.LineNo | |
| rv = unit.Value | |
| self.UnitPos += 1 | |
| } else { | |
| err = io.EOF | |
| } | |
| return rv, err | |
| } | |
| func (self *SvcScanner) UnreadUnit() error { | |
| if self.UnitPos == 0 { | |
| return fmt.Errorf("Cannot unread when scanner is at start of input") | |
| } | |
| // If we're on the first unit, Unreading means setting the state of the | |
| // scanner back to it's defaults. | |
| if self.UnitPos == 1 { | |
| self.UnitPos = 0 | |
| self.BraceLevel = 0 | |
| self.lineNo = 0 | |
| } | |
| self.UnitPos -= 1 | |
| // Since the state of the scanner usually tracks one behind the `unit` | |
| // indicated by `UnitPos` we further subtract one when selecting the unit | |
| // to reflect the state of | |
| unit := self.Buf[self.UnitPos-1] | |
| self.BraceLevel = unit.BraceLevel | |
| self.lineNo = unit.LineNo | |
| return nil | |
| } | |
| func (self *SvcScanner) UnReadToPosition(position int) error { | |
| for { | |
| if self.UnitPos != position { | |
| err := self.UnreadUnit() | |
| if err != nil { | |
| return err | |
| } | |
| } else { | |
| break | |
| } | |
| } | |
| return nil | |
| } | |
| func (self *SvcScanner) GetLineNumber() int { | |
| return self.lineNo | |
| } | |
| func main() { | |
| if len(os.Args) > 1 { | |
| fmt.Fprintf(os.Stderr, `Usage: %s | |
| %s accepts no options. It reads from STDIN and writes to STDOUT. | |
| %s will do it's best to format the text provided in a "sane" way. | |
| `, | |
| os.Args[0], os.Args[0], os.Args[0]) | |
| return | |
| } | |
| scn := NewSvcScanner(os.Stdin) | |
| peak := func() (string, error) { | |
| unit, err := scn.ReadUnit() | |
| if err != nil { | |
| return "", err | |
| } | |
| str := string(unit) | |
| // Coalesce whitespace | |
| if unicode.IsSpace(unit[0]) { | |
| if strings.Contains(str, "\n") { | |
| str = "\n" | |
| } else { | |
| str = " " | |
| } | |
| } | |
| err = scn.UnreadUnit() | |
| if err != nil { | |
| return "", err | |
| } | |
| return str, nil | |
| } | |
| var base_indent = " " | |
| for { | |
| unit, err := scn.ReadUnit() | |
| if err == io.EOF { | |
| break | |
| } | |
| if err != nil { | |
| panic(err) | |
| } | |
| str := string(unit) | |
| // Coalesce whitespace | |
| if unicode.IsSpace(unit[0]) { | |
| if strings.Contains(str, "\n") { | |
| str = "\n" | |
| } else { | |
| str = " " | |
| } | |
| } | |
| if str == "\n" { | |
| next, err := peak() | |
| if err == io.EOF || (err == nil && next == "}") { | |
| continue | |
| } else if err != nil { | |
| panic(err) | |
| } | |
| indent := "" | |
| for i := 0; i < scn.BraceLevel; i++ { | |
| indent += " " | |
| } | |
| fmt.Printf("%s%s", str, indent) | |
| } else if str == "{" { | |
| indent := "" | |
| next, err := peak() | |
| if (err == io.EOF || err == nil) && next == "\n" { | |
| _, _ = scn.ReadUnit() | |
| } | |
| for i := 0; i < scn.BraceLevel; i++ { | |
| indent += " " | |
| } | |
| fmt.Printf("%s\n%s", str, indent) | |
| } else if str == "}" { | |
| indent := "" | |
| for i := 0; i < scn.BraceLevel; i++ { | |
| indent += base_indent | |
| } | |
| next, err := peak() | |
| if (err == io.EOF || err == nil) && (next == " " || next == "\n") { | |
| _, _ = scn.ReadUnit() | |
| } else if err != nil { | |
| panic(err) | |
| } | |
| var after string = "\n" + indent | |
| next, err = peak() | |
| if err == io.EOF || (err == nil && next == "}") { | |
| after = "" | |
| } else if err != nil { | |
| panic(err) | |
| } | |
| fmt.Printf("\n%s%s%s", indent, str, after) | |
| // For comments with newlines, print indentation after the comment | |
| } else if unit[0] == '/' { | |
| var after string = "" | |
| if strings.Contains(str, "\n") { | |
| for i := 0; i < scn.BraceLevel; i++ { | |
| after += base_indent | |
| } | |
| } | |
| next, err := peak() | |
| if (err == io.EOF || err == nil) && next == " " { | |
| _, _ = scn.ReadUnit() | |
| } else if err != nil { | |
| panic(err) | |
| } | |
| fmt.Printf("%s%s", str, after) | |
| } else { | |
| fmt.Printf("%s", str) | |
| } | |
| } | |
| fmt.Println() | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment