Last active
March 16, 2021 11:53
-
-
Save songtianyi/dacee5974dad41920247e98cc5489033 to your computer and use it in GitHub Desktop.
Insert a padding space between English (en) and Chinese (zh) words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"os" | |
"unicode" | |
"unicode/utf8" | |
) | |
var (
	// zh_en_mapping maps full-width punctuation to its ASCII equivalent.
	// padding only consults this table for runes that occupy more than one
	// byte, so every key must be a multi-byte rune; an ASCII key could never
	// match. The key is written as an escape (U+FF1A, FULLWIDTH COLON) so it
	// cannot be silently normalised to the ASCII colon by an editor or a
	// copy/paste round trip.
	zh_en_mapping = map[rune]rune{
		'\uFF1A': ':',
	}

	// mdchars lists ASCII characters that carry Markdown or general
	// punctuation meaning. is_md_char reports membership in this list.
	mdchars = []rune{
		'`', '*', '-', '_', '>', '(', ')', '[', ']', '"', '\'', ':', ';',
		'@', '!', '/', '.', '=', '&', '\\', '+', '{', '}', '~', ',', '?',
	}
)
func is_md_char(r rune) bool { | |
for _, v := range mdchars { | |
if v == r { | |
return true | |
} | |
} | |
return false | |
} | |
func special(r rune) bool { | |
return unicode.IsSpace(r) || unicode.IsPunct(r) || is_md_char(r) | |
} | |
func padding(text string) { | |
b := []byte(text) | |
var ( | |
last_r rune | |
last_size int | |
) | |
for len(b) > 0 { | |
r, size := utf8.DecodeRune(b) | |
skip := size | |
if last_size == 1 && size > 1 { | |
// songtianyi:$ --> songtianyi:$ | |
if v, ok := zh_en_mapping[r]; ok { | |
// replace | |
r = v | |
size = 1 | |
} | |
} | |
if ((last_size-size) == 2 || (last_size-size) == -2) && | |
!special(last_r) && | |
!special(r) { | |
// space padding | |
fmt.Printf("%c", ' ') | |
} | |
// print original stuff | |
fmt.Printf("%c", r) | |
last_r = r | |
last_size = size | |
b = b[skip:] | |
} | |
} | |
func main() { | |
scanner := bufio.NewScanner(os.Stdin) | |
for scanner.Scan() { | |
padding(scanner.Text()) | |
fmt.Printf("\n") | |
} | |
if err := scanner.Err(); err != nil { | |
fmt.Fprintln(os.Stderr, "reading standard input:", err) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment