Last active
February 29, 2024 07:17
-
-
Save febuiles/c9c825d234018305bf0410a5148c23c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io/ioutil" | |
"math/rand" | |
"os" | |
"time" | |
"unicode/utf8" | |
) | |
func main() { | |
// create a 2mb file of valid UTF-8 (worst case) | |
rand.Seed(time.Now().UnixNano()) | |
path := "gibberish.txt" | |
if err := writeFile(path, 2*1024*1024); err != nil { | |
panic(err) | |
} | |
data, err := ioutil.ReadFile(path) | |
if err != nil { | |
panic(err) | |
} | |
start := time.Now() | |
valid := utf8.Valid(data) | |
duration := time.Since(start) | |
defer os.Remove(path) | |
fmt.Printf("UTF-8 valid: %t\n", valid) | |
fmt.Printf("Time taken: %s\n", duration) | |
} | |
// writeFile creates (or truncates) the file at path and fills it with size
// random printable-ASCII bytes (32-126 inclusive) followed by the two bytes
// 0xF0 0x41. 0xF0 opens a four-byte UTF-8 sequence and 0x41 ('A') is not a
// continuation byte, so the file as a whole is never valid UTF-8.
//
// A size of zero or less writes only the two trailing bytes.
//
// It returns the first error from creating, writing, or closing the file.
func writeFile(path string, size int) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// A failed Close can mean buffered data never reached the disk, so
	// surface it unless an earlier write error is already being returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	// Fill a reusable buffer and write it per chunk instead of issuing one
	// write syscall per byte.
	const chunkSize = 32 * 1024
	buf := make([]byte, chunkSize)

	for remaining := size; remaining > 0; {
		chunk := buf
		if remaining < len(chunk) {
			chunk = chunk[:remaining]
		}
		// only use bytes we know are utf-8 valid
		for i := range chunk {
			chunk[i] = byte(rand.Intn(95) + 32) // 32-126 inclusive
		}
		if _, err := file.Write(chunk); err != nil {
			return err
		}
		remaining -= len(chunk)
	}

	// add one invalid sequence at the end
	if _, err := file.Write([]byte{0xF0, 0x41}); err != nil {
		return err
	}
	return nil
}
Author
febuiles
commented
Feb 27, 2024
•
~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 214.916µs
~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 248.625µs
~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 195.458µs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment