Last active
January 12, 2022 02:00
-
-
Save qxxt/f43af14081c00d22c2ed1848c1c2627b to your computer and use it in GitHub Desktop.
Parsing hosts Golang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// BSD Zero Clause License | |
// | |
// Copyright (c) 2022 qxxt | |
// | |
// Permission to use, copy, modify, and/or distribute this software for | |
// any purpose with or without fee is hereby granted. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL | |
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED | |
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE | |
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR | |
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS | |
// OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, | |
// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
// | |
// | |
// Note:
// ParseHostsLine() is much slower than ParseHostsByte(), and
// ParseHostsByte() has no slice capacity overhead.
// ParseHostsLine() is kept only to validate the results of
// ParseHostsByte(), because those results are hard to debug otherwise.
package main | |
import ( | |
"bytes" | |
"log" | |
"reflect" | |
"testing" | |
) | |
// HostsItem is one parsed hosts-file entry: an address followed by one or
// more names.
//
// The parsers in this file store sub-slices of the buffer they parsed in both
// fields; they do not copy the field bytes.
type HostsItem struct {
	Ip      []byte   // address: the first whitespace-separated field of the line
	Aliases [][]byte // names: every further field that precedes a '#' comment
}
var ( | |
TestFile = "hosts.txt" | |
MaxLine = 20 | |
LineRes, ByteRes []HostsItem | |
) | |
// TestData is the embedded hosts-file sample fed to both parsers. It covers
// full-line comments, trailing comments with and without a preceding space,
// a '#' glued to a name, and lines with several names.
var TestData = []byte(`
# [zapr.in]
127.0.0.1 appmm.zapr.in
127.0.0.1 sdk.zapr.in #
127.0.0.1 submit.zapr.in#
#hdjjdj
# [zarget.com]
127.0.0.1 zarget.com jdjdjdj djdjjd
127.0.0.1 cdn.zarget.com djjdjd#jdjjd
`)
// init is the hook for replacing the embedded TestData with the contents of
// TestFile. The loader line is commented out, so err is always nil here and
// the log.Fatal branch cannot run; re-enabling the loader also requires
// importing "os".
func init() {
	var err error
	// TestData, err = os.ReadFile(TestFile)
	if err != nil {
		log.Fatal(err)
	}
}
func BenchmarkLine(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
LineRes = ParseHostsLine(TestData) | |
} | |
} | |
func BenchmarkByte(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
ByteRes = ParseHostsByte(TestData) | |
} | |
} | |
func TestEquality(t *testing.T) { | |
if ByteRes == nil { | |
ByteRes = ParseHostsByte(TestData) | |
} | |
if LineRes == nil { | |
LineRes = ParseHostsLine(TestData) | |
} | |
if len(ByteRes) != len(LineRes) { | |
t.Logf("Unequal Length\nByteRes: %d\nLineRes: %d", | |
len(ByteRes), len(LineRes)) | |
t.FailNow() | |
} | |
for i := range ByteRes { | |
if !reflect.DeepEqual(ByteRes[i].Ip, LineRes[i].Ip) { | |
t.Logf("Index: %d\nByteRes.Ip: %q\nLineRes.Ip: %q", | |
i, ByteRes[i].Ip, LineRes[i].Ip) | |
t.FailNow() | |
} | |
if !reflect.DeepEqual(ByteRes[i].Aliases, LineRes[i].Aliases) { | |
t.Logf("Index: %d\nByteRes.Aliases: %q\nLineRes.Aliases: %q", | |
i, bytes.Join(ByteRes[i].Aliases, []byte(", ")), | |
bytes.Join(LineRes[i].Aliases, []byte(", "))) | |
t.FailNow() | |
} | |
} | |
} | |
func ParseHostsLine(b []byte) []HostsItem { | |
bOl := bytes.Split(b, []byte("\n")) | |
res := make([]HostsItem, len(bOl)) | |
i := 0 | |
for _, bl := range bOl { | |
if len(bl) == 0 || bl[0] == '#' { | |
continue | |
} | |
bb := bytes.Fields( | |
bytes.SplitN(bl, []byte("#"), 2)[0]) | |
if len(bb) < 2 { | |
continue | |
} | |
res[i].Ip = bb[0] | |
res[i].Aliases = bb[1:] | |
i++ | |
} | |
return res[:i] | |
} | |
func ParseHostsByte(b []byte) []HostsItem { | |
var ( | |
comment, event bool | |
mark = make([]int, 0, 4) | |
region = make([][]int, 0, bytes.Count(b, []byte("\n"))) | |
) | |
if b[len(b)-1] != '\n' { | |
b = append(b, '\n') | |
} | |
for i := 0; i < len(b); i++ { | |
switch b[i] { | |
case ' ', '\t': | |
if event { | |
mark = append(mark, i) | |
event = false | |
} | |
case '\n': | |
if comment { | |
comment = false | |
} | |
if l := len(mark); l != 0 { | |
if event { | |
mark = append(mark, i) | |
event = false | |
} | |
if l > 2 { | |
region = append(region, mark) | |
} | |
mark = make([]int, 0, 4) | |
} | |
case '#': | |
if !comment { | |
comment = true | |
if event { | |
mark = append(mark, i) | |
event = false | |
} | |
} | |
default: | |
if !comment && !event { | |
mark = append(mark, i) | |
event = true | |
} | |
} | |
} | |
var res = make([]HostsItem, len(region)) | |
for i, l := 0, len(res); i < l; i++ { | |
res[i].Ip = b[region[i][0]:region[i][1]] | |
ll := len(region[i][2:]) / 2 | |
res[i].Aliases = make([][]byte, ll) | |
for ii, iii := 0, 2; ii < ll; ii++ { | |
res[i].Aliases[ii] = | |
b[region[i][iii]:region[i][iii+1]] | |
iii += 2 | |
} | |
} | |
return res | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment