Created
November 24, 2019 19:43
-
-
Save ksurent/ce0146391e46b48794ff8c5f8c562f55 to your computer and use it in GitHub Desktop.
A toy grammar–based fuzzer for bookingcom/carbonapi. Heavily inspired by The Fuzzing Book.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"flag" | |
"fmt" | |
"math" | |
"math/rand" | |
"os" | |
"runtime/debug" | |
"strings" | |
"time" | |
"github.com/bookingcom/carbonapi/expr" | |
"github.com/bookingcom/carbonapi/expr/functions" | |
"github.com/bookingcom/carbonapi/pkg/parser" | |
"golang.org/x/exp/ebnf" | |
) | |
var ( | |
seed = flag.Int64("seed", time.Now().UnixNano(), "Random seed.") | |
dryrun = flag.Bool("dryrun", false, "Generate inputs but don't feed them into the parser.") | |
count = flag.Int("count", 100, "Number of inputs to generate in dry run (0 = infinity).") | |
maxtime = flag.Duration("maxtime", 0, "Maximum run time (0 = indefinite).") | |
) | |
func main() { | |
flag.Parse() | |
rand.Seed(*seed) | |
g, err := ebnf.Parse("", strings.NewReader(grmmr)) | |
if err != nil { | |
errout(err) | |
} | |
if err := ebnf.Verify(g, "Top"); err != nil { | |
errout(err) | |
} | |
computeAlts(g, altps) | |
total, crashers := 0, 0 | |
bytes := 0.0 | |
t0 := time.Now() | |
functions.New(nil) | |
if *maxtime > 0 { | |
go func() { | |
time.Sleep(*maxtime) | |
fmt.Fprintln(os.Stderr, "Maximum run time reached, exiting.") | |
os.Exit(0) | |
}() | |
} | |
go func() { | |
for { | |
// Yes, this is race-y. It's fine. | |
dt := time.Since(t0) | |
rate := bytes / 1024 / 10 / dt.Seconds() | |
avglen := bytes / float64(total) | |
fmt.Fprintf(os.Stderr, "Runtime: %s; Inputs: %d (%.2f KiB/s; Avglen: %.2f B); Crashers: %d\n", dt.Truncate(time.Second), total, rate, avglen, crashers) | |
time.Sleep(10 * time.Second) | |
} | |
}() | |
for { | |
input := generate(g, "Top", altps) | |
total++ | |
bytes += float64(len(input)) | |
if *dryrun { | |
fmt.Fprintf(os.Stdout, "[%s]\n", input) | |
if *count > 0 && total >= *count { | |
break | |
} | |
} else { | |
if crashed, panic, stack := check(input); crashed { | |
save(input, panic, stack) | |
crashers++ | |
} | |
} | |
} | |
} | |
func check(s string) (crashed bool, panic, stack string) { | |
defer func() { | |
if r := recover(); r != nil { | |
crashed = true | |
panic = fmt.Sprint(r) | |
stack = string(debug.Stack()) | |
// Remove 'goroutine 1 [running]' and debug.Stack itself. | |
stack = stack[strings.IndexByte(stack, '\n')+1:] | |
stack = stack[strings.IndexByte(stack, '\n')+1:] | |
stack = stack[strings.IndexByte(stack, '\n')+1:] | |
return | |
} | |
}() | |
if e, _, err := parser.ParseExpr(s); err == nil { | |
if _, err = expr.EvalExpr(e, 0, 1, nil); err == nil { | |
return | |
} | |
} | |
return | |
} | |
func save(crasher, panic, stack string) { | |
fh, err := os.OpenFile("/tmp/crashers", os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644) | |
if err != nil { | |
fmt.Fprintln(os.Stderr, "[", crasher, "]") | |
return | |
} | |
defer fh.Close() | |
fmt.Fprintf(fh, "found crasher:\n---\n[%s]\n---\n", crasher) | |
fmt.Fprintf(fh, "error:\n---\n%s\n%s\n---\n", panic, stack) | |
} | |
func generate(g ebnf.Grammar, start string, altps map[string][]float64) string { | |
// NOTE(asurikov): track stack depth? | |
var visit func(ebnf.Expression, []float64) string | |
visit = func(e ebnf.Expression, ps []float64) string { | |
switch ee := e.(type) { | |
case ebnf.Sequence: | |
var b strings.Builder | |
for i := range ee { | |
b.WriteString(visit(ee[i], ps)) | |
} | |
return b.String() | |
case ebnf.Alternative: | |
return visit(ee[roll(ps)], ps) | |
case *ebnf.Group: | |
return visit(ee.Body, ps) | |
case *ebnf.Option: | |
// NOTE(asurikov): favour non-empty for shorter outputs? | |
if rand.Intn(100) >= 50 { | |
return visit(ee.Body, ps) | |
} | |
return "" | |
case *ebnf.Production: | |
return visit(ee.Expr, altps[ee.Name.String]) | |
case *ebnf.Range: | |
begin, end := int(ee.Begin.String[0]), int(ee.End.String[0]) | |
return string(begin + rand.Intn(end-begin)) | |
case *ebnf.Repetition: | |
var b strings.Builder | |
for { | |
if rand.Float64() > repeatp { | |
break | |
} | |
b.WriteString(visit(ee.Body, ps)) | |
} | |
return b.String() | |
case *ebnf.Name: | |
return visit(g[ee.String], ps) | |
case *ebnf.Token: | |
return ee.String | |
} | |
panic(fmt.Sprintf("%T: %v", e, e)) | |
} | |
return visit(g[start], nil) | |
} | |
func errout(err error) { | |
fmt.Fprintln(os.Stderr, err) | |
os.Exit(1) | |
} | |
const grmmr = ` | |
Top = Expression . | |
Expression = Metric | Call . | |
Metric = Node { NodeSep Node } . | |
Node = Word | Glob . | |
Word = Letter { Letter | Digit | Punct } . | |
Call = Function "(" ArgList ")" . | |
ArgList = [ Argument { ArgSep Argument } ] . | |
Argument = Literal | Metric | KeyValue | Call . | |
KeyValue = Word "=" Literal . | |
Literal = Bool | Number | QWord . | |
Number = RandomNum | InterestingNum . | |
RandomNum = [ Sign ] ( Integer | Float ). | |
Integer = LeadDigit { Digit } . | |
Float = Integer "." Integer . | |
QWord = "\"" Word "\"" . | |
Bool = "true" | "True" | "TRUE" | "false" | "False" | "FALSE" . | |
Sign = "-" . | |
LeadDigit = "1" … "9" . | |
Digit = "0" … "9" . | |
Letter = "a" … "z" | "A" … "Z" . | |
Glob = "*" . | |
NodeSep = "." . | |
Punct = "_" | "-" . | |
ArgSep = "," . | |
InterestingNum = "-1" | "0" | "1" | "2147483647" | "-2147483648" | "-9223372036854775808" | "9223372036854775807" . | |
Function = "absolute" | "alias" | "aliasByMetric" | "aliasByNode" | "aliasSub" | "asPercent" | "averageSeries" | "averageSeriesWithWildcards" | "below" | "cactiStyle" | "cairo" | "changed" | "consolidateBy" | "constantLine" | "countSeries" | "cumulative" | "delay" | "derivative" | "diffSeries" | "divideSeries" | "ewma" | "exclude" | "fallbackSeries" | "fft" | "grep" | "group" | "groupByNode" | "highest" | "hitcount" | "holtWintersAberration" | "holtWintersConfidenceBands" | "holtWintersForecast" | "ifft" | "integral" | "invert" | "isNotNull" | "keepLastValue" | "kolmogorovSmirnovTest2" | "legendValue" | "limit" | "linearRegression" | "logarithm" | "lowPass" | "lowest" | "mapSeries" | "minMax" | "mostDeviant" | "moving" | "movingMedian" | "multiplySeries" | "multiplySeriesWithWildcards" | "nPercentile" | "nonNegativeDerivative" | "offset" | "offsetToZero" | "pearson" | "pearsonClosest" | "perSecond" | "percentileOfSeries" | "polyfit" | "pow" | "randomWalk" | "rangeOfSeries" | "reduce" | "removeBelowSeries" | "removeEmptySeries" | "scale" | "scaleToSeconds" | "seriesList" | "sortBy" | "sortByName" | "squareRoot" | "stddevSeries" | "stdev" | "substr" | "sum" | "sumSeriesWithWildcards" | "summarize" | "timeFunction" | "timeShift" | "timeStack" | "transformNull" | "tukey" . | |
` | |
// Probability of every individual repetition execution to produce non–empty | |
// result. Higher probablity means more repetitions. At 0.9 I'm consistently | |
// getting stack overflows. | |
const repeatp = 0.85 | |
// Maps probalitities to production rules. Make "holes" with math.NaN. | |
// Unmentioned rules will be split evenly between "remainder" probability. | |
var altps = map[string][]float64{ | |
"Expression": {0.01, 0.99}, // Metric is 1%, Expression is 99%. | |
"Literal": {0, 1, 0}, // Bool is 0%, Number is 100%, QWord is 0%. | |
"Number": {math.NaN(), 0.99}, // RandomNum is 1%, InterestingNum is 99%. | |
} | |
func computeAlts(g ebnf.Grammar, altps map[string][]float64) { | |
// TODO(asurikov): support ranges. | |
// TODO(asurikov): support options. | |
for _, e := range g { | |
var l int | |
if alts, ok := e.Expr.(ebnf.Alternative); ok { | |
l = len(alts) | |
} else { | |
continue | |
} | |
if _, ok := altps[e.Name.String]; !ok { | |
altps[e.Name.String] = makeNaNs(l) | |
} | |
complete(altps[e.Name.String]) | |
} | |
} | |
func complete(ps []float64) { | |
var unknown, psum float64 | |
for _, p := range ps { | |
if math.IsNaN(p) { | |
unknown++ | |
} else { | |
psum += p | |
} | |
} | |
defp := (1 - psum) / unknown | |
psum = 0 | |
for i, p := range ps { | |
if math.IsNaN(p) { | |
ps[i] = defp | |
} | |
psum += ps[i] | |
} | |
const epsilon = 0.00001 | |
if ok := math.Abs(1-psum) < epsilon; !ok { | |
panic("must add to 1") | |
} | |
} | |
func makeNaNs(n int) []float64 { | |
nans := make([]float64, n) | |
for i := 0; i < n; i++ { | |
nans[i] = math.NaN() | |
} | |
return nans | |
} | |
func roll(ps []float64) int { | |
var ( | |
i int | |
sum float64 | |
r = rand.Float64() | |
) | |
for _, p := range ps { | |
sum += p | |
if r < sum { | |
break | |
} | |
i++ | |
} | |
return i | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment