|  | // Copyright 2021 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package fuzz | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "fmt" | 
|  | "go/ast" | 
|  | "go/parser" | 
|  | "go/token" | 
|  | "math" | 
|  | "strconv" | 
|  | "unicode/utf8" | 
|  | ) | 
|  |  | 
// encVersion1 is the header line that identifies version 1 of the corpus file
// encoding. marshalCorpusFile writes it as the first line of its output, and
// unmarshalCorpusFile requires it as the first line of its input.
var encVersion1 = "go test fuzz v1"
|  |  | 
|  | // marshalCorpusFile encodes an arbitrary number of arguments into the file format for the | 
|  | // corpus. | 
|  | func marshalCorpusFile(vals ...any) []byte { | 
|  | if len(vals) == 0 { | 
|  | panic("must have at least one value to marshal") | 
|  | } | 
|  | b := bytes.NewBuffer([]byte(encVersion1 + "\n")) | 
|  | // TODO(katiehockman): keep uint8 and int32 encoding where applicable, | 
|  | // instead of changing to byte and rune respectively. | 
|  | for _, val := range vals { | 
|  | switch t := val.(type) { | 
|  | case int, int8, int16, int64, uint, uint16, uint32, uint64, bool: | 
|  | fmt.Fprintf(b, "%T(%v)\n", t, t) | 
|  | case float32: | 
|  | if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) { | 
|  | // We encode unusual NaNs as hex values, because that is how users are | 
|  | // likely to encounter them in literature about floating-point encoding. | 
|  | // This allows us to reproduce fuzz failures that depend on the specific | 
|  | // NaN representation (for float32 there are about 2^24 possibilities!), | 
|  | // not just the fact that the value is *a* NaN. | 
|  | // | 
|  | // Note that the specific value of float32(math.NaN()) can vary based on | 
|  | // whether the architecture represents signaling NaNs using a low bit | 
|  | // (as is common) or a high bit (as commonly implemented on MIPS | 
|  | // hardware before around 2012). We believe that the increase in clarity | 
|  | // from identifying "NaN" with math.NaN() is worth the slight ambiguity | 
|  | // from a platform-dependent value. | 
|  | fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t)) | 
|  | } else { | 
|  | // We encode all other values — including the NaN value that is | 
|  | // bitwise-identical to float32(math.Nan()) — using the default | 
|  | // formatting, which is equivalent to strconv.FormatFloat with format | 
|  | // 'g' and can be parsed by strconv.ParseFloat. | 
|  | // | 
|  | // For an ordinary floating-point number this format includes | 
|  | // sufficiently many digits to reconstruct the exact value. For positive | 
|  | // or negative infinity it is the string "+Inf" or "-Inf". For positive | 
|  | // or negative zero it is "0" or "-0". For NaN, it is the string "NaN". | 
|  | fmt.Fprintf(b, "%T(%v)\n", t, t) | 
|  | } | 
|  | case float64: | 
|  | if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) { | 
|  | fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t)) | 
|  | } else { | 
|  | fmt.Fprintf(b, "%T(%v)\n", t, t) | 
|  | } | 
|  | case string: | 
|  | fmt.Fprintf(b, "string(%q)\n", t) | 
|  | case rune: // int32 | 
|  | // Although rune and int32 are represented by the same type, only a subset | 
|  | // of valid int32 values can be expressed as rune literals. Notably, | 
|  | // negative numbers, surrogate halves, and values above unicode.MaxRune | 
|  | // have no quoted representation. | 
|  | // | 
|  | // fmt with "%q" (and the corresponding functions in the strconv package) | 
|  | // would quote out-of-range values to the Unicode replacement character | 
|  | // instead of the original value (see https://go.dev/issue/51526), so | 
|  | // they must be treated as int32 instead. | 
|  | // | 
|  | // We arbitrarily draw the line at UTF-8 validity, which biases toward the | 
|  | // "rune" interpretation. (However, we accept either format as input.) | 
|  | if utf8.ValidRune(t) { | 
|  | fmt.Fprintf(b, "rune(%q)\n", t) | 
|  | } else { | 
|  | fmt.Fprintf(b, "int32(%v)\n", t) | 
|  | } | 
|  | case byte: // uint8 | 
|  | // For bytes, we arbitrarily prefer the character interpretation. | 
|  | // (Every byte has a valid character encoding.) | 
|  | fmt.Fprintf(b, "byte(%q)\n", t) | 
|  | case []byte: // []uint8 | 
|  | fmt.Fprintf(b, "[]byte(%q)\n", t) | 
|  | default: | 
|  | panic(fmt.Sprintf("unsupported type: %T", t)) | 
|  | } | 
|  | } | 
|  | return b.Bytes() | 
|  | } | 
|  |  | 
|  | // unmarshalCorpusFile decodes corpus bytes into their respective values. | 
|  | func unmarshalCorpusFile(b []byte) ([]any, error) { | 
|  | if len(b) == 0 { | 
|  | return nil, fmt.Errorf("cannot unmarshal empty string") | 
|  | } | 
|  | lines := bytes.Split(b, []byte("\n")) | 
|  | if len(lines) < 2 { | 
|  | return nil, fmt.Errorf("must include version and at least one value") | 
|  | } | 
|  | if string(lines[0]) != encVersion1 { | 
|  | return nil, fmt.Errorf("unknown encoding version: %s", lines[0]) | 
|  | } | 
|  | var vals []any | 
|  | for _, line := range lines[1:] { | 
|  | line = bytes.TrimSpace(line) | 
|  | if len(line) == 0 { | 
|  | continue | 
|  | } | 
|  | v, err := parseCorpusValue(line) | 
|  | if err != nil { | 
|  | return nil, fmt.Errorf("malformed line %q: %v", line, err) | 
|  | } | 
|  | vals = append(vals, v) | 
|  | } | 
|  | return vals, nil | 
|  | } | 
|  |  | 
|  | func parseCorpusValue(line []byte) (any, error) { | 
|  | fs := token.NewFileSet() | 
|  | expr, err := parser.ParseExprFrom(fs, "(test)", line, 0) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | call, ok := expr.(*ast.CallExpr) | 
|  | if !ok { | 
|  | return nil, fmt.Errorf("expected call expression") | 
|  | } | 
|  | if len(call.Args) != 1 { | 
|  | return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args)) | 
|  | } | 
|  | arg := call.Args[0] | 
|  |  | 
|  | if arrayType, ok := call.Fun.(*ast.ArrayType); ok { | 
|  | if arrayType.Len != nil { | 
|  | return nil, fmt.Errorf("expected []byte or primitive type") | 
|  | } | 
|  | elt, ok := arrayType.Elt.(*ast.Ident) | 
|  | if !ok || elt.Name != "byte" { | 
|  | return nil, fmt.Errorf("expected []byte") | 
|  | } | 
|  | lit, ok := arg.(*ast.BasicLit) | 
|  | if !ok || lit.Kind != token.STRING { | 
|  | return nil, fmt.Errorf("string literal required for type []byte") | 
|  | } | 
|  | s, err := strconv.Unquote(lit.Value) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return []byte(s), nil | 
|  | } | 
|  |  | 
|  | var idType *ast.Ident | 
|  | if selector, ok := call.Fun.(*ast.SelectorExpr); ok { | 
|  | xIdent, ok := selector.X.(*ast.Ident) | 
|  | if !ok || xIdent.Name != "math" { | 
|  | return nil, fmt.Errorf("invalid selector type") | 
|  | } | 
|  | switch selector.Sel.Name { | 
|  | case "Float64frombits": | 
|  | idType = &ast.Ident{Name: "float64-bits"} | 
|  | case "Float32frombits": | 
|  | idType = &ast.Ident{Name: "float32-bits"} | 
|  | default: | 
|  | return nil, fmt.Errorf("invalid selector type") | 
|  | } | 
|  | } else { | 
|  | idType, ok = call.Fun.(*ast.Ident) | 
|  | if !ok { | 
|  | return nil, fmt.Errorf("expected []byte or primitive type") | 
|  | } | 
|  | if idType.Name == "bool" { | 
|  | id, ok := arg.(*ast.Ident) | 
|  | if !ok { | 
|  | return nil, fmt.Errorf("malformed bool") | 
|  | } | 
|  | if id.Name == "true" { | 
|  | return true, nil | 
|  | } else if id.Name == "false" { | 
|  | return false, nil | 
|  | } else { | 
|  | return nil, fmt.Errorf("true or false required for type bool") | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | var ( | 
|  | val  string | 
|  | kind token.Token | 
|  | ) | 
|  | if op, ok := arg.(*ast.UnaryExpr); ok { | 
|  | switch lit := op.X.(type) { | 
|  | case *ast.BasicLit: | 
|  | if op.Op != token.SUB { | 
|  | return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op) | 
|  | } | 
|  | // Special case for negative numbers. | 
|  | val = op.Op.String() + lit.Value // e.g. "-" + "124" | 
|  | kind = lit.Kind | 
|  | case *ast.Ident: | 
|  | if lit.Name != "Inf" { | 
|  | return nil, fmt.Errorf("expected operation on int or float type") | 
|  | } | 
|  | if op.Op == token.SUB { | 
|  | val = "-Inf" | 
|  | } else { | 
|  | val = "+Inf" | 
|  | } | 
|  | kind = token.FLOAT | 
|  | default: | 
|  | return nil, fmt.Errorf("expected operation on int or float type") | 
|  | } | 
|  | } else { | 
|  | switch lit := arg.(type) { | 
|  | case *ast.BasicLit: | 
|  | val, kind = lit.Value, lit.Kind | 
|  | case *ast.Ident: | 
|  | if lit.Name != "NaN" { | 
|  | return nil, fmt.Errorf("literal value required for primitive type") | 
|  | } | 
|  | val, kind = "NaN", token.FLOAT | 
|  | default: | 
|  | return nil, fmt.Errorf("literal value required for primitive type") | 
|  | } | 
|  | } | 
|  |  | 
|  | switch typ := idType.Name; typ { | 
|  | case "string": | 
|  | if kind != token.STRING { | 
|  | return nil, fmt.Errorf("string literal value required for type string") | 
|  | } | 
|  | return strconv.Unquote(val) | 
|  | case "byte", "rune": | 
|  | if kind == token.INT { | 
|  | switch typ { | 
|  | case "rune": | 
|  | return parseInt(val, typ) | 
|  | case "byte": | 
|  | return parseUint(val, typ) | 
|  | } | 
|  | } | 
|  | if kind != token.CHAR { | 
|  | return nil, fmt.Errorf("character literal required for byte/rune types") | 
|  | } | 
|  | n := len(val) | 
|  | if n < 2 { | 
|  | return nil, fmt.Errorf("malformed character literal, missing single quotes") | 
|  | } | 
|  | code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'') | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | if typ == "rune" { | 
|  | return code, nil | 
|  | } | 
|  | if code >= 256 { | 
|  | return nil, fmt.Errorf("can only encode single byte to a byte type") | 
|  | } | 
|  | return byte(code), nil | 
|  | case "int", "int8", "int16", "int32", "int64": | 
|  | if kind != token.INT { | 
|  | return nil, fmt.Errorf("integer literal required for int types") | 
|  | } | 
|  | return parseInt(val, typ) | 
|  | case "uint", "uint8", "uint16", "uint32", "uint64": | 
|  | if kind != token.INT { | 
|  | return nil, fmt.Errorf("integer literal required for uint types") | 
|  | } | 
|  | return parseUint(val, typ) | 
|  | case "float32": | 
|  | if kind != token.FLOAT && kind != token.INT { | 
|  | return nil, fmt.Errorf("float or integer literal required for float32 type") | 
|  | } | 
|  | v, err := strconv.ParseFloat(val, 32) | 
|  | return float32(v), err | 
|  | case "float64": | 
|  | if kind != token.FLOAT && kind != token.INT { | 
|  | return nil, fmt.Errorf("float or integer literal required for float64 type") | 
|  | } | 
|  | return strconv.ParseFloat(val, 64) | 
|  | case "float32-bits": | 
|  | if kind != token.INT { | 
|  | return nil, fmt.Errorf("integer literal required for math.Float32frombits type") | 
|  | } | 
|  | bits, err := parseUint(val, "uint32") | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return math.Float32frombits(bits.(uint32)), nil | 
|  | case "float64-bits": | 
|  | if kind != token.FLOAT && kind != token.INT { | 
|  | return nil, fmt.Errorf("integer literal required for math.Float64frombits type") | 
|  | } | 
|  | bits, err := parseUint(val, "uint64") | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return math.Float64frombits(bits.(uint64)), nil | 
|  | default: | 
|  | return nil, fmt.Errorf("expected []byte or primitive type") | 
|  | } | 
|  | } | 
|  |  | 
// parseInt parses val as a signed integer literal (base inferred from its
// prefix) and returns it as a value of the signed integer type named by typ.
// "rune" is accepted as a synonym for "int32".
//
// The result is returned together with any error from strconv.ParseInt. It
// panics if typ is not a supported type name.
func parseInt(val, typ string) (any, error) {
	var bitSize int
	switch typ {
	case "int":
		// The int type may be either 32 or 64 bits. If 32, the fuzz tests in
		// the corpus may include 64-bit values produced by fuzzing runs on
		// 64-bit architectures. When running those tests, the values are
		// implicitly wrapped to fit in a regular int. (The test case is still
		// "interesting", even if the specific values of its inputs are
		// platform-dependent.)
		bitSize = 64
	case "int8":
		bitSize = 8
	case "int16":
		bitSize = 16
	case "int32", "rune":
		bitSize = 32
	case "int64":
		bitSize = 64
	default:
		panic("unreachable")
	}

	n, err := strconv.ParseInt(val, 0, bitSize)
	switch typ {
	case "int":
		return int(n), err
	case "int8":
		return int8(n), err
	case "int16":
		return int16(n), err
	case "int32", "rune":
		return int32(n), err
	default: // "int64"
		return n, err
	}
}
|  |  | 
// parseUint parses val as an unsigned integer literal (base inferred from its
// prefix) and returns it as a value of the unsigned integer type named by typ.
// "byte" is accepted as a synonym for "uint8".
//
// The result is returned together with any error from strconv.ParseUint. It
// panics if typ is not a supported type name.
func parseUint(val, typ string) (any, error) {
	var bitSize int
	switch typ {
	case "uint", "uint64":
		// uint is always parsed as 64 bits and then converted.
		bitSize = 64
	case "uint8", "byte":
		bitSize = 8
	case "uint16":
		bitSize = 16
	case "uint32":
		bitSize = 32
	default:
		panic("unreachable")
	}

	n, err := strconv.ParseUint(val, 0, bitSize)
	switch typ {
	case "uint":
		return uint(n), err
	case "uint8", "byte":
		return uint8(n), err
	case "uint16":
		return uint16(n), err
	case "uint32":
		return uint32(n), err
	default: // "uint64"
		return n, err
	}
}