rlp: improve nil pointer handling (#20064)

* rlp: improve nil pointer handling

  In both encoder and decoder, the rules for encoding nil pointers were a bit hard to understand, and didn't leave much choice. Since RLP allows two empty values (empty list, empty string), any protocol built on RLP must choose one of these values to represent the null value in a given context.

  This change adds choice in the form of two new struct tags, "nilString" and "nilList". These can be used to specify how a nil pointer value is encoded. The "nil" tag still exists, but its implementation is now explicit and defines exactly how nil pointers are handled in a single place.

  Another important change in this commit is how nil pointers and the Encoder interface interact. The EncodeRLP method was previously called even on nil values, which was supposed to give users a choice of how their value would be handled when nil. It turns out this is a stupid idea. If you create a network protocol containing an object defined in another package, it's better to be able to say that the object should be a list or string when nil in the definition of the protocol message rather than defining the encoding of nil on the object itself.

  As of this commit, the encoding rules for pointers take precedence over the Encoder interface rule. I think the "nil" tag will work fine for most cases. For special kinds of objects which are a struct in Go but strings in RLP, code using the object can specify the desired encoding of nil using the "nilString" and "nilList" tags.

* rlp: propagate struct field type errors

  If a struct contained fields of a type that cannot be encoded or decoded, the encoder and decoder would panic instead of returning an error. Fix this by propagating type errors in makeStruct{Writer,Decoder} and add a test.
2019-09-13 11:10:57 +02:00
parent 3b6c9902f3
commit 96fb839133
7 changed files with 415 additions and 243 deletions
--- a/rlp/encode.go
+++ b/rlp/encode.go
@@ -49,36 +49,7 @@ type Encoder interface {
 // perform many small writes in some cases. Consider making w
 // buffered.
 //
-// Encode uses the following type-dependent encoding rules:
-//
-// If the type implements the Encoder interface, Encode calls
-// EncodeRLP. This is true even for nil pointers, please see the
-// documentation for Encoder.
-//
-// To encode a pointer, the value being pointed to is encoded. For nil
-// pointers, Encode will encode the zero value of the type. A nil
-// pointer to a struct type always encodes as an empty RLP list.
-// A nil pointer to an array encodes as an empty list (or empty string
-// if the array has element type byte).
-//
-// Struct values are encoded as an RLP list of all their encoded
-// public fields. Recursive struct types are supported.
-//
-// To encode slices and arrays, the elements are encoded as an RLP
-// list of the value's elements. Note that arrays and slices with
-// element type uint8 or byte are always encoded as an RLP string.
-//
-// A Go string is encoded as an RLP string.
-//
-// An unsigned integer value is encoded as an RLP string. Zero always
-// encodes as an empty RLP string. Encode also supports *big.Int.
-//
-// Boolean values are encoded as unsigned integers zero (false) and one (true).
-//
-// An interface value encodes as the value contained in the interface.
-//
-// Signed integers are not supported, nor are floating point numbers, maps,
-// channels and functions.
+// Please see package-level documentation of encoding rules.
 func Encode(w io.Writer, val interface{}) error {
 	if outer, ok := w.(*encbuf); ok {
 		// Encode was called by some type's EncodeRLP.
@@ -95,7 +66,7 @@ func Encode(w io.Writer, val interface{}) error {
 }

 // EncodeToBytes returns the RLP encoding of val.
-// Please see the documentation of Encode for the encoding rules.
+// Please see package-level documentation for the encoding rules.
 func EncodeToBytes(val interface{}) ([]byte, error) {
 	eb := encbufPool.Get().(*encbuf)
 	defer encbufPool.Put(eb)
@@ -349,16 +320,14 @@ func makeWriter(typ reflect.Type, ts tags) (writer, error) {
 	switch {
 	case typ == rawValueType:
 		return writeRawValue, nil
-	case typ.Implements(encoderInterface):
-		return writeEncoder, nil
-	case kind != reflect.Ptr && reflect.PtrTo(typ).Implements(encoderInterface):
-		return writeEncoderNoPtr, nil
-	case kind == reflect.Interface:
-		return writeInterface, nil
 	case typ.AssignableTo(reflect.PtrTo(bigInt)):
 		return writeBigIntPtr, nil
 	case typ.AssignableTo(bigInt):
 		return writeBigIntNoPtr, nil
+	case kind == reflect.Ptr:
+		return makePtrWriter(typ, ts)
+	case reflect.PtrTo(typ).Implements(encoderInterface):
+		return makeEncoderWriter(typ), nil
 	case isUint(kind):
 		return writeUint, nil
 	case kind == reflect.Bool:
@@ -373,8 +342,8 @@ func makeWriter(typ reflect.Type, ts tags) (writer, error) {
 		return makeSliceWriter(typ, ts)
 	case kind == reflect.Struct:
 		return makeStructWriter(typ)
-	case kind == reflect.Ptr:
-		return makePtrWriter(typ)
+	case kind == reflect.Interface:
+		return writeInterface, nil
 	default:
 		return nil, fmt.Errorf("rlp: type %v is not RLP-serializable", typ)
 	}
@@ -470,26 +439,6 @@ func writeString(val reflect.Value, w *encbuf) error {
 	return nil
 }

-func writeEncoder(val reflect.Value, w *encbuf) error {
-	return val.Interface().(Encoder).EncodeRLP(w)
-}
-
-// writeEncoderNoPtr handles non-pointer values that implement Encoder
-// with a pointer receiver.
-func writeEncoderNoPtr(val reflect.Value, w *encbuf) error {
-	if !val.CanAddr() {
-		// We can't get the address. It would be possible to make the
-		// value addressable by creating a shallow copy, but this
-		// creates other problems so we're not doing it (yet).
-		//
-		// package json simply doesn't call MarshalJSON for cases like
-		// this, but encodes the value as if it didn't implement the
-		// interface. We don't want to handle it that way.
-		return fmt.Errorf("rlp: game over: unadressable value of type %v, EncodeRLP is pointer method", val.Type())
-	}
-	return val.Addr().Interface().(Encoder).EncodeRLP(w)
-}
-
 func writeInterface(val reflect.Value, w *encbuf) error {
 	if val.IsNil() {
 		// Write empty list. This is consistent with the previous RLP
@@ -531,6 +480,11 @@ func makeStructWriter(typ reflect.Type) (writer, error) {
 	if err != nil {
 		return nil, err
 	}
+	for _, f := range fields {
+		if f.info.writerErr != nil {
+			return nil, structFieldError{typ, f.index, f.info.writerErr}
+		}
+	}
 	writer := func(val reflect.Value, w *encbuf) error {
 		lh := w.list()
 		for _, f := range fields {
@@ -544,44 +498,51 @@ func makeStructWriter(typ reflect.Type) (writer, error) {
 	return writer, nil
 }

-func makePtrWriter(typ reflect.Type) (writer, error) {
+func makePtrWriter(typ reflect.Type, ts tags) (writer, error) {
 	etypeinfo := cachedTypeInfo1(typ.Elem(), tags{})
 	if etypeinfo.writerErr != nil {
 		return nil, etypeinfo.writerErr
 	}
-
-	// determine nil pointer handler
-	var nilfunc func(*encbuf) error
-	kind := typ.Elem().Kind()
-	switch {
-	case kind == reflect.Array && isByte(typ.Elem().Elem()):
-		nilfunc = func(w *encbuf) error {
-			w.str = append(w.str, 0x80)
-			return nil
-		}
-	case kind == reflect.Struct || kind == reflect.Array:
-		nilfunc = func(w *encbuf) error {
-			// encoding the zero value of a struct/array could trigger
-			// infinite recursion, avoid that.
-			w.listEnd(w.list())
-			return nil
-		}
-	default:
-		zero := reflect.Zero(typ.Elem())
-		nilfunc = func(w *encbuf) error {
-			return etypeinfo.writer(zero, w)
-		}
+	// Determine how to encode nil pointers.
+	var nilKind Kind
+	if ts.nilOK {
+		nilKind = ts.nilKind // use struct tag if provided
+	} else {
+		nilKind = defaultNilKind(typ.Elem())
 	}

 	writer := func(val reflect.Value, w *encbuf) error {
 		if val.IsNil() {
-			return nilfunc(w)
+			if nilKind == String {
+				w.str = append(w.str, 0x80)
+			} else {
+				w.listEnd(w.list())
+			}
+			return nil
 		}
 		return etypeinfo.writer(val.Elem(), w)
 	}
 	return writer, nil
 }

+func makeEncoderWriter(typ reflect.Type) writer {
+	if typ.Implements(encoderInterface) {
+		return func(val reflect.Value, w *encbuf) error {
+			return val.Interface().(Encoder).EncodeRLP(w)
+		}
+	}
+	w := func(val reflect.Value, w *encbuf) error {
+		if !val.CanAddr() {
+			// package json simply doesn't call MarshalJSON for this case, but encodes the
+			// value as if it didn't implement the interface. We don't want to handle it that
+			// way.
+			return fmt.Errorf("rlp: unadressable value of type %v, EncodeRLP is pointer method", val.Type())
+		}
+		return val.Addr().Interface().(Encoder).EncodeRLP(w)
+	}
+	return w
+}
+
 // putint writes i to the beginning of b in big endian byte
 // order, using the least number of bytes needed to represent i.
 func putint(b []byte, i uint64) (size int) {