warpfork · February 7, 2021 18:27 · warpfork · Jan 26, 2021
diff --git a/README.md b/README.md
diff --git a/linking.go b/linking.go
 package ipld

 import (
 	"context"
 	"fmt"
 	"hash"
 	"io"
 )

 //
 // -- the relevant types -->
 //

 type Link interface {
 	Prototype() LinkPrototype
 }
 type LinkPrototype interface {
 	Link([]byte) Link
 }
 type LinkContext struct {
 	Ctx               context.Context // Use this for cancellation (or attaching additional info, for example to pass auth tokens through to the storage functions).
 	LinkPath          Path            // Path where the link was encountered.  May be zero.  Traversals will set this automatically.
 	LinkNode          Node            // When reading: the Node containing the link -- it may have additional type info, etc, that can be accessed.  When writing: not present.  Traversals will set this automatically.
 	LinkNodeAssembler NodeAssembler   // When writing: the NodeAssembler that will be receiving the link -- it may have additional type info, etc, that can be accessed.  When reading: not present.  Traversals will set this automatically.
 }

 type Encoder func(Node, io.Writer) error
 type Decoder func(NodeAssembler, io.Reader) error

 type LinkSystem struct {
 	isDefault           bool
 	EncoderChooser      func(LinkPrototype) (Encoder, error)
 	DecoderChooser      func(Link) (Decoder, error)
 	HasherChooser       func(LinkPrototype) (hash.Hash, error)
 	StorageWriteChooser func(LinkPrototype) (BlockReceiver, error)
 	StorageReadChooser  func(Link) (BlockReadOpener, error)
 }

 type BlockReceiver func(LinkContext) (io.Writer, BlockCommitter, error)
 type BlockCommitter func(Link) error
 type BlockReadOpener func(LinkContext, Link) (io.Reader, error)

 // ErrLinkingSetup is returned by methods on LinkSystem when some part of the system is not set up correctly,
 // or when one of the components refuses to handle a Link or LinkPrototype given.
 // (It is not yielded for errors from the storage nor codec systems once they've started; those errors rise without interference.)
 type ErrLinkingSetup struct {
 	Detail string // Perhaps an enum here as well, which states which internal function was to blame?
 	Cause  error
 }

 func (e ErrLinkingSetup) Error() string { return fmt.Sprintf("%s: %v", e.Detail, e.Cause) }
 func (e ErrLinkingSetup) Unwrap() error { return e.Cause }

 //
 // -- the functions that LinkSystem then yields -->
 //

 func (ls *LinkSystem) Load(lnk Link, np NodePrototype) (Node, error) {
 	lnkCtx := LinkContext{Ctx: context.Background()}
 	nb := np.NewBuilder()
 	if err := ls.Load2(lnkCtx, lnk, nb); err != nil {
 		return nil, err
 	}
 	return nb.Build(), nil
 }

 // More helpers can go here as well.
 // Everything attached to LinkSystem helps users without increasing demands or adding boilerplate to codec implementers, storage implementors, etc.

 // Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype?
 //  No, not quite.  (Alas.)  If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles; ded.

 func (ls *LinkSystem) Load2(lnkCtx LinkContext, lnk Link, na NodeAssembler) error {
 	// Choose all the parts.
 	decoder, err := ls.DecoderChooser(lnk)
 	if err != nil {
 		return ErrLinkingSetup{"could not choose a decoder", err}
 	}
 	hasher, err := ls.HasherChooser(lnk.Prototype())
 	if err != nil {
 		return ErrLinkingSetup{"could not choose a hasher", err}
 	}
 	storer, err := ls.StorageReadChooser(lnk)
 	if err != nil {
 		return ErrLinkingSetup{"could not choose a storage reader", err}
 	}
 	// ... chain them up and apply them, you get the idea ...
 	reader, err := storer(lnkCtx, lnk)
 	if err != nil {
 		return err
 	}
 	tee := io.TeeReader(reader, hasher)
 	decodeErr := decoder(na, tee)
 	if decodeErr != nil { // It is important to security to check the hash before returning any other observation about the content.
 		_, err := io.Copy(hasher, reader)
 		if err != nil {
 			return err
 		}
 	}
 	hash := hasher.Sum(nil)
 	// Bit of a jig to get something we can do the hash equality check on.
 	lnk2 := lnk.Prototype().Link(hash)
 	if lnk2 != lnk {
 		return fmt.Errorf("hash mismatch!  %v (actual) != %v (expected)", lnk2, lnk) // fixme: should also be typed
 	}
 	if decodeErr != nil {
 		return decodeErr
 	}
 	return nil
 }

 func (ls *LinkSystem) Store(lp LinkPrototype, n Node) (Link, error) {
 	return ls.Store2(LinkContext{Ctx: context.Background()}, lp, n)
 }

 func (ls *LinkSystem) Store2(lnkCtx LinkContext, lp LinkPrototype, n Node) (Link, error) {
 	// Choose all the parts.
 	encoder, err := ls.EncoderChooser(lp)
 	if err != nil {
 		return nil, ErrLinkingSetup{"could not choose an encoder", err}
 	}
 	hasher, err := ls.HasherChooser(lp)
 	if err != nil {
 		return nil, ErrLinkingSetup{"could not choose a hasher", err}
 	}
 	storer, err := ls.StorageWriteChooser(lp)
 	if err != nil {
 		return nil, ErrLinkingSetup{"could not choose a storage writer", err}
 	}
 	// chain em up and run it
 	writer, commitFn, err := storer(lnkCtx)
 	if err != nil {
 		return nil, err
 	}
 	tee := io.MultiWriter(writer, hasher)
 	err = encoder(n, tee)
 	if err != nil {
 		return nil, err
 	}
 	lnk := lp.Link(hasher.Sum(nil))
 	return lnk, commitFn(lnk)
 }

 // ComputeLink returns a Link for the given data, but doesn't do anything else
 // (e.g. it doesn't try to store any of the serial-form data anywhere else).
 func (ls *LinkSystem) ComputeLink(lp LinkPrototype, n Node) (Link, error) {
 	encoder, err := ls.EncoderChooser(lp)
 	if err != nil {
 		return nil, ErrLinkingSetup{"could not choose an encoder", err}
 	}
 	hasher, err := ls.HasherChooser(lp)
 	if err != nil {
 		return nil, ErrLinkingSetup{"could not choose a hasher", err}
 	}
 	err = encoder(n, hasher)
 	if err != nil {
 		return nil, err
 	}
 	return lp.Link(hasher.Sum(nil)), nil
 }
 func (ls *LinkSystem) MustComputeLink(lp LinkPrototype, n Node) Link {
 	if lnk, err := ls.ComputeLink(lp, n); err != nil {
 		panic(err)
 	} else {
 		return lnk
 	}
 }

 //
 // -- how we synthesize all these guts -->
 //

 var MulticodecEncodersRegistry = map[uint64]Encoder{} // Typically codec packages register themselves here during init.
 var MulticodecDecodersRegistry = map[uint64]Decoder{} // Typically codec packages register themselves here during init.

 var MultihashRegistry = map[uint64]Encoder{} // Typically hasher packages register themselves here during init. // <- questionable.  Many `hash.Hash` conformant functions out there; not ideal if we need a wrapper *package* for all of them just for registration.

 var DefaultLinkSystem = LinkSystem{
 	isDefault: true, // purely for the sake of error messages.
 	EncoderChooser: func(lp LinkPrototype) (Encoder, error) {
 		// Notice how the type switch here gives us a way to manuver away from go-cid in the future, gracefully: we can just add cases to this switch.
 		switch lp2 := lp.(type) {
 		case CidPrototype:
 			// This chooser function uses the global registry, but if someone really wanted to customize this (use their own speed-optimized something-or-other, whatever),
 			//  then they could do so by replacing this whole chooser function that does what they like.
 			fn, ok := MulticodecEncodersRegistry[lp2.MulticodecIndicator()]
 			if !ok {
 				return nil, fmt.Errorf("no encoder registered for multicodec indicator %x", lp2.MulticodecIndicator())
 			}
 			return fn, nil
 		default:
 			return nil, fmt.Errorf("this encoderChooser can only handle Cid links; got %T", lp)
 		}
 	},
 	// ... honestly, you get the idea.
 	// We would probably make helper structs for these things, also, rather than just bang it all inline here.
 	// For example, a struct for multicodec registries, and then that just exports methods which happen to match the needs of the chooser func fields here;
 	//  that makes it even easier for people to make and use custom non-global registries (but still be using the multicodec registry pattern).
 }

 //
 // -- a few temp stubs (just to keep this single-file) that roughly describe go-cid -->
 //

 type CidPrototype interface {
 	MulticodecIndicator() uint64
 	MultihashIndicator() uint64
 	MultihashLength() int
 	MultihashBody() []byte
 }

 //
 // -- notes -->
 //

 /*
 	I *did* consider having the LinkPrototype.Link method be allowed to return error.
 	However, turned out every time it did so, I immediately had the handling logic of:

 		if err != nil {
 			panic(err) // this same thing told us what hasher to pick, so it ought to be satisfied with these bytes!
 		}

 	... and so, it seems we might as well say that the LinkPrototype should panic if it gets fed a byte slice it really doesn't like.
 	Could still be convinced either way, though.
 */

 /*
 	I'm not at all sure that StorageWriteChooser and StorageReadChooser need to take Link and LinkPrototype arguments.

 	I think they got them in the first draft because all the neighbors did.
 	But it's hard to imagine wanting to use that.

 	Maybe those fields should just be BlockReceiver and BlockReadOpener with no further ado.

 	Put another way: does the storage write opener logic (aka BlockReceiver) ever need to see a LinkPrototype?
 	Because that's the one thing its concretely missing, and could
 */
	package ipld

	import (
	"context"
	"fmt"
	"hash"
	"io"
	)

	//
	// -- the relevant types -->
	//

	type Link interface {
	Prototype() LinkPrototype
	}
	type LinkPrototype interface {
	Link([]byte) Link
	}
	type LinkContext struct {
	Ctx context.Context // Use this for cancellation (or attaching additional info, for example to pass auth tokens through to the storage functions).
	LinkPath Path // Path where the link was encountered. May be zero. Traversals will set this automatically.
	LinkNode Node // When reading: the Node containing the link -- it may have additional type info, etc, that can be accessed. When writing: not present. Traversals will set this automatically.
	LinkNodeAssembler NodeAssembler // When writing: the NodeAssembler that will be receiving the link -- it may have additional type info, etc, that can be accessed. When reading: not present. Traversals will set this automatically.
	}

	type Encoder func(Node, io.Writer) error
	type Decoder func(NodeAssembler, io.Reader) error

	type LinkSystem struct {
	isDefault bool
	EncoderChooser func(LinkPrototype) (Encoder, error)
	DecoderChooser func(Link) (Decoder, error)
	HasherChooser func(LinkPrototype) (hash.Hash, error)
	StorageWriteChooser func(LinkPrototype) (BlockReceiver, error)
	StorageReadChooser func(Link) (BlockReadOpener, error)
	}

	type BlockReceiver func(LinkContext) (io.Writer, BlockCommitter, error)
	type BlockCommitter func(Link) error
	type BlockReadOpener func(LinkContext, Link) (io.Reader, error)

	// ErrLinkingSetup is returned by methods on LinkSystem when some part of the system is not set up correctly,
	// or when one of the components refuses to handle a Link or LinkPrototype given.
	// (It is not yielded for errors from the storage nor codec systems once they've started; those errors rise without interference.)
	type ErrLinkingSetup struct {
	Detail string // Perhaps an enum here as well, which states which internal function was to blame?
	Cause error
	}

	func (e ErrLinkingSetup) Error() string { return fmt.Sprintf("%s: %v", e.Detail, e.Cause) }
	func (e ErrLinkingSetup) Unwrap() error { return e.Cause }

	//
	// -- the functions that LinkSystem then yields -->
	//

	func (ls *LinkSystem) Load(lnk Link, np NodePrototype) (Node, error) {
	lnkCtx := LinkContext{Ctx: context.Background()}
	nb := np.NewBuilder()
	if err := ls.Load2(lnkCtx, lnk, nb); err != nil {
	return nil, err
	}
	return nb.Build(), nil
	}

	// More helpers can go here as well.
	// Everything attached to LinkSystem helps users without increasing demands or adding boilerplate to codec implementers, storage implementors, etc.

	// Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype?
	// No, not quite. (Alas.) If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles; ded.

	func (ls *LinkSystem) Load2(lnkCtx LinkContext, lnk Link, na NodeAssembler) error {
	// Choose all the parts.
	decoder, err := ls.DecoderChooser(lnk)
	if err != nil {
	return ErrLinkingSetup{"could not choose a decoder", err}
	}
	hasher, err := ls.HasherChooser(lnk.Prototype())
	if err != nil {
	return ErrLinkingSetup{"could not choose a hasher", err}
	}
	storer, err := ls.StorageReadChooser(lnk)
	if err != nil {
	return ErrLinkingSetup{"could not choose a storage reader", err}
	}
	// ... chain them up and apply them, you get the idea ...
	reader, err := storer(lnkCtx, lnk)
	if err != nil {
	return err
	}
	tee := io.TeeReader(reader, hasher)
	decodeErr := decoder(na, tee)
	if decodeErr != nil { // It is important to security to check the hash before returning any other observation about the content.
	_, err := io.Copy(hasher, reader)
	if err != nil {
	return err
	}
	}
	hash := hasher.Sum(nil)
	// Bit of a jig to get something we can do the hash equality check on.
	lnk2 := lnk.Prototype().Link(hash)
	if lnk2 != lnk {
	return fmt.Errorf("hash mismatch! %v (actual) != %v (expected)", lnk2, lnk) // fixme: should also be typed
	}
	if decodeErr != nil {
	return decodeErr
	}
	return nil
	}

	func (ls *LinkSystem) Store(lp LinkPrototype, n Node) (Link, error) {
	return ls.Store2(LinkContext{Ctx: context.Background()}, lp, n)
	}

	func (ls *LinkSystem) Store2(lnkCtx LinkContext, lp LinkPrototype, n Node) (Link, error) {
	// Choose all the parts.
	encoder, err := ls.EncoderChooser(lp)
	if err != nil {
	return nil, ErrLinkingSetup{"could not choose an encoder", err}
	}
	hasher, err := ls.HasherChooser(lp)
	if err != nil {
	return nil, ErrLinkingSetup{"could not choose a hasher", err}
	}
	storer, err := ls.StorageWriteChooser(lp)
	if err != nil {
	return nil, ErrLinkingSetup{"could not choose a storage writer", err}
	}
	// chain em up and run it
	writer, commitFn, err := storer(lnkCtx)
	if err != nil {
	return nil, err
	}
	tee := io.MultiWriter(writer, hasher)
	err = encoder(n, tee)
	if err != nil {
	return nil, err
	}
	lnk := lp.Link(hasher.Sum(nil))
	return lnk, commitFn(lnk)
	}

	// ComputeLink returns a Link for the given data, but doesn't do anything else
	// (e.g. it doesn't try to store any of the serial-form data anywhere else).
	func (ls *LinkSystem) ComputeLink(lp LinkPrototype, n Node) (Link, error) {
	encoder, err := ls.EncoderChooser(lp)
	if err != nil {
	return nil, ErrLinkingSetup{"could not choose an encoder", err}
	}
	hasher, err := ls.HasherChooser(lp)
	if err != nil {
	return nil, ErrLinkingSetup{"could not choose a hasher", err}
	}
	err = encoder(n, hasher)
	if err != nil {
	return nil, err
	}
	return lp.Link(hasher.Sum(nil)), nil
	}
	func (ls *LinkSystem) MustComputeLink(lp LinkPrototype, n Node) Link {
	if lnk, err := ls.ComputeLink(lp, n); err != nil {
	panic(err)
	} else {
	return lnk
	}
	}

	//
	// -- how we synthesize all these guts -->
	//

	var MulticodecEncodersRegistry = map[uint64]Encoder{} // Typically codec packages register themselves here during init.
	var MulticodecDecodersRegistry = map[uint64]Decoder{} // Typically codec packages register themselves here during init.

	var MultihashRegistry = map[uint64]Encoder{} // Typically hasher packages register themselves here during init. // <- questionable. Many `hash.Hash` conformant functions out there; not ideal if we need a wrapper package for all of them just for registration.

	var DefaultLinkSystem = LinkSystem{
	isDefault: true, // purely for the sake of error messages.
	EncoderChooser: func(lp LinkPrototype) (Encoder, error) {
	// Notice how the type switch here gives us a way to manuver away from go-cid in the future, gracefully: we can just add cases to this switch.
	switch lp2 := lp.(type) {
	case CidPrototype:
	// This chooser function uses the global registry, but if someone really wanted to customize this (use their own speed-optimized something-or-other, whatever),
	// then they could do so by replacing this whole chooser function that does what they like.
	fn, ok := MulticodecEncodersRegistry[lp2.MulticodecIndicator()]
	if !ok {
	return nil, fmt.Errorf("no encoder registered for multicodec indicator %x", lp2.MulticodecIndicator())
	}
	return fn, nil
	default:
	return nil, fmt.Errorf("this encoderChooser can only handle Cid links; got %T", lp)
	}
	},
	// ... honestly, you get the idea.
	// We would probably make helper structs for these things, also, rather than just bang it all inline here.
	// For example, a struct for multicodec registries, and then that just exports methods which happen to match the needs of the chooser func fields here;
	// that makes it even easier for people to make and use custom non-global registries (but still be using the multicodec registry pattern).
	}

	//
	// -- a few temp stubs (just to keep this single-file) that roughly describe go-cid -->
	//

	type CidPrototype interface {
	MulticodecIndicator() uint64
	MultihashIndicator() uint64
	MultihashLength() int
	MultihashBody() []byte
	}

	//
	// -- notes -->
	//

	/*
	I did consider having the LinkPrototype.Link method be allowed to return error.
	However, turned out every time it did so, I immediately had the handling logic of:

	if err != nil {
	panic(err) // this same thing told us what hasher to pick, so it ought to be satisfied with these bytes!
	}

	... and so, it seems we might as well say that the LinkPrototype should panic if it gets fed a byte slice it really doesn't like.
	Could still be convinced either way, though.
	*/

	/*
	I'm not at all sure that StorageWriteChooser and StorageReadChooser need to take Link and LinkPrototype arguments.

	I think they got them in the first draft because all the neighbors did.
	But it's hard to imagine wanting to use that.

	Maybe those fields should just be BlockReceiver and BlockReadOpener with no further ado.

	Put another way: does the storage write opener logic (aka BlockReceiver) ever need to see a LinkPrototype?
	Because that's the one thing its concretely missing, and could
	*/