|
package ipld |
|
|
|
import ( |
|
"context" |
|
"fmt" |
|
"hash" |
|
"io" |
|
) |
|
|
|
// |
|
// -- the relevant types --> |
|
// |
|
|
|
// Link is a reference that a LinkSystem can load.
//
// LinkSystem.Load2 asks a Link for its prototype in order to choose a hasher,
// and to rebuild a second Link from the hash of the bytes it read; the two
// links are then compared with `!=`, so implementations are expected to be
// comparable values.
type Link interface {
	// Prototype returns the LinkPrototype this link can be rebuilt from.
	Prototype() LinkPrototype
}

// LinkPrototype turns the output of a hasher into a Link.
type LinkPrototype interface {
	// Link builds a Link from the given hash sum.
	//
	// There is no error return: the notes at the end of this file suggest an
	// implementation should panic if it is fed a byte slice it cannot accept.
	Link([]byte) Link
}
|
type LinkContext struct { |
|
Ctx context.Context // Use this for cancellation (or attaching additional info, for example to pass auth tokens through to the storage functions). |
|
LinkPath Path // Path where the link was encountered. May be zero. Traversals will set this automatically. |
|
LinkNode Node // When reading: the Node containing the link -- it may have additional type info, etc, that can be accessed. When writing: not present. Traversals will set this automatically. |
|
LinkNodeAssembler NodeAssembler // When writing: the NodeAssembler that will be receiving the link -- it may have additional type info, etc, that can be accessed. When reading: not present. Traversals will set this automatically. |
|
} |
|
|
|
type Encoder func(Node, io.Writer) error |
|
type Decoder func(NodeAssembler, io.Reader) error |
|
|
|
type LinkSystem struct { |
|
isDefault bool |
|
EncoderChooser func(LinkPrototype) (Encoder, error) |
|
DecoderChooser func(Link) (Decoder, error) |
|
HasherChooser func(LinkPrototype) (hash.Hash, error) |
|
StorageWriteChooser func(LinkPrototype) (BlockReceiver, error) |
|
StorageReadChooser func(Link) (BlockReadOpener, error) |
|
} |
|
|
|
type BlockReceiver func(LinkContext) (io.Writer, BlockCommitter, error) |
|
type BlockCommitter func(Link) error |
|
type BlockReadOpener func(LinkContext, Link) (io.Reader, error) |
|
|
|
// ErrLinkingSetup is returned by methods on LinkSystem when some part of the system is not set up correctly,
// or when one of the components refuses to handle a Link or LinkPrototype given.
// (It is not yielded for errors from the storage nor codec systems once they've started; those errors rise without interference.)
type ErrLinkingSetup struct {
	// Detail says which part of the setup failed.
	// Perhaps an enum here as well, which states which internal function was to blame?
	Detail string

	// Cause is the underlying error, if any. It is exposed through Unwrap.
	Cause error
}

// Error returns "<Detail>: <Cause>", or just Detail when no Cause is set
// (so that a cause-less value does not render a dangling ": <nil>").
func (e ErrLinkingSetup) Error() string {
	if e.Cause == nil {
		return e.Detail
	}
	return fmt.Sprintf("%s: %v", e.Detail, e.Cause)
}

// Unwrap returns Cause, which lets errors.Is and errors.As see through this wrapper.
func (e ErrLinkingSetup) Unwrap() error { return e.Cause }
|
|
|
// |
|
// -- the functions that LinkSystem then yields --> |
|
// |
|
|
|
func (ls *LinkSystem) Load(lnk Link, np NodePrototype) (Node, error) { |
|
lnkCtx := LinkContext{Ctx: context.Background()} |
|
nb := np.NewBuilder() |
|
if err := ls.Load2(lnkCtx, lnk, nb); err != nil { |
|
return nil, err |
|
} |
|
return nb.Build(), nil |
|
} |
|
|
|
// More helpers can go here as well.
// Everything attached to LinkSystem helps users without increasing demands or adding boilerplate to codec implementers, storage implementers, etc.
|
|
|
// Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype?
// No, not quite. (Alas.) If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles, which is a dead end.
|
|
|
func (ls *LinkSystem) Load2(lnkCtx LinkContext, lnk Link, na NodeAssembler) error { |
|
// Choose all the parts. |
|
decoder, err := ls.DecoderChooser(lnk) |
|
if err != nil { |
|
return ErrLinkingSetup{"could not choose a decoder", err} |
|
} |
|
hasher, err := ls.HasherChooser(lnk.Prototype()) |
|
if err != nil { |
|
return ErrLinkingSetup{"could not choose a hasher", err} |
|
} |
|
storer, err := ls.StorageReadChooser(lnk) |
|
if err != nil { |
|
return ErrLinkingSetup{"could not choose a storage reader", err} |
|
} |
|
// ... chain them up and apply them, you get the idea ... |
|
reader, err := storer(lnkCtx, lnk) |
|
if err != nil { |
|
return err |
|
} |
|
tee := io.TeeReader(reader, hasher) |
|
decodeErr := decoder(na, tee) |
|
if decodeErr != nil { // It is important to security to check the hash before returning any other observation about the content. |
|
_, err := io.Copy(hasher, reader) |
|
if err != nil { |
|
return err |
|
} |
|
} |
|
hash := hasher.Sum(nil) |
|
// Bit of a jig to get something we can do the hash equality check on. |
|
lnk2 := lnk.Prototype().Link(hash) |
|
if lnk2 != lnk { |
|
return fmt.Errorf("hash mismatch! %v (actual) != %v (expected)", lnk2, lnk) // fixme: should also be typed |
|
} |
|
if decodeErr != nil { |
|
return decodeErr |
|
} |
|
return nil |
|
} |
|
|
|
func (ls *LinkSystem) Store(lp LinkPrototype, n Node) (Link, error) { |
|
return ls.Store2(LinkContext{Ctx: context.Background()}, lp, n) |
|
} |
|
|
|
func (ls *LinkSystem) Store2(lnkCtx LinkContext, lp LinkPrototype, n Node) (Link, error) { |
|
// Choose all the parts. |
|
encoder, err := ls.EncoderChooser(lp) |
|
if err != nil { |
|
return nil, ErrLinkingSetup{"could not choose an encoder", err} |
|
} |
|
hasher, err := ls.HasherChooser(lp) |
|
if err != nil { |
|
return nil, ErrLinkingSetup{"could not choose a hasher", err} |
|
} |
|
storer, err := ls.StorageWriteChooser(lp) |
|
if err != nil { |
|
return nil, ErrLinkingSetup{"could not choose a storage writer", err} |
|
} |
|
// chain em up and run it |
|
writer, commitFn, err := storer(lnkCtx) |
|
if err != nil { |
|
return nil, err |
|
} |
|
tee := io.MultiWriter(writer, hasher) |
|
err = encoder(n, tee) |
|
if err != nil { |
|
return nil, err |
|
} |
|
lnk := lp.Link(hasher.Sum(nil)) |
|
return lnk, commitFn(lnk) |
|
} |
|
|
|
// ComputeLink returns a Link for the given data, but doesn't do anything else |
|
// (e.g. it doesn't try to store any of the serial-form data anywhere else). |
|
func (ls *LinkSystem) ComputeLink(lp LinkPrototype, n Node) (Link, error) { |
|
encoder, err := ls.EncoderChooser(lp) |
|
if err != nil { |
|
return nil, ErrLinkingSetup{"could not choose an encoder", err} |
|
} |
|
hasher, err := ls.HasherChooser(lp) |
|
if err != nil { |
|
return nil, ErrLinkingSetup{"could not choose a hasher", err} |
|
} |
|
err = encoder(n, hasher) |
|
if err != nil { |
|
return nil, err |
|
} |
|
return lp.Link(hasher.Sum(nil)), nil |
|
} |
|
func (ls *LinkSystem) MustComputeLink(lp LinkPrototype, n Node) Link { |
|
if lnk, err := ls.ComputeLink(lp, n); err != nil { |
|
panic(err) |
|
} else { |
|
return lnk |
|
} |
|
} |
|
|
|
// |
|
// -- how we synthesize all these guts --> |
|
// |
|
|
|
var MulticodecEncodersRegistry = map[uint64]Encoder{} // Typically codec packages register themselves here during init. |
|
var MulticodecDecodersRegistry = map[uint64]Decoder{} // Typically codec packages register themselves here during init. |
|
|
|
var MultihashRegistry = map[uint64]Encoder{} // Typically hasher packages register themselves here during init. // <- questionable. Many `hash.Hash` conformant functions out there; not ideal if we need a wrapper *package* for all of them just for registration. |
|
|
|
var DefaultLinkSystem = LinkSystem{ |
|
isDefault: true, // purely for the sake of error messages. |
|
EncoderChooser: func(lp LinkPrototype) (Encoder, error) { |
|
// Notice how the type switch here gives us a way to manuver away from go-cid in the future, gracefully: we can just add cases to this switch. |
|
switch lp2 := lp.(type) { |
|
case CidPrototype: |
|
// This chooser function uses the global registry, but if someone really wanted to customize this (use their own speed-optimized something-or-other, whatever), |
|
// then they could do so by replacing this whole chooser function that does what they like. |
|
fn, ok := MulticodecEncodersRegistry[lp2.MulticodecIndicator()] |
|
if !ok { |
|
return nil, fmt.Errorf("no encoder registered for multicodec indicator %x", lp2.MulticodecIndicator()) |
|
} |
|
return fn, nil |
|
default: |
|
return nil, fmt.Errorf("this encoderChooser can only handle Cid links; got %T", lp) |
|
} |
|
}, |
|
// ... honestly, you get the idea. |
|
// We would probably make helper structs for these things, also, rather than just bang it all inline here. |
|
// For example, a struct for multicodec registries, and then that just exports methods which happen to match the needs of the chooser func fields here; |
|
// that makes it even easier for people to make and use custom non-global registries (but still be using the multicodec registry pattern). |
|
} |
|
|
|
// |
|
// -- a few temp stubs (just to keep this single-file) that roughly describe go-cid --> |
|
// |
|
|
|
// CidPrototype is a temporary stub that roughly describes what go-cid offers.
//
// Within this file only MulticodecIndicator is consumed (by the
// DefaultLinkSystem encoder chooser); the multihash accessors are presumably
// meant for a hasher chooser that has not been written yet.
type CidPrototype interface {
	// MulticodecIndicator returns the code used as the key into the multicodec registries.
	MulticodecIndicator() uint64

	// MultihashIndicator returns the code identifying the hash function (presumably a multihash code).
	MultihashIndicator() uint64

	// MultihashLength returns the length of the hash (presumably in bytes -- confirm against go-cid).
	MultihashLength() int

	// MultihashBody returns the hash bytes themselves.
	MultihashBody() []byte
}
|
|
|
// |
|
// -- notes --> |
|
// |
|
|
|
/* |
|
I *did* consider having the LinkPrototype.Link method be allowed to return error. |
|
However, turned out every time it did so, I immediately had the handling logic of: |
|
|
|
if err != nil { |
|
panic(err) // this same thing told us what hasher to pick, so it ought to be satisfied with these bytes! |
|
} |
|
|
|
... and so, it seems we might as well say that the LinkPrototype should panic if it gets fed a byte slice it really doesn't like. |
|
Could still be convinced either way, though. |
|
*/ |
|
|
|
/* |
|
I'm not at all sure that StorageWriteChooser and StorageReadChooser need to take Link and LinkPrototype arguments. |
|
|
|
I think they got them in the first draft because all the neighbors did. |
|
But it's hard to imagine wanting to use that. |
|
|
|
Maybe those fields should just be BlockReceiver and BlockReadOpener with no further ado. |
|
|
|
Put another way: does the storage write opener logic (aka BlockReceiver) ever need to see a LinkPrototype?
Because that's the one thing it's concretely missing, and could... (this note breaks off here, unfinished)
|
*/ |
/*
I borrowed some review time from Hannah and here are my live (thus rough) notes:

[...] and if it's not given, it defaults to using the global one.
(Later edit: it turned out there was no reasonable way to do a global default one.)

LinkPrototype is basically go-cid.Prefix, yes.

It also occurs to me (since I was just looking at possible go-cid refactors today):
is LinkPrototype{ Link([]byte) Link } actually going to be sufficiently friendly to allocation minimization? (Is it possible to do better?)
*/