-
-
Save ustulation/b1009a943ac9deed5331b5d5d1003f20 to your computer and use it in GitHub Desktop.
/// ------------------------------------------------------ | |
/// Update-1: Metadata only with parent. | |
/// Versioned files inlined. | |
/// ------------------------------------------------------ | |
/// - While giving the dir to someone we can actually give the DirMetadata itself - that way others | |
/// would be able to attach it to wherever they want (give it a parent) in their own tree and use | |
/// it from there - they can even give it a separate name and others (including owner) wouldn't | |
/// care as content is all they actually meant to share. | |
/// | |
/// A Directory is `StructuredData::data(encrypted(serialised(Dir)));` | |
/// where: | |
// ================================= | |
/// Contents of a directory in the Update-1 layout. Per the notes above, this is the value that
/// is serialised, encrypted and stored as the `StructuredData` payload. It carries no metadata
/// about the directory itself, only listings of its children.
struct Dir { | |
/// Metadata for each sub-directory.
sub_dirs: Vec<DirMetadata>, | |
/// Entries for the files in this directory, versioned or not (see `File`).
files: Vec<File>, | |
} | |
// ================================= | |
// If shared, people can update this independently of each other, attaching it to any existing tree they have. | |
/// Metadata describing one directory in the Update-1 layout ("metadata only with parent").
/// The notes above also propose handing this value over when sharing a directory.
struct DirMetadata { | |
/// Identifies the network data holding the directory's content.
locator: DataIdentifier, // DataIdentifier::Structured(UNVERSIONED_SD_TYPE_TAG, XorName) | |
/// Symmetric key for the directory's content.
/// NOTE(review): presumably `None` means the content is stored unencrypted - confirm.
encrypt_key: Option<secretbox::Key>, | |
/// Directory name.
name: String, | |
/// Creation timestamp.
created: Time, | |
/// Last-modification timestamp.
modified: Time, | |
/// Opaque bytes supplied by the user; not interpreted anywhere in this sketch.
user_metadata: Vec<u8>, | |
} | |
// ================================= | |
/// A file entry inside a `Dir` (Update-1 layout, "versioned files inlined").
enum File { | |
/// A file with a single set of metadata.
Unversioned(FileMetadata), | |
/// A versioned file whose per-version metadata is stored inline.
/// NOTE(review): the ordering of the vector (oldest-first vs latest-first) is not stated - confirm.
Versioned(Vec<FileMetadata>), | |
} | |
/// Metadata for one file, or for one version of a versioned file (Update-1 layout).
struct FileMetadata { | |
/// File name.
name: String, | |
/// File size. NOTE(review): presumably in bytes - confirm.
size: u64, | |
/// Creation timestamp.
created: Time, | |
/// Last-modification timestamp.
modified: Time, | |
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>, | |
/// Data map for the file's content.
/// NOTE(review): presumably the self-encryption data map used to fetch the content - confirm.
data_map: DataMap, | |
} | |
// ================================= | |
/// ------------------------------------------------------ | |
/// Update-0: No versioning for dirs, only for files. | |
/// ------------------------------------------------------ | |
/// - This simplifies things a lot at conceptual level - dir restoration should be seen as | |
/// restore-point-operation, nothing to do with versioning. | |
/// - Dir are not versioned, only files | |
/// - Explain how sharing works (if more than one owner keys then interpreted as shared) | |
/// - Explain public and private. | |
/// ------------------------------------------------------ | |
/// | |
/// A Directory is `StructuredData::data(encrypted(serialised(Dir)));` | |
/// where: | |
// ================================= | |
/// Contents of a directory in the Update-0 layout (directories are not versioned here).
/// Per the notes above, this value is serialised, encrypted and stored as the
/// `StructuredData` payload.
struct Dir { | |
/// This directory's own metadata.
metadata: DirMetadata, | |
/// Last-modification timestamp; kept here rather than in `DirMetadata`.
modified: Time, | |
/// Locator of the parent directory.
/// NOTE(review): `DirLocator` is only declared in the Original layout further down; in this
/// revision `DirMetadata::locator` is a `DataIdentifier` - confirm which type is intended.
parent: Option<DirLocator>, // Set to `None` for root | |
/// Metadata for each sub-directory.
sub_dirs: Vec<DirMetadata>, | |
/// Entries for the files in this directory (see `File`).
files: Vec<File>, | |
} | |
// ================================= | |
// DirMetadata is not allowed to be updated unless you have parent's ownership | |
/// Metadata describing one directory in the Update-0 layout. The modification time is not
/// part of it; that lives in `Dir::modified`.
struct DirMetadata { | |
/// Identifies the network data holding the directory's content.
locator: DataIdentifier, // DataIdentifier::Structured(UNVERSIONED_SD_TYPE_TAG, XorName) | |
/// Whether the content is encrypted and, if so, with which key.
encryption: DirEncryption, | |
/// Directory name.
name: String, | |
/// Creation timestamp.
created: Time, | |
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>, | |
} | |
/// Encryption state of a directory's content (Update-0 layout).
enum DirEncryption { | |
/// Content is stored unencrypted.
Plaintext, | |
/// Content is stored encrypted; carries the key.
Ciphertext(secretbox::Key), | |
} | |
// ================================= | |
/// A file entry inside a `Dir` (Update-0 layout).
enum File { | |
/// A file with a single set of metadata.
Unversioned(FileMetadata), | |
/// A versioned file: the latest version is held inline, the version list out of line.
Versioned { | |
/// Location of the stored version list.
versions: DataIdentifier, // Points to ImmutableData(encrypted(Vec<FileMetadata>)) | |
/// Number of versions.
/// NOTE(review): presumably the length of the vector behind `versions` - confirm.
num_of_versions: u64, | |
/// Metadata of the latest version.
latest_file: FileMetadata, | |
}, | |
} | |
/// Metadata for one file, or for one version of a versioned file (Update-0 layout).
struct FileMetadata { | |
/// File name.
name: String, | |
/// File size. NOTE(review): presumably in bytes - confirm.
size: u64, | |
/// Creation timestamp.
created: Time, | |
/// Last-modification timestamp.
modified: Time, | |
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>, | |
/// Data map for the file's content.
/// NOTE(review): presumably the self-encryption data map used to fetch the content - confirm.
data_map: DataMap, | |
} | |
// ================================= | |
/// ------------------------------------------------------ | |
/// Original: | |
/// ------------------------------------------------------ | |
/// | |
/// An unversioned Directory is `StructuredData::data(encrypted(serialised(DirListing)));` | |
/// A versioned Directory is `StructuredData::data(encrypted(serialised(DirVersioning)));` | |
/// | |
/// where: | |
// ================================= | |
/// Payload of a versioned directory in the Original layout: the latest listing is held
/// inline, the version list out of line.
struct DirVersioning { | |
/// Network name of the stored version list.
versions: XorName, // Points to ImmutableData(encrypted(Vec<DirListing>)) | |
/// Number of versions.
/// NOTE(review): presumably the length of the vector behind `versions` - confirm.
num_of_versions: u64, | |
/// Listing of the latest version.
latest_dir: DirListing, | |
} | |
// ================================= | |
/// Payload of an unversioned directory in the Original layout; also the element type of a
/// versioned directory's history (see `DirVersioning`).
struct DirListing { | |
/// This directory's own metadata.
metadata: DirMetadata, | |
/// Last-modification timestamp; per the notes below it is not stored with the parent.
modified: Time, | |
/// Locator of the parent directory.
parent: Option<DirLocator>, // Set to `None` for root | |
/// Metadata for each sub-directory.
sub_dirs: Vec<DirMetadata>, | |
/// Entries for the files in this directory.
files: Vec<FileMetadata>, | |
} | |
// ================================= | |
/// Metadata describing one directory in the Original layout.
struct DirMetadata { | |
/// Where the directory lives and whether it is versioned.
locator: DirLocator, | |
/// Whether the content is encrypted and, if so, with which key.
encryption: DirEncryption, | |
/// Directory name.
name: String, | |
/// Creation timestamp.
created: Time, | |
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>, | |
} | |
/// Locates a directory on the network (Original layout).
struct DirLocator { | |
/// Network name of the directory's data.
id: XorName, | |
/// Whether the directory is versioned.
/// NOTE(review): presumably selects between a `DirVersioning` and a `DirListing` payload;
/// the effect of flipping it on an existing directory is an open question - confirm.
versioned: bool, // What happens if you change it | |
} | |
/// Encryption state of a directory's content (Original layout).
enum DirEncryption { | |
/// Content is stored unencrypted.
Plaintext, | |
/// Content is stored encrypted; carries the key.
Ciphertext(secretbox::Key), | |
} | |
// ================================= | |
/// Per-file entry of a `DirListing` (Original layout). The name, size, modification time and
/// data map are held in `FileInfo`, reached through `versioning`.
struct FileMetadata { | |
id: XorName, // unique ID of file (like inode) | |
/// Creation timestamp.
created: Time, | |
/// The file's content info: a single `FileInfo`, or a version history of them.
versioning: FileVersioning, | |
} | |
/// Details of one file, or of one version of a versioned file (Original layout).
struct FileInfo { | |
/// File name.
name: String, | |
/// File size. NOTE(review): presumably in bytes - confirm.
size: u64, | |
/// Last-modification timestamp.
modified: Time, | |
/// Data map for the file's content.
/// NOTE(review): presumably the self-encryption data map used to fetch the content - confirm.
data_map: DataMap, | |
} | |
/// Content info of a file in the Original layout, versioned or not.
enum FileVersioning { | |
/// A file with a single `FileInfo`.
Unversioned(FileInfo), | |
/// A versioned file: the latest version is held inline, the version list out of line.
Versioned { | |
/// Network name of the stored version list.
versions: XorName, // Points to ImmutableData(encrypted(Vec<FileInfo>)) | |
/// Number of versions.
/// NOTE(review): presumably the length of the vector behind `versions` - confirm.
num_of_versions: u64, | |
/// Info of the latest version.
latest_file: FileInfo, | |
}, | |
} | |
// ================================= | |
/// Consider the hierarchy: | |
/// root/ | |
/// |-- dir-a | |
/// | `-- dir-b | |
/// `-- file-0 | |
/// | |
/// - Any update of dir-b made by owner/s of the tree will also make it to dir-a if that | |
/// information about dir-b is also present in dir-a. It will also lead to modification of time | |
/// stamp of dir-a if dir-a is updated. Since info about modified time stamp for a dir is not | |
/// present with its parent, root in this case is unaffected. | |
/// | |
/// - Any update of dir-b made by those with whom the dir is shared but are not the owners of the | |
/// entire tree will remain contained in dir-b because although they have `parent: | |
/// Option<DirLocator>` for reaching the parent they do not have the keys to decrypt it in order | |
/// to be able to read, nor are they listed in the owner field of StructuredData of parent | |
/// DirListing to be able to modify it. | |
/// - Repercussion: Metadata of dir-b contained in dir-a will remain out of sync until owner of | |
/// the tree decides to sync it. Due to this, name change for a shared directory will not be | |
/// allowed except by the owner otherwise name change would not reflect in metadata of dir-b in | |
/// dir-a (until sync'd by owner of tree) which could result in dir-a ending up having | |
/// similarly named sub-dirs (when owner finally sync's metadata of dir-b in dir-a). | |
/// | |
/// - Currently we are yet to find out how versioning for a directory should work. | |
/// - If root was versioned and had the above structure and later dir-b was removed from dir-a | |
/// would we want this to reflect on root as well ? Currently it only tracks the immediate | |
/// children. | |
/// - If we restore something to version `v` and start making changes there, should it branch off | |
/// at that point, or how should it behave? | |
/// - If we change the name of dir-b then it would be updated and its metadata would be updated | |
/// in dir-a thus creating a new version of dir-a `v0 -> v1`. Now files were added and other | |
/// operations were done on dir-b which does not affect dir-a. Now the user chooses to restore to | |
/// version `v0` for dir-a. It will show the metadata of dir-b as it was when dir-a was at | |
/// `v0`. However if we fetched dir-b using metadata in dir-a, we would get a latest dir-b. How | |
/// should we design that dir-b corresponding to that time when dir-a was at `v0` is fetched | |
/// and how would this work recursively if dir-b had children too ? | |
// NOTE(review): empty function; presumably a placeholder so the `///` notes above attach to an item.
fn unresolved_questions() {} |
Shouldn't we either implement enough to get us full attributes to make the dirs/files usable on Windows and POSIX (e.g. we'd also need last-accessed time, allocation-size, ensure that Time is high resolution enough)
I don't see VFS-based apps as the only route, so I don't really see this as a direct requirement for needing all or none. AWS data APIs, for example, define system metadata and user metadata, where in system metadata you get last-accessed time and size, which aren't user-editable; user metadata of course is.
Oh, I don't mean this is the approach I prefer, just that there is no explicit need for all or none. This should probably move to the Google doc anyway, which lists the high-level approaches.
- A and B: Yeah, not having to store the metadata in both child and parent would definitely simplify things. If we kept the metadata in the parent only, we wouldn't need
Dir::parent
anymore. This would also solve the problem that if you only have access to a directory, but not its parent, you shouldn't be able to modify its name or user metadata. Downside is that you wouldn't know the name/user_metadata unless someone would give them to you by some other means (the locator
and encryption
would no longer be enough to get this info). But perhaps it's something we can live with?
- C: I don't disagree with you, but what would you suggest there?
- D: No strong opinion on this from me. I'd be fine with it either way.
Downside is that you wouldn't know the name/user_metadata unless someone would give them to you by some other means
On the other hand, name of a shared dir can be given along with the symmetric key.
Recipient can then choose to use this name or rename a dir as they'd like (not affecting the real name saved in metadata).
From this point of view it can be seen as an advantage.
We would also lose the modified_time
timestamp (in the parent), but the current proposal doesn't have it either, so it's another thing we can possibly live without?
Yes, while giving the dir to someone we can actually give the DirMetadata
itself - that way as @Fraser999 suggests others would be able to attach it to wherever they want (give it a parent) in their own tree and use it from there - they can even give it a separate name and others (including owner) wouldn't care as content is all they actually meant to share. This is a nice idea. I'm updating the gist.
- D: i think is more trivial and if changed either way wouldn't affect code base that much - so can be done later once opinions collected. Mine is to leave it as is.
- C: We can put it inline - if it gets bigger than 100KiB then it will turn into ImmutableData recursively until the obtained DataMap fits in SD and that DataMap is stored inline (usual operation). We could store it as a linked list of SDs, but that has the disadvantage of ownership tracking, signatures and other stuff associated with SDs
So I'd propose a structure like this (I changed the names of everything, just for fun):
/// Proposed directory content: its last-update time plus listings of its children.
struct Dir {
/// Time the directory was last updated.
updated_at: Tm,
/// Entries for the files in this directory, versioned or not (see `FileEntry`).
files: Vec<FileEntry>,
/// Entries for the sub-directories, each carrying that sub-directory's metadata.
sub_directories: Vec<DirEntry>,
}
/// Proposed per-directory entry held in the parent: the directory's metadata plus the `id` and
/// `key` needed to fetch its content. Per the text below, a copy is what gets shared.
struct DirEntry {
/// Network name used to fetch the directory's `Dir` content.
id: XorName,
/// Key for the directory's content.
/// NOTE(review): presumably `None` means the content is unencrypted - confirm.
key: Option<secretbox::Key>,
/// Directory name.
name: String,
/// Creation time.
created_at: Tm,
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>,
}
/// Proposed file entry inside a `Dir`: either a plain file or a versioned one.
enum FileEntry {
/// A file without version history.
Unversioned(File),
/// A file with version history (see `VersionedFile`).
Versioned(VersionedFile),
}
/// Proposed metadata for one file, or for one version of a versioned file.
/// NOTE(review): unlike the gist's `FileMetadata`, there is no `size` field here - confirm
/// whether that omission is intended.
struct File {
/// File name.
name: String,
/// Creation time.
created_at: Tm,
/// Last-update time.
updated_at: Tm,
/// Opaque bytes supplied by the user.
user_metadata: Vec<u8>,
/// Data map for the file's content.
/// NOTE(review): presumably the self-encryption data map used to fetch the content - confirm.
data_map: DataMap,
}
/// Proposed versioned file: the latest version is held inline, the full list out of line.
struct VersionedFile {
/// Version number of `latest`.
/// NOTE(review): whether numbering starts at 0 or 1 is not stated - confirm.
latest_version_number: u64,
/// Metadata of the latest version.
latest: File,
all_versions: XorName, // points to immutable data containing list of all versions (Vec<File>)
}
To share a directory with someone, we would add them to the owners list of the structured data first (by whatever means this is going to be done) and then give them a copy of DirEntry
of that directory. That gives them read-only access to the directory metadata (which are inside the DirEntry
structure) and the id
and key
to fetch the content of the directory (the Dir
structure).
@madadam: FileEntry with two more structs File
and VersionedFile
seems to be (not so useful) bloat. Also i have made things inline instead of pointers - I guess i updated the gist while you were typing and didn't get a chance to see it; can you see the updated gist again and point out any shortcomings ?
As for point D, yeah, I'm not particularly strong on this - just didn't know why we seemed to have cherry picked some attributes and left out others. I guess I was really wanting to know if we used the chosen attributes in some way, or if they're just a convenience for the user. If they're meant to be a convenience, I think we should make them even more convenient :)
For point C, I guess this where I think we should be trying to join up the requirements here with the data types the network handles. If SD were versioned, we could just have Dir::files
as type Vec<XorName>
where each XorName is the location of the SD representing a given file. Then we can get rid of the File
enum (or FileEntry
and VersionedFile
in your suggestion, Adam) leaving just the metadata struct.
That way, we don't have the headache of versioning in two separate ways - one implemented by the Client team and the other implemented by the Vault team. The network and SD provides the required functionality (ability to push a single new version - kinda like AppendableData
does, ability to get some or just the latest version without recovering the whole SD chunk, etc.)
@Fraser999 : I cannot comment on the new (or revamped) Network Data-Type + Data-Type-Handling unless i see it in some (even if basic) form though, to evaluate performance impact and complexity among others. If left to the client to construct one, we are saying that we are not providing something that we think is the best possible solution for a generic use-case, rather giving the apps the tool/framework to build on it. Having said that, the current SD handling is not useful and does need a few more bells and whistles to be practical.
Just noticed too that we've split the metadata up for dirs - created
is in the DirMetadata,
but modified
in the Dir.
Shouldn't they both be in the metadata struct?
If left to the client to construct one
This is what I think we should try and avoid. I'm positive that we can make this efficient for clients to use and for the network to handle if we look at it from both perspectives.
@spandan: you are right, I posted my comment before I got a chance to look at your update. I have two comments on it:
- With removal of directory metadata from
Dir,
I don't think we need the parent dir locator there anymore.
- The
modified
time should either be moved from DirMetadata
to Dir,
or removed completely, because a user with access to a dir, but not its parent, is able to modify said dir, but not the modified
timestamp (which is in the parent), so the timestamp eventually ends up being wrong.
Apart from that, I think it looks good.
So, some more questions/suggestions based on the update:
A. I'm still thinking it'd be better to not have the
Dir::metadata
member. If you have retrieved and decrypted a given Dir
then you necessarily already have at least the locator
(and encryption
if encrypted) element(s) of that metadata to hand. You might also have the remaining fields, but they're not essential in all cases. E.g. if I want to share an encrypted Dir
with you, all I would need to send to you would be the locator
and encryption
for that Dir.
You can retrieve it, decrypt it and then store it in a parent of your choosing, applying your own values for name,
created
and user_metadata
as you wish. Of course, I could also send you my values of those and you could apply some or all of them - but you don't have to.
B. The same reasoning applies to
Dir::parent
- I don't think it's efficient to store this inside the data held on the network. I can see the benefit of having this available once you've retrieved and parsed a Dir,
but I'd have thought we could just hold the Dir
as (Dir, DirLocator)
(where the DirLocator
is the parent's) or even with a pointer to the actual parsed parent Dir
or similar?
C.
File::Versioned::versions
being a single DataIdentifier
still doesn't sit well with me to be honest. I think essentially my original questions 1 and 3 still apply here.
D. I guess I'm still not clear on why we've chosen the metadata fields
created
and modified
(and size
for files too). Shouldn't we either implement enough to get us full attributes to make the dirs/files usable on Windows and POSIX (e.g. we'd also need last-accessed time, allocation-size, ensure that Time
is high resolution enough) or implement none and leave that up to the user via the user_metadata
fields?
Other than that, the changes look good to me :)