-
-
Save anonymous/8dfca128e1409cb198bf to your computer and use it in GitHub Desktop.
Paravirtualized Storage Interface
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // All that domains need to know is the container name and the object names | |
| // The domain which creates the container needs to know the groupid | |
| // CONTAINER OPERATIONS | |
| // Creates a new container shared with groupid | |
| // Returns a container ID (>=0) if no error occurs. | |
| // Returns error (<0) if resource allocation fails or if container already exists | |
| int container_create(char *container, int groupid) | |
| // Deallocates resources bound to the container. | |
| // If it is in use, no new domain will be able to join this container; | |
| // the call blocks until the container is no longer in use, then deallocates it. | |
| // Returns an error if container does not exist. | |
| int container_destroy(int container_id) | |
| // Gain access to an existing container | |
| // The container will exist at least until container_leave is called | |
| // Returns a container ID (>=0) if no error occurs. | |
| // Returns an error (<0) if container does not exist or permission is not granted. | |
| int container_join(char *container) | |
| // Stop using a container, losing access to its resources. | |
| // Returns an error if container does not exist. | |
| int container_leave(int container_id) | |
| // OBJECT OPERATIONS | |
| // used only internally | |
| // Backend bookkeeping for one object. | |
| // The data area is divided into BLOCK_SIZE regions and each region has its own | |
| // reader count and locks. The per-block arrays are sized with a rounded-up block | |
| // count so that a trailing partial block (or an object smaller than BLOCK_SIZE) | |
| // still has an entry. | |
| struct object { | |
| constant int BLOCK_SIZE = 1024*1024; // lock regions of this size | |
| int reader_count[(size + BLOCK_SIZE - 1)/BLOCK_SIZE]; // number of readers per block | |
| mutex mutex_reader_count[(size + BLOCK_SIZE - 1)/BLOCK_SIZE]; // protects reader_count[i] | |
| mutex mutex_write_read[(size + BLOCK_SIZE - 1)/BLOCK_SIZE]; // either one writer or multiple readers can have access | |
| mutex mutex_write_priority[(size + BLOCK_SIZE - 1)/BLOCK_SIZE]; // fairness for writers | |
| int size; // object data size | |
| char data[]; // object data, addressed by byte offset | |
| } | |
| // exposed to the application | |
| // Attributes of an object as seen by the application; filled in by object_getattr. | |
| struct object_attr { | |
| int size; // object data size | |
| char *data; // object data; a pointer, because object_getattr assigns it | |
| } | |
| // All operations return error if object does not exist | |
| // Allocates memory for the object, | |
| // adds the object to the container list of objects | |
| // initializes object use counter to 0 in the backend | |
| // Returns an object ID (>=0) if no error occurs. | |
| // Returns error (<0) if object already exists, if no resources are available, | |
| // or if access to container is not permitted | |
| int object_create(int container_id, char *object, size_t size) | |
| // Blocks if object is still in use | |
| // Removes object from container object list | |
| // Deallocates all object resources | |
| int object_destroy(int object_id) | |
| // Gain access to an existing object | |
| // Returns an object ID (>=0) | |
| // Returns error (<0) if permission denied or container does not exist | |
| int object_join(int container_id, char *object) | |
| // Stop using an object, losing access to its resources. | |
| int object_leave(int object_id) | |
| // Retrieves attributes about an object, filling in the supplied structure. | |
| // On success obj->size and obj->data are copied from the backend object. | |
| // Returns 0 if no error occurs. | |
| // Returns error (<0) if obj is NULL or object_id does not name an existing object. | |
| int object_getattr(int object_id, struct object_attr *obj) | |
| if (obj == NULL) { | |
| return error | |
| } | |
| object <- _objects[object_id] | |
| if (object does not exist) { | |
| return error | |
| } | |
| obj->size = object->size | |
| obj->data = object->data | |
| return 0 | |
| // OBJECT MANIPULATION AND SYNCHRONIZATION | |
| // get and put are used for reading and writing directly from/to the memory buffer. | |
| // Writes with explicit synchronization can be done using object_write for synchronous | |
| // operation, and object_awrite for asynchronous operation | |
| // Synchronously writes size bytes of buf to object at offset. | |
| // object data is synchronized with other read and write operations: | |
| // (object_write, object_awrite, object_get, object_put) | |
| int object_write(int object_id, size_t offset, char *buf, size_t size) | |
| // Asynchronous version of object_write | |
| // Returns an operation_id. Status can be checked with object_opstatus, | |
| // and return value with object_opreturn | |
| int object_awrite(int object_id, size_t offset, char *buf, size_t size) | |
| // Asynchronous read; the read counterpart of object_awrite | |
| // Returns an operation_id. Status can be checked with object_opstatus, | |
| // and return value with object_opreturn | |
| int object_aread(int object_id, size_t offset, char *buf, size_t size) | |
| // Checks the status of operation_id, which is the return value of an | |
| // asynchronous operation. | |
| // Returns INPROGRESS, CANCELED or 0 if the operation succeeded. | |
| int object_opstatus(int operation_id) | |
| // Cancels an ongoing operation | |
| int object_opcancel(int operation_id) | |
| // Returns the return value of the operation operation_id. | |
| // Return value is unspecified if the operation_id is not valid or did not complete | |
| // NOTE(review): the comments on object_awrite and object_aread refer to this function as | |
| // object_opreturn, which would match object_opstatus/object_opcancel; confirm the intended name. | |
| int object_return(int operation_id) | |
| // Get direct access to the object's memory buffer at offset. | |
| // The memory region from offset to offset+size is guaranteed to be | |
| // protected from object_write and object_awrite operations. | |
| // Locking is done per BLOCK_SIZE region: every block that overlaps the range is read-locked. | |
| // Use object_put to signal that access to this region has stopped. | |
| // Returns an operation_id to be used for ending access with object_put. | |
| // Returns error (<0) if the object does not exist, buf is NULL, size is 0, | |
| // or the range [offset, offset+size) does not lie inside the object. | |
| int object_get(int object_id, size_t offset, size_t size, char **buf) | |
| obj <- _objects[object_id] | |
| if (obj does not exist || buf == NULL) { | |
| return error | |
| } | |
| // Written as "size > obj->size - offset" so the check itself cannot overflow. | |
| if (size == 0 || offset >= obj->size || size > obj->size - offset) { | |
| return error | |
| } | |
| int start_block = (offset / BLOCK_SIZE); | |
| // Block holding the LAST byte of the range. Dividing (offset + size) instead would | |
| // lock one block too many whenever the range ends exactly on a block boundary. | |
| int end_block = ((offset + size - 1) / BLOCK_SIZE); | |
| for (i = start_block; i <= end_block; i++) { | |
| wait(obj->mutex_write_priority[i]) // fairness for writers | |
| wait(obj->mutex_reader_count[i]) | |
| obj->reader_count[i]++; | |
| if (obj->reader_count[i] == 1) { // first reader takes the block away from writers | |
| wait(obj->mutex_write_read[i]) | |
| } | |
| signal(obj->mutex_reader_count[i]) | |
| signal(obj->mutex_write_priority[i]) | |
| } | |
| *buf = &( obj->data[offset] ); | |
| return new_operation(object_id, offset, size) | |
| // Mark a memory region as no longer in use, allowing explicit writes | |
| // such as object_write to access this region. | |
| // Resources tied to the object can be freed when no longer in use. | |
| // This operation changes nothing if object_get was not called first. | |
| // Returns 0 if no error occurs. | |
| // Returns error (<0) if operation_id was not returned by object_get. | |
| int object_put(int operation_id) | |
| object_id, offset, size <- lookup_object_by_operation(operation_id) | |
| if (lookup failed) { | |
| return error // nothing was locked for this id, so nothing is released | |
| } | |
| obj <- _objects[object_id] | |
| int start_block = (offset / BLOCK_SIZE); | |
| int end_block = ((offset + size - 1) / BLOCK_SIZE); // same block range object_get locked | |
| for (i = start_block; i <= end_block; i++) { | |
| wait(obj->mutex_reader_count[i]) | |
| obj->reader_count[i]--; | |
| if (obj->reader_count[i] == 0) { // only the last reader hands the block back to writers | |
| signal(obj->mutex_write_read[i]) | |
| } | |
| signal(obj->mutex_reader_count[i]) | |
| } | |
| // NOTE(review): the operation record should be invalidated here so that a second | |
| // object_put with the same id is rejected; no helper for that is defined in this document. | |
| return 0 | |
| // All operations return error if container or object do not exist | |
| // EXAMPLE READER-WRITER | |
| // Example producer: creates container "con" and object "A", writes the payload | |
| // into the object, then destroys the object and the container. | |
| writer() { | |
| struct object_attr objattr; | |
| int con = container_create("con", groupid) | |
| int obj = object_create(con, "A", size) | |
| object_getattr(obj, &objattr) // object_getattr takes (object_id, attr) | |
| object_write(obj, 0, payload, objattr.size) // object_write takes (object_id, offset, buf, size) | |
| object_destroy(obj); // blocks while the object is still in use | |
| container_destroy(con) // blocks while the container is still in use | |
| } | |
| // Example consumer: joins container "con" and object "A", processes the object | |
| // data in place under object_get/object_put, then releases the object and the container. | |
| reader() { | |
| struct object_attr objattr; | |
| int con, obj, op; | |
| while ((con=container_join("con")) < 0) ; // wait for writer | |
| obj = object_join(con, "A") | |
| object_getattr(obj, &objattr) // object_getattr takes (object_id, attr) | |
| size_t data_size = objattr.size | |
| char *data; | |
| op = object_get(obj, 0, objattr.size, &data) | |
| compute(data, data_size) | |
| object_put(op) | |
| object_leave(obj) // give the object back before leaving its container | |
| container_leave(con) | |
| } | |
| // BACKEND STRUCTURES | |
| // container data | |
| // Backend record for one container; ContainerList maps a container name to this record. | |
| struct container { | |
| ObjectList *object_list; // the container's list of objects (object_create adds to it) | |
| int write_policy; // NOTE(review): meaning is not specified anywhere in this document | |
| mutex mutex_use_counter; // work with use counter atomically | |
| int use_counter; // presumably the number of domains currently using the container; TODO confirm | |
| mutex mutex_protect_destroy; // do not allow container_join after destroy is called | |
| } | |
| // Maps a container name to its backend struct container. | |
| HashTable ContainerList { | |
| key char *container_name, | |
| value struct container | |
| } | |
| // Maps an object name to its entry; struct container holds a pointer to one of these tables. | |
| // NOTE(review): the value type is the application-facing struct object_attr, while | |
| // object_get/object_put operate on the internal struct object; confirm which is intended. | |
| HashTable ObjectList { | |
| key char *object_name, | |
| value struct object_attr; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment