Last active
August 7, 2019 18:32
-
-
Save jorendorff/8aeba25778990776622fbe281e59de92 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Vec.dfy - A sketch of Rust's Vec<T> in Dafny, proving the unsafe operations safe | |
// | |
// ### Background | |
// | |
// Rust is a systems programming language. `Vec<T>` is the basic growable array | |
// type in Rust. It's implemented in Rust, using unsafe code; some parts are | |
// implemented by an underlying `RawVec<T>` type, also using unsafe code. | |
// | |
// Dafny is a C#-like programming language with proofs. In Dafny, assertions | |
// are checked at compile time! If the compiler can't prove that the assertion | |
// will always pass, compilation fails. Likewise, integer division and array | |
// accesses must be proven safe in Dafny: divide-by-zero and out-of-bounds | |
// reads and writes are ruled out, at no run-time cost. | |
// | |
// | |
// ### What we are about | |
// | |
// This file is valid Dafny code. The upshot is that if Rust had Dafny-style | |
// assertions, `RawVec` could have a safe API (!), and `Vec` could be | |
// implemented on top of that, using only safe code (!!). | |
// | |
// In this file, we have an implementation of `Vec<T>`. It actually ends up | |
// looking rather natural and nice. | |
// | |
// Of course we're handwaving past all sorts of interesting problems. Like move | |
// semantics--Dafny really banks on "storing" imaginary logical "copies" of | |
// values, as well as a very strong flavor of `==`. Another unaddressed | |
// question is how to integrate Rust concepts (like lifetimes) into Dafny. Mere | |
// details! | |
// | |
// Another possible critique is that we're really just pushing all the | |
// interesting parts of Vec into RawVec. But that's a good change! In | |
// Rust, it's not clear where to draw the line between Vec's responsibilities | |
// and RawVec's; with Dafny, there is a rather nice, obvious line: make RawVec | |
// abstract away pointer use and encapsulate memcpy-related speed hacks. The | |
// next step might be to implement RawVec safely, too, on top of some still | |
// lower-level APIs for Alloc and pointers and memory. | |
include "RawVec.dfy" // see <https://gist.github.com/jorendorff/6f584d776f614353c62d02deeeec9125> | |
module Vec { | |
import opened R = RawVec | |
import opened Util | |
/// A contiguous growable array type, written `Vec<T>` but pronounced 'vector'. | |
class Vec<T> { | |
// In Rust, Vec has two fields, `buf` and `len`. Here we use `length` | |
// because Dafny doesn't allow fields and methods to have the same name. | |
// | |
// The division of responsibilities is the same as in Rust: the RawVec | |
// `buf` handles allocation and knows the buffer's capacity, but doesn't | |
// track at run time which parts of the buffer actually contain values and | |
// which are uninitialized garbage. | |
// | |
// It is the Vec's job to know which parts of the buffer are in use. | |
// Specifically, slots `0..length` are populated, and `length..capacity` is | |
// uninitialized spare room. | |
var buf: RawVec<T> | |
// The number of elements actually stored in the Vec. | |
var length: usize | |
// A notional copy of all the elements of the Vec as a Dafny "sequence" (an | |
// immutable data structure). A `ghost` field is guaranteed not to exist at | |
// run time; this is used only for assertions. | |
ghost var elements: seq<T> | |
// A predicate is ghost code. Returns true if this Vec is consistent. | |
predicate valid() | |
reads this, buf | |
{ | |
// What makes a Vec valid? The underlying buffer must be valid, of course. | |
buf.valid() && | |
// `length` must be in sync with `elements`. (`|elements|` is Dafny's | |
// overly mathematical syntax for sequence length.) | |
length == |elements| && | |
// The buffer must be big enough to hold the data. | |
length <= buf.cap && | |
// Elements `0..length` of the buffer are populated with the values in `elements`; | |
// the rest are unused spare capacity. | |
buf.contents == pad_end(elements, buf.cap) | |
} | |
// This function, too, is ghost code. | |
function get(i: usize): (element: T) | |
reads this, buf | |
requires valid() | |
requires 0 <= i < length | |
ensures element == elements[i] | |
{ | |
buf.get(i) | |
} | |
/// Constructs a new, empty `Vec<T>`. | |
/// | |
/// The vector will not allocate until elements are pushed onto it. | |
constructor() | |
ensures valid() && elements == [] && fresh(buf) && buf.capacity() == 0 | |
{ | |
buf := new RawVec(); | |
elements := []; | |
length := 0; | |
} | |
constructor with_capacity(capacity: usize) | |
ensures valid() && elements == [] && fresh(buf) && buf.capacity() == capacity | |
{ | |
buf := new RawVec.with_capacity(capacity); | |
elements := []; | |
length := 0; | |
new; | |
assert pad_end(elements, buf.cap) == unused(buf.cap); | |
assert all_unused(buf.contents); | |
unused_equal_lemma(buf.contents, unused(buf.cap)); | |
} | |
lemma unused_when_empty_lemma() | |
requires valid() && elements == [] | |
ensures all_unused(buf.contents) | |
{} | |
function method capacity(): usize | |
reads this, buf | |
{ | |
buf.capacity() | |
} | |
/// Reserves capacity for at least `additional` more elements to be inserted | |
/// in the given `Vec<T>`. The collection may reserve more space to avoid | |
/// frequent reallocations. After calling `reserve`, capacity will be | |
/// greater than or equal to `self.len() + additional`. Does nothing if | |
/// capacity is already sufficient. | |
method reserve(additional: usize) | |
requires valid() | |
ensures valid() | |
ensures capacity() >= len() + additional | |
ensures elements == old(elements) | |
{ | |
buf.reserve(length, additional); | |
} | |
method reserve_exact(additional: usize) | |
requires valid() | |
ensures valid() | |
ensures old(capacity()) >= len() + additional ==> old(this) == this | |
ensures capacity() >= len() + additional | |
ensures elements == old(elements) | |
{ | |
buf.reserve_exact(length, additional); | |
} | |
method shrink_to_fit() | |
requires valid() | |
ensures valid() | |
ensures capacity() <= old(capacity()) | |
ensures elements == old(elements) | |
{ | |
if capacity() != length { | |
buf.shrink_to_fit(length); | |
} | |
} | |
method truncate(new_len: usize) | |
requires valid() | |
ensures valid() | |
modifies this, this.buf | |
ensures length == min(old(length), new_len) | |
ensures elements == old(elements)[..len()] | |
ensures capacity() == old(capacity()) | |
{ | |
while new_len < length | |
modifies {this, buf} | |
invariant min(length, new_len) == min(old(length), new_len) | |
invariant valid() | |
invariant elements == old(elements)[..len()] | |
invariant capacity() == old(capacity()) | |
invariant buf == old(buf) | |
{ | |
length := length - 1; | |
var _ := buf.read(length); | |
elements := elements[..length]; | |
} | |
} | |
// Note that this doesn't ensure valid(). | |
// This really isn't useful in the Dafny version. You'd want the RawVec API. | |
method unsafe_set_len(new_len: usize) | |
modifies this | |
ensures len() == new_len | |
{ | |
length := new_len; | |
} | |
method swap_remove(index: usize) returns (value: T) | |
requires valid() | |
ensures valid() | |
requires 0 <= index < len() | |
modifies this, buf | |
ensures len() == old(len()) - 1 | |
ensures elements == old(elements)[index := old(elements[len() - 1])][..old(len()) - 1] | |
ensures value == old(elements[index]) | |
{ | |
swap(index, length - 1); | |
value := pop(); | |
} | |
method insert(index: usize, element: T) | |
requires valid() | |
ensures valid() | |
requires 0 <= index <= len() | |
modifies this, buf | |
ensures len() == old(len()) + 1 | |
ensures elements == old(elements)[..index] + [element] + old(elements)[index..] | |
method remove(index: usize) returns (value: T) | |
requires valid() | |
ensures valid() | |
requires 0 <= index < len() | |
modifies this, buf | |
ensures len() == old(len()) - 1 | |
ensures elements == old(elements)[..index] + old(elements)[index + 1..] | |
ensures value == old(elements)[index] | |
method retain(f: T -> bool) | |
requires valid() | |
ensures valid() | |
ensures elements == filter(f, old(elements)) | |
method dedup_by_key<K>(key: T -> K) | |
requires valid() | |
ensures valid() | |
// no formal spec yet | |
method dedup_by(same_bucket: (T, T) -> bool) | |
requires valid() | |
ensures valid() | |
// no formal spec yet | |
method push(value: T) | |
requires valid() | |
ensures valid() | |
modifies this, buf | |
ensures elements == old(elements) + [value] | |
ensures buf == old(buf) | |
{ | |
if length == buf.cap { | |
buf.double(); | |
} | |
buf.write(length, value); | |
length := length + 1; | |
elements := elements + [value]; | |
} | |
// In Rust, this method returns an Option<T>. In Dafny it makes more sense | |
// to require that the Vec is nonempty. | |
method pop() returns (value: T) | |
requires valid() | |
ensures valid() | |
requires len() > 0 | |
modifies this, buf | |
ensures elements == old(elements)[..old(len()) - 1] | |
ensures value == old(get(len() - 1)) | |
{ | |
length := length - 1; | |
elements := elements[..length]; | |
value := buf.read(length); | |
} | |
lemma spare_capacity_is_unused_lemma(amount: usize) | |
requires valid() && buf.cap >= length + amount | |
ensures all_unused(buf.contents[length..length + amount]) | |
{ | |
assert all_unused(buf.contents[length..]); | |
unused_range_lemma(buf.contents[length..], 0, amount); | |
assert buf.contents[length..][0..amount] == buf.contents[length..length + amount]; | |
} | |
method append(other: Vec<T>) | |
requires valid() && other.valid() && other != this && other.buf != this.buf | |
modifies this, this.buf, other, other.buf | |
ensures valid() && elements == old(elements) + old(other.elements) | |
{ | |
reserve(other.length); | |
spare_capacity_is_unused_lemma(other.length); | |
ghost var unused_tail := buf.contents[length + other.length..]; | |
buf.fill_from(length, other.buf, 0, other.length); | |
length := length + other.length; | |
elements := elements + other.elements; | |
other.length := 0; | |
other.elements := []; | |
assert buf.contents == pad_end(elements, buf.cap) by { | |
calc { | |
buf.contents; | |
== | |
old(buf.contents)[..old(length)] + | |
old(other.buf.contents)[0..0 + old(other.length)] + | |
unused_tail; | |
== | |
{ | |
assert old(buf.contents)[..old(length)] == used(old(elements)); | |
assert old(other.buf.contents)[0..0 + old(other.length)] == | |
used(old(other.elements)); | |
assert unused_tail == unused(buf.cap - length); | |
} | |
used(old(elements)) + | |
used(old(other.elements)) + | |
unused(buf.cap - length); | |
== | |
used(elements) + unused(buf.cap - |elements|); | |
} | |
} | |
} | |
method clear() | |
requires valid() | |
modifies this, buf | |
ensures valid() && elements == [] && capacity() == old(capacity()) | |
{ | |
truncate(0); | |
} | |
function method len(): usize | |
reads this | |
{ | |
length | |
} | |
predicate method is_empty() | |
reads this | |
{ | |
length == 0 | |
} | |
method split_off(at: usize) returns (tail: Vec<T>) | |
requires valid() | |
ensures valid() | |
modifies this, buf | |
requires 0 <= at <= len() | |
modifies this, buf | |
ensures elements == old(elements)[..at] | |
ensures tail.elements == old(elements)[at..] | |
ensures capacity() == old(capacity()) | |
ensures fresh(tail) | |
ensures tail.valid() | |
{ | |
var tail_len := length - at; | |
tail := new Vec.with_capacity(tail_len); | |
assert all_unused(tail.buf.contents[0..0 + tail_len]) by { | |
assert tail.elements == []; | |
tail.unused_when_empty_lemma(); | |
unused_range_lemma(tail.buf.contents, 0, 0 + tail_len); | |
} | |
tail.buf.fill_from(0, this.buf, at, tail_len); | |
tail.length := tail_len; | |
tail.elements := elements[at..]; | |
length := at; | |
elements := elements[..at]; | |
// Prove the resulting state is valid. This amounts to proving a single | |
// key component of the valid() predicate, given on the next line: | |
assert buf.contents == pad_end(elements, buf.cap) by { | |
// To achieve this, we separately prove (a) that the portion of the | |
// buffer up to `at` is untouched and therefore still `used(elements)`; | |
// and (b) that the rest of the buffer is unused. | |
assert at <= |old(elements)|; | |
assert buf.contents[..at] == used(elements) by { | |
calc { | |
buf.contents[..at]; | |
== // because we haven't modified this part of the buffer | |
old(buf.contents)[..at]; | |
== | |
pad_end(old(elements), buf.cap)[..at]; | |
== | |
used(old(elements))[..at]; | |
== | |
used(elements); | |
} | |
} | |
assert buf.contents[at..] == unused(buf.cap - |elements|) by { | |
calc { | |
buf.contents[at..]; | |
== | |
buf.contents[at..at + tail_len] + buf.contents[at + tail_len..]; | |
== | |
unused(tail_len) + unused(buf.cap - (at + tail_len)); | |
== | |
{ concat_unused_lemma<T>(tail_len, buf.cap - (at + tail_len)); } | |
unused(tail_len + buf.cap - (at + tail_len)); | |
== | |
unused(buf.cap - at); | |
== | |
unused(buf.cap - |elements|); | |
} | |
} | |
calc { | |
buf.contents; | |
== | |
buf.contents[..at] + buf.contents[at..]; | |
== | |
used(elements) + unused(buf.cap - |elements|); | |
== | |
pad_end(elements, buf.cap); | |
} | |
} | |
} | |
method resize(new_len: usize, value: T) | |
requires valid() | |
ensures valid() | |
modifies this, buf | |
ensures len() == new_len | |
ensures forall i :: 0 <= i < old(len()) && i < new_len ==> elements[i] == old(elements)[i] | |
ensures forall i :: old(len()) <= i < new_len ==> elements[i] == value | |
{ | |
if new_len > length { | |
while length < new_len | |
invariant valid() | |
invariant old(length) <= length <= new_len | |
invariant forall i :: 0 <= i < old(len()) && i < length ==> elements[i] == old(elements)[i] | |
invariant forall i :: old(len()) <= i < length ==> elements[i] == value | |
invariant buf == old(buf) | |
{ | |
push(value); | |
} | |
} else { | |
truncate(new_len); | |
} | |
} | |
method extend_from_slice(other: array<T>) | |
requires valid() | |
ensures valid() | |
ensures elements == old(elements) + other[..] | |
method swap(i: usize, j: usize) | |
requires valid() | |
ensures valid() | |
requires 0 <= i < len() | |
requires 0 <= j < len() | |
modifies buf | |
ensures elements == old(elements)[i := old(get(j))][j := old(get(i))] | |
{ | |
buf.swap(i, j); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment