Created
November 22, 2015 16:56
-
-
Save colin-kiegel/735df3ddb40da853c923 to your computer and use it in GitHub Desktop.
Twig Tokens with str-references - *without RC*
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::mem; | |
/// This is an attempt to reduce allocations in the Twig-Rust template engine. | |
/// | |
/// During compilation the initial template string is transformed
/// - to a token stream during lexing | |
/// - to a node tree during parsing | |
/// | |
/// The tokens and nodes mostly contain slices of the original string. But the | |
/// current implementation uses new allocations and copy-by-value instead of | |
/// references. The reason is that Rust's notion of lifetimes cannot express a
/// guarantee for owned objects. However it seems necessary to pass ownership
/// of the original template string *along* with its references, to make sure | |
/// it really survives the references. `Rc`ing the template string in each of | |
/// its slices does not seem to be a good fit, because it would introduce | |
/// its own overhead ~O(n). | |
/// | |
/// There might be a way to construct a new object taking care of the following | |
/// - ownership of the template string | |
/// - management of all slice references | |
/// - ensuring the template string to survive all references | |
/// | |
/// This approach has some drawbacks, too | |
///   - the object's API should be rust-safe, which greatly constrains its
/// flexibility. It seems as if any operation that operates on slice | |
/// references needs to be managed by this new object in one way or | |
/// the other. E.g. constructing a new `Token` referencing a match | |
/// in the template string must be done by this object, because this | |
///     object would be solely responsible for the pointer's validity.
///     Rust's lifetimes cannot help here anymore, because they cannot
/// express this concept of self-contained-reference. At this point | |
/// there seem to be three options | |
/// (a) runtime-overhead = status quo | |
/// (b) architectural restrictions, but static safety guarantee | |
/// (c) unsafe API | |
/// The consequences of (b) and (c) seem to be far-reaching. E.g. | |
/// (b) means to move a lot of very different logic into this object | |
/// most likely violating separation of concerns. And (c) would | |
/// require `unsafe{}` code at other places, too - which looks | |
/// like a last resort, only. | |
/// - need to refactor *a lot* of code of lexer + parser. Especially if | |
/// road (b) is taken, a lot of logic must move into this new object. | |
/// | |
/// It seems reasonable to leave things as they are and continue with (a). | |
/// However this could be a starting point for further investigations. | |
/// A minimal lexer token, generic over its string-payload carrier so the
/// same definition can hold borrowed slices, raw pointers or owned strings
/// (see the three aliases below).
#[allow(dead_code)]
#[derive(Debug)]
enum Token<T> {
    /// A numeric literal token.
    Integer(u64),
    /// A textual token carrying a payload of type `T`.
    Value(T),
}
/// Token whose payload borrows from a template string for lifetime `'a`.
pub type RefToken<'a> = Token<&'a str>;
/// Token whose payload is a lifetime-erased raw string pointer.
pub type UnsafeToken = Token<*const str>;
/// Token that owns its payload (the copy-by-value status quo).
pub type OwnedToken = Token<String>;
/// Owner of a template string together with lifetime-erased references
/// into it.
///
/// Invariant (maintained by the `impl`, not checked by the compiler):
/// `unsafe_buffer` must never be mutated or moved out while any of the
/// `unsafe_*` fields may still reference a slice of it.
#[allow(dead_code)]
struct Stream {
    /// The `unsafe_buffer` contains all token string data (contiguously in memory).
    ///
    /// The stream wrapper may *never* mutate `unsafe_buffer` as long as any
    /// `UnsafeToken` may reference a slice of `unsafe_buffer`.
    /// It is marked unsafe_... because of this implicit contract.
    unsafe_buffer: String,
    /// Raw `*const str` that may point into `unsafe_buffer`.
    ///
    /// The public interface must always convert it back to a `&'a str`
    /// whose lifetime `'a` is tied to a reference `&'a Self`.
    /// It is marked unsafe_... because of this implicit contract.
    unsafe_str: *const str,
    /// Token whose payload may point into `unsafe_buffer`; must only be
    /// exposed to callers as `RefToken<'a>` tied to `&'a Self`.
    unsafe_token: UnsafeToken,
    /// Same contract as `unsafe_token`, for a whole vector of tokens.
    unsafe_token_vec: Vec<UnsafeToken>,
}
/// Borrowed, compiler-checked view of a `Stream`.
///
/// Every field is a safe reference whose lifetime `'a` is tied to the
/// originating `&'a Stream`, so none of them can outlive the buffer they
/// (may) point into.
#[allow(dead_code)]
#[derive(Debug)]
struct StreamHandle<'a> {
    /// Read-only view of the full token string data.
    safe_buffer: &'a str,
    /// The stream's current word slice (see `Stream::update_str`), exposed
    /// with a compiler-checked lifetime instead of a raw pointer.
    safe_str: &'a str,
    /// Lifetime-checked view of the stream's single token.
    safe_token: &'a RefToken<'a>,
    /// Lifetime-checked view of the stream's token vector.
    safe_token_vec: &'a Vec<RefToken<'a>>,
}
#[allow(dead_code)]
impl Stream {
    /// Takes ownership of the template string `buffer`.
    ///
    /// The raw pointer and token start out pointing at `'static` string
    /// literals, which trivially satisfy the buffer-outlives-references
    /// contract until `update_str`/`update_vec` repoint them.
    pub fn new(buffer: String) -> Stream {
        Stream {
            unsafe_buffer: buffer,
            unsafe_str: "Hello World!",//token: Default::default(),
            unsafe_token: Token::Value("Good morning!"),
            unsafe_token_vec: Default::default(),
        }
    }
    /// *Read-only* access to `unsafe_buffer` is safe.
    pub fn safe_buffer(&self) -> &str {
        &self.unsafe_buffer
    }
    /// Points `unsafe_str` at the first whitespace-separated word of the
    /// buffer (no-op on an empty or whitespace-only buffer).
    ///
    /// Only sound because `unsafe_buffer` is never mutated afterwards.
    pub fn update_str(&mut self) {
        if let Some(word) = self.unsafe_buffer.split_whitespace().next() {
            // `word` borrows from `unsafe_buffer`; coercing it to
            // `*const str` erases that lifetime.
            self.unsafe_str = word;
        }
    }
    /// Appends one `Token::Value` per whitespace-separated word of the
    /// buffer. Note: repeated calls append duplicates rather than
    /// rebuilding the vector.
    pub fn update_vec(&mut self) {
        for word in self.unsafe_buffer.split_whitespace() {
            self.unsafe_token_vec.push(Token::Value(word));
        }
    }
    /// Re-attaches a compiler-checked lifetime to the raw string pointer.
    pub fn safe_str<'a>(&'a self) -> &'a str {
        // SAFETY: `unsafe_str` points either at a `'static` literal (set in
        // `new`) or into `self.unsafe_buffer` (set in `update_str`), which
        // is owned by `self` and — per the struct's contract — never
        // mutated; tying the result to `&'a self` keeps it valid.
        unsafe {
            &*self.unsafe_str
        }
    }
    /// Reinterprets the stored `UnsafeToken` as a lifetime-checked token.
    pub fn safe_token<'a>(&'a self) -> &'a RefToken<'a> {
        // SAFETY: `*const str` and `&str` are both fat pointers with the
        // same layout, the stored pointer is non-null and obeys the same
        // buffer contract as `unsafe_str`, and the resulting reference is
        // bounded by `'a` = the borrow of `self`.
        unsafe {
            mem::transmute::<
                &Token<*const str>,
                &Token<&str>>
                (&self.unsafe_token)
        }
    }
    /// Reinterprets the whole token vector as lifetime-checked tokens.
    pub fn safe_token_vec<'a>(&'a self) -> &'a Vec<RefToken<'a>> {
        // SAFETY: same layout argument as `safe_token`, applied element-wise;
        // `Vec<Token<*const str>>` and `Vec<Token<&str>>` are layout-identical
        // and every stored pointer obeys the buffer contract.
        unsafe {
            mem::transmute::<
                &Vec<Token<*const str>>,
                &Vec<Token<&str>>>
                (&self.unsafe_token_vec)
        }
    }
    /// Bundles all safe, lifetime-checked views into one borrowed handle.
    pub fn safe_handle<'a>(&'a self) -> StreamHandle<'a> {
        StreamHandle {
            safe_buffer: self.safe_buffer(),
            safe_str: self.safe_str(),
            safe_token: self.safe_token(),
            safe_token_vec: self.safe_token_vec(),
        }
    }
}
#[allow(dead_code)] | |
fn main() { | |
let buffer = "Hello World!".to_string(); | |
let mut stream = Stream::new(buffer); | |
stream.update_str(); | |
stream.update_vec(); | |
println!("safe_str() = {:?}", stream.safe_str()); | |
println!("safe_token() = {:?}", stream.safe_token()); | |
println!("safe_token_vec() = {:?}", stream.safe_token_vec()); | |
println!("safe_handle() = {:?}", stream.safe_handle()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment