Last active
August 29, 2015 14:20
-
-
Save cbiffle/a66727c8c5a1140a40c5 to your computer and use it in GitHub Desktop.
cat(1) in Rust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A simple program to help me learn Rust. This is a functional clone
// of cat circa 6th Edition Unix, before it grew all the options and
// stuff.
use std::env; | |
use std::io; | |
use std::fs::File; | |
use std::io::Write; | |
use std::io::BufRead; | |
/// Wrapper function to let us use Result and try. | |
/// Any error originating in the cat implementation will be printed here. | |
fn main() { | |
cat().ok().expect("I/O failed") | |
} | |
/// The actual implementation of cat, in a context where we can try and fail. | |
fn cat() -> io::Result<()> { | |
let mut out = io::stdout(); | |
let args = env::args(); | |
if args.len() == 1 { | |
cat_stdin(&mut out) | |
} else { | |
for arg in args.skip(1) { | |
match arg.as_ref() { | |
"-" => try!(cat_stdin(&mut out)), | |
_ => try!(cat1(io::BufReader::new(try!(File::open(arg))), &mut out)) | |
} | |
} | |
Ok(()) | |
} | |
} | |
/// Factor of cat for processing stdin using a single lock, instead of locking | |
/// at each read. Since we are single-threaded and always exhaust our input, | |
/// this is the least work we can do here. | |
fn cat_stdin(output: &mut io::Stdout) -> io::Result<()> { | |
// Having points for these values seems strange, but the lifetimes don't | |
// please the borrow checker without them *both*. | |
// | |
// Consider the alternatives. | |
// | |
// First: `cat1(io::stdin().lock(), output)` | |
// | |
// This would require the lifetime of the result of `stdin` to be extended | |
// to include the lifetime of the result of `lock`, which is not how things | |
// are defined currently. | |
// | |
// This creates the same issue for `let b = io::stdin().lock(); ...` | |
// | |
// So, then: `let s = io::stdin(); cat1(s.lock(), output)` | |
// | |
// That one doesn't work, and I honestly don't understand why. I admit I'm | |
// assuming that Rust's rules for temporaries in argument position are similar | |
// to C++'s: that they have an implicit lifetime that extends until the call | |
// returns. But that does not appear to be the case. | |
// | |
// I suspect I'm hitting the issue described in Rust RFC 66: | |
// https://github.com/rust-lang/rfcs/blob/master/text/0066-better-temporary-lifetimes.md | |
let s = io::stdin(); | |
let b = s.lock(); | |
cat1(b, output) | |
} | |
/// Single-input factor of cat: copies everything readable from `input` to
/// `output`, working directly on the reader's internal buffer.
///
/// # Design note
///
/// This is generic over the reader (static polymorphism, analogous to C++
/// templates): one copy of `cat1` is compiled per concrete reader type it is
/// called with, e.g. a `StdinLock` and a `BufReader`. The reason for choosing
/// this shape is that `input` is accepted by move, so the caller cannot use
/// the reader after handing it over.
///
/// The alternative would be dynamic polymorphism: take a `BufRead` trait
/// object by reference and compile a single `cat1`. Taking a trait object by
/// move requires boxing, which was judged a distraction here.
///
/// # Errors
///
/// Returns the first error reported by reading from `input` or by writing to
/// or flushing `output`. Reads that fail with `ErrorKind::Interrupted` are
/// retried rather than reported.
fn cat1<R: BufRead>(mut input: R, output: &mut io::Stdout) -> io::Result<()> {
    loop {
        let data = match input.fill_buf() {
            Ok(data) => data,
            // An interrupted read is transient; try again instead of aborting
            // the whole copy.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // An empty buffer from `fill_buf` means end of input.
        if data.is_empty() {
            break;
        }

        output.write_all(data)?;

        // `data` borrows `input`, so record its length first; after this line
        // the borrow is over and `consume` may be called.
        let n = data.len();
        input.consume(n);
    }

    // Flush explicitly so that a failure to write any still-buffered output is
    // returned to the caller instead of being lost.
    output.flush()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment