sekhat · January 24, 2022 18:58
diff --git a/word_wrap.rs b/word_wrap.rs
 use std::iter::Peekable;
 use std::num::NonZeroUsize;

 /// One lexical unit produced by the tokenizer.
 ///
 /// Every variant borrows a slice of the input text.
 #[derive(Clone)]
 enum TextToken<'a> {
    /// A run of non-whitespace characters
    Text(&'a str),
    /// A run of non-whitespace characters that must stay on the current
    /// line instead of being moved on to the next one
    DontBreak(&'a str),
    /// A run made up solely of whitespace characters
    Whitespace(&'a str),
 }

 /// Returns `true` when `c` is a space, tab, carriage return or line feed.
 fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r' | '\n')
 }

 /// Tokenizer state: an input source plus the string currently in progress.
 struct Tokenizer<'a, I> {
    /// Source of the strings that are still to be tokenized
    input: I,
    /// Unconsumed remainder of the string being tokenized, if any
    current: Option<&'a str>,
 }

 impl<'a, I> Tokenizer<'a, I>
 where
    I: Iterator<Item = &'a str>,
 {
    /// Builds a `Tokenizer` that reads its strings from `input`, with no
    /// string in progress yet.
    fn new<V>(input: V) -> Self
    where
        V: IntoIterator<IntoIter = I>,
    {
        let input = input.into_iter();
        Self {
            input,
            current: None,
        }
    }
 }

 // The tokenizer can be cloned whenever the iterator it wraps is also `Clone`
 impl<'a, I> Clone for Tokenizer<'a, I>
 where
    I: Iterator + Clone,
 {
    /// Duplicates the tokenizer, including the string currently in progress.
    fn clone(&self) -> Self {
        let input = self.input.clone();
        // `Option<&str>` is `Copy`, so copying the field is all that is needed
        let current = self.current;
        Self { input, current }
    }
 }

 impl<'a, I> Iterator for Tokenizer<'a, I>
 where
    I: Iterator<Item = &'a str>,
 {
    type Item = TextToken<'a>;

    /// Returns the next run of whitespace or of non-whitespace characters.
    ///
    /// A run never spans two input strings, and empty input strings are
    /// skipped. Yields `None` once the input iterator is exhausted and no
    /// partially consumed string is left.
    fn next(&mut self) -> Option<Self::Item> {
        // Loop (rather than recurse) past empty strings so that a long run
        // of them cannot exhaust the stack
        loop {
            // continue with the leftover of the previous string if there is
            // one, otherwise pull a fresh string from the input
            let string = match self.current.take() {
                Some(string) => string,
                None => self.input.next()?,
            };

            // the first character decides whether this run is whitespace or
            // text; an empty string contains no run at all
            let is_matching_whitespace = match string.chars().next() {
                Some(c) => is_whitespace(c),
                None => continue,
            };

            // byte offset of the first character that belongs to the other
            // kind of run, if there is one
            let end_index = string
                .char_indices()
                .find(|&(_, c)| is_whitespace(c) != is_matching_whitespace)
                .map(|(index, _)| index);

            // whatever follows the run is kept for the next call;
            // `self.current` is already `None` thanks to `take()` above
            let result = match end_index {
                Some(index) => {
                    let (run, rest) = string.split_at(index);
                    self.current = Some(rest);
                    run
                }
                None => string,
            };

            return Some(if is_matching_whitespace {
                TextToken::Whitespace(result)
            } else {
                TextToken::Text(result)
            });
        }
    }
 }

 /// One unit of output produced by the `TextLayout`.
 #[derive(Debug)]
 enum LayoutToken<'a> {
    /// A slice of text
    Text(&'a str),
    /// A single space separating two words
    Space,
    /// A line break
    Newline,
 }

 /// Pending work that the `TextLayout` deals with before it reads another
 /// token from the tokenizer.
 enum PushBack<'a> {
    /// Process this text token in place of the tokenizer's next item
    Token(TextToken<'a>),
    /// Output a new line first, then process the contained `TextToken`
    NewlineThen(TextToken<'a>),
    /// Output this `LayoutToken` next
    LayoutToken(LayoutToken<'a>),
    /// Nothing is pending
    None,
 }

 impl<'a> PushBack<'a> {
    /// Removes and returns whatever is stored, leaving `PushBack::None`
    /// behind in its place.
    fn take(&mut self) -> PushBack<'a> {
        core::mem::replace(self, PushBack::None)
    }
 }

 /// Splits `input` in two just before the character at position `index`,
 /// counting in `char`s rather than bytes.
 ///
 /// When `input` has `index` characters or fewer, the whole string is
 /// returned as the first half and the second half is empty.
 fn split_at_char_index(input: &str, index: usize) -> (&str, &str) {
    let byte_offset = input
        .char_indices()
        .nth(index)
        .map_or(input.len(), |(offset, _)| offset);
    input.split_at(byte_offset)
 }


 /// State for wrapping a stream of text tokens at a fixed line width.
 struct TextLayout<'a, I>
 where
    I: Iterator<Item = &'a str>,
 {
    /// Number of characters a line may hold before words are wrapped
    width: usize,
    /// Position reached so far within the line being built
    current: usize,
    /// Item waiting to be handled ahead of the tokenizer's next token
    push_back: PushBack<'a>,
    /// Source of the `TextToken`s to lay out
    tokenizer: Peekable<Tokenizer<'a, I>>,
 }

 impl<'a, I> TextLayout<'a, I>
 where
    I: Iterator<Item = &'a str>,
 {
    /// Builds a `TextLayout` that wraps the text read from `input` at
    /// `width` characters, positioned at the start of a line and with
    /// nothing pushed back.
    fn new<V>(input: V, width: NonZeroUsize) -> Self
    where
        V: IntoIterator<IntoIter = I>,
    {
        let tokenizer = Tokenizer::new(input).peekable();
        Self {
            tokenizer,
            push_back: PushBack::None,
            current: 0,
            width: width.get(),
        }
    }
 }

 impl<'a, I> Iterator for TextLayout<'a, I>
 where
    I: Iterator<Item = &'a str> + Clone,
 {
    type Item = LayoutToken<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let token = match self.push_back.take() {
            // If there's no item on the push back, read a TextToken from
            // the tokenizer
            PushBack::None => self.tokenizer.next()?,
            // If there's a text token on the push back, use that
            PushBack::Token(token) => token,
            // If we are expecting to output a newline, do that and prepare
            // the next text token
            PushBack::NewlineThen(token) => {
                // we first update the push back to be a token for next
                // time `next()` is called
                self.push_back = PushBack::Token(token);

                // reset the line index
                self.current = 0;

                // and return a Newline
                return Some(LayoutToken::Newline);
            }
            // If we are expecting a layout token just return that
            PushBack::LayoutToken(token) => return Some(token),
        };

        // are we allowed to push the current token on to the next line?
        let allow_next_line = match token {
            TextToken::DontBreak(_) => false,
            _ => true,
        };

        match token {
            // Both DontBreak and Text means a string of text
            // DontBreak just means, you aren't allowed to push the word
            // on to the next line hence the allow_next_line variable above
            TextToken::DontBreak(text) | TextToken::Text(text) => {
                //
                // Multiple word tokens in a row means it's likely all one word
                // so we need to add up all the sizes to make sure we make
                // the correct decision

                // We clone the tokenizer here, so we can advance that iterator
                // seperately from our own.
                //
                // As long as the underlying iterator that tokenizer depends on
                // doesn't require allocating to clone, no allocation will occur
                //
                // But that at least leaves it in the users hands
                let mut find_end_iter = self.tokenizer.clone();

                // The length of just this part of the text
                let part_length = text.chars().count();

                // calculate the full length of the current word
                let mut length = part_length;
                for item in find_end_iter {
                    match item {
                        TextToken::DontBreak(text) |
                        TextToken::Text(text) => {
                            length += text.chars().count()
                        },
                        _ => break
                    }
                }
                // So we have a part, which may be smaller than the word
                // we need to decide new line base on the whole word,
                // though allow_next_line changes that behaviour

                let at_start_of_line = self.current == 0;
                let end_line_index = self.current + length;
                let word_will_overflow = end_line_index > self.width;
                let part_will_overflow =
                    self.current + part_length > self.width;

                match (
                    at_start_of_line,
                    word_will_overflow,
                    allow_next_line,
                    part_will_overflow,
                ) {
                    (false, true, false, false) | (true, true, _, false) => {
                        // Force that the next token won't allow being pushed
                        // onto the next line, since we are only part of the
                        // whole word
                        self.push_back = match self.tokenizer.next() {
                            Some(TextToken::Text(text))
                            | Some(TextToken::DontBreak(text)) => {
                                PushBack::Token(TextToken::DontBreak(text))
                            }
                            Some(TextToken::Whitespace(text)) => {
                                PushBack::Token(TextToken::Whitespace(text))
                            }
                            None => PushBack::None,
                        };

                        self.current += text.len();

                        Some(LayoutToken::Text(text))
                    }
                    (false, true, false, true) | (true, true, _, true) => {
                        // allow_next_line is irrelevant here as we are
                        // already at the start of a line
                        //
                        // however, if out part is smaller than the overflow
                        // we just want to return that and make the next
                        // token DontBreak

                        // we know that we will over flow, so it's just
                        // the remaining size of the line we need
                        let valid_char_count = self.width - self.current;
                        let (left, right) =
                            split_at_char_index(text, valid_char_count);

                        if right.len() > 0 {
                            self.push_back =
                                PushBack::NewlineThen(TextToken::Text(right));
                        }

                        self.current += valid_char_count;

                        Some(LayoutToken::Text(left))
                    }
                    (_, false, _, _) => {
                        // We are at start of the line, and we know we wont
                        // overflow so just return text
                        self.current += text.len();
                        Some(LayoutToken::Text(text))
                    }
                    (false, true, true, _) => {
                        // So we aren't at the start of the line, but the word
                        // is allowed to be pushed to the next line, it will
                        // overflow, so we need to return a newline so we can
                        // just push back the token and output the newline
                        //
                        // Doing this should trigger the at start_of_line handlers
                        self.push_back = PushBack::Token(TextToken::Text(text));
                        self.current = 0;
                        Some(LayoutToken::Newline)
                    }
                }
            }
            TextToken::Whitespace(_) => {
                // we can assume getting this far that push_back is now None

                // consume all our whitespace tokens
                while let Some(TextToken::Whitespace(_)) =
                    self.tokenizer.peek()
                {
                    // fine returning early if we reach none while in 
                    // white space
                    self.tokenizer.next()?;
                }


                // If we are at the end of the current line however, force
                // a new line
                let at_start_of_line = self.current == 0;

                // only increase current if not at start of line, so that
                // spaces are trimmed at start of line, but simulated otherwise
                self.current += if at_start_of_line { 0 } else { 1 };

                // if we get none next then we can short circuit return with
                // None, as it has the same result as trimming whitespace
                // at the end
                let next_token = self.next()?;

                match (at_start_of_line, next_token) {
                    // If we have a new line as our next token we can just
                    // return that.
                    (_, LayoutToken::Newline) => Some(LayoutToken::Newline),
                    // if we are at start of line then we can do the same
                    // with text
                    (true, LayoutToken::Text(text)) => {
                        Some(LayoutToken::Text(text))
                    }
                    // if we aren't at start of line we should probably output
                    // our space so it's no longer simulated, but we need to 
                    // make sure next run round is the text we got, so we
                    // put that on the push back
                    (false, LayoutToken::Text(text)) => {
                        self.push_back =
                            PushBack::LayoutToken(LayoutToken::Text(text));

                        Some(LayoutToken::Space)
                    }
                    // if we get a second lot of white space, there might be
                    // even more, so consume all the white space tokens from
                    // the tokenizer before returning our space
                    (_, LayoutToken::Space) => unreachable!()
                }
            }
        }
    }
 }

 /// Wraps the text from `iterator` at `width` characters per line and
 /// prints the result to standard output.
 ///
 /// # Panics
 ///
 /// Panics if `width` is zero.
 fn layout_and_print<'a, I>(iterator: I, width: usize)
 where
    I: IntoIterator<Item = &'a str>,
    I::IntoIter: Clone,
 {
    let line_width = NonZeroUsize::new(width).unwrap();

    TextLayout::new(iterator, line_width).for_each(|piece| match piece {
        LayoutToken::Text(text) => print!("{}", text),
        LayoutToken::Space => print!(" "),
        LayoutToken::Newline => println!(""),
    });
 }

 /// Runs three sample inputs through the layout at a width of five.
 fn main() {
    let width = 5;

    println!("first");
    layout_and_print(["x---xy---yz--z"], width);

    println!("\n\nsecond");
    layout_and_print(["x---xy-", "--yz", "--z"], width);

    println!("\n\nthird");
    layout_and_print(["12345 ", " 67890", "12345", "1234567890"], width);

    println!("\n\ndone");
 }
	use std::iter::Peekable;
	use std::num::NonZeroUsize;

	/// One lexical unit produced by the tokenizer.
	///
	/// Every variant borrows a slice of the input text.
	#[derive(Clone)]
	enum TextToken<'a> {
	    /// A run of non-whitespace characters
	    Text(&'a str),
	    /// A run of non-whitespace characters that must stay on the current
	    /// line instead of being moved on to the next one
	    DontBreak(&'a str),
	    /// A run made up solely of whitespace characters
	    Whitespace(&'a str),
	}

	/// Returns `true` when `c` is a space, tab, carriage return or line feed.
	fn is_whitespace(c: char) -> bool {
	    matches!(c, ' ' | '\t' | '\r' | '\n')
	}

	/// Tokenizer state: an input source plus the string currently in progress.
	struct Tokenizer<'a, I> {
	    /// Source of the strings that are still to be tokenized
	    input: I,
	    /// Unconsumed remainder of the string being tokenized, if any
	    current: Option<&'a str>,
	}

	impl<'a, I> Tokenizer<'a, I>
	where
	    I: Iterator<Item = &'a str>,
	{
	    /// Builds a `Tokenizer` that reads its strings from `input`, with no
	    /// string in progress yet.
	    fn new<V>(input: V) -> Self
	    where
	        V: IntoIterator<IntoIter = I>,
	    {
	        let input = input.into_iter();
	        Self {
	            input,
	            current: None,
	        }
	    }
	}

	// The tokenizer can be cloned whenever the iterator it wraps is also `Clone`
	impl<'a, I> Clone for Tokenizer<'a, I>
	where
	    I: Iterator + Clone,
	{
	    /// Duplicates the tokenizer, including the string currently in progress.
	    fn clone(&self) -> Self {
	        let input = self.input.clone();
	        // `Option<&str>` is `Copy`, so copying the field is all that is needed
	        let current = self.current;
	        Self { input, current }
	    }
	}

	impl<'a, I> Iterator for Tokenizer<'a, I>
	where
	    I: Iterator<Item = &'a str>,
	{
	    type Item = TextToken<'a>;

	    /// Returns the next run of whitespace or of non-whitespace characters.
	    ///
	    /// A run never spans two input strings, and empty input strings are
	    /// skipped. Yields `None` once the input iterator is exhausted and no
	    /// partially consumed string is left.
	    fn next(&mut self) -> Option<Self::Item> {
	        // Loop (rather than recurse) past empty strings so that a long run
	        // of them cannot exhaust the stack
	        loop {
	            // continue with the leftover of the previous string if there is
	            // one, otherwise pull a fresh string from the input
	            let string = match self.current.take() {
	                Some(string) => string,
	                None => self.input.next()?,
	            };

	            // the first character decides whether this run is whitespace or
	            // text; an empty string contains no run at all
	            let is_matching_whitespace = match string.chars().next() {
	                Some(c) => is_whitespace(c),
	                None => continue,
	            };

	            // byte offset of the first character that belongs to the other
	            // kind of run, if there is one
	            let end_index = string
	                .char_indices()
	                .find(|&(_, c)| is_whitespace(c) != is_matching_whitespace)
	                .map(|(index, _)| index);

	            // whatever follows the run is kept for the next call;
	            // `self.current` is already `None` thanks to `take()` above
	            let result = match end_index {
	                Some(index) => {
	                    let (run, rest) = string.split_at(index);
	                    self.current = Some(rest);
	                    run
	                }
	                None => string,
	            };

	            return Some(if is_matching_whitespace {
	                TextToken::Whitespace(result)
	            } else {
	                TextToken::Text(result)
	            });
	        }
	    }
	}

	/// One unit of output produced by the `TextLayout`.
	#[derive(Debug)]
	enum LayoutToken<'a> {
	    /// A slice of text
	    Text(&'a str),
	    /// A single space separating two words
	    Space,
	    /// A line break
	    Newline,
	}

	/// Pending work that the `TextLayout` deals with before it reads another
	/// token from the tokenizer.
	enum PushBack<'a> {
	    /// Process this text token in place of the tokenizer's next item
	    Token(TextToken<'a>),
	    /// Output a new line first, then process the contained `TextToken`
	    NewlineThen(TextToken<'a>),
	    /// Output this `LayoutToken` next
	    LayoutToken(LayoutToken<'a>),
	    /// Nothing is pending
	    None,
	}

	impl<'a> PushBack<'a> {
	    /// Removes and returns whatever is stored, leaving `PushBack::None`
	    /// behind in its place.
	    fn take(&mut self) -> PushBack<'a> {
	        core::mem::replace(self, PushBack::None)
	    }
	}

	/// Splits `input` in two just before the character at position `index`,
	/// counting in `char`s rather than bytes.
	///
	/// When `input` has `index` characters or fewer, the whole string is
	/// returned as the first half and the second half is empty.
	fn split_at_char_index(input: &str, index: usize) -> (&str, &str) {
	    let byte_offset = input
	        .char_indices()
	        .nth(index)
	        .map_or(input.len(), |(offset, _)| offset);
	    input.split_at(byte_offset)
	}


	/// State for wrapping a stream of text tokens at a fixed line width.
	struct TextLayout<'a, I>
	where
	    I: Iterator<Item = &'a str>,
	{
	    /// Number of characters a line may hold before words are wrapped
	    width: usize,
	    /// Position reached so far within the line being built
	    current: usize,
	    /// Item waiting to be handled ahead of the tokenizer's next token
	    push_back: PushBack<'a>,
	    /// Source of the `TextToken`s to lay out
	    tokenizer: Peekable<Tokenizer<'a, I>>,
	}

	impl<'a, I> TextLayout<'a, I>
	where
	    I: Iterator<Item = &'a str>,
	{
	    /// Builds a `TextLayout` that wraps the text read from `input` at
	    /// `width` characters, positioned at the start of a line and with
	    /// nothing pushed back.
	    fn new<V>(input: V, width: NonZeroUsize) -> Self
	    where
	        V: IntoIterator<IntoIter = I>,
	    {
	        let tokenizer = Tokenizer::new(input).peekable();
	        Self {
	            tokenizer,
	            push_back: PushBack::None,
	            current: 0,
	            width: width.get(),
	        }
	    }
	}

	impl<'a, I> Iterator for TextLayout<'a, I>
	where
	I: Iterator<Item = &'a str> + Clone,
	{
	type Item = LayoutToken<'a>;

	fn next(&mut self) -> Option<Self::Item> {
	let token = match self.push_back.take() {
	// If there's no item on the push back, read a TextToken from
	// the tokenizer
	PushBack::None => self.tokenizer.next()?,
	// If there's a text token on the push back, use that
	PushBack::Token(token) => token,
	// If we are expecting to output a newline, do that and prepare
	// the next text token
	PushBack::NewlineThen(token) => {
	// we first update the push back to be a token for next
	// time `next()` is called
	self.push_back = PushBack::Token(token);

	// reset the line index
	self.current = 0;

	// and return a Newline
	return Some(LayoutToken::Newline);
	}
	// If we are expecting a layout token just return that
	PushBack::LayoutToken(token) => return Some(token),
	};

	// are we allowed to push the current token on to the next line?
	let allow_next_line = match token {
	TextToken::DontBreak(_) => false,
	_ => true,
	};

	match token {
	// Both DontBreak and Text means a string of text
	// DontBreak just means, you aren't allowed to push the word
	// on to the next line hence the allow_next_line variable above
	TextToken::DontBreak(text) \| TextToken::Text(text) => {
	//
	// Multiple word tokens in a row means it's likely all one word
	// so we need to add up all the sizes to make sure we make
	// the correct decision

	// We clone the tokenizer here, so we can advance that iterator
	// seperately from our own.
	//
	// As long as the underlying iterator that tokenizer depends on
	// doesn't require allocating to clone, no allocation will occur
	//
	// But that at least leaves it in the users hands
	let mut find_end_iter = self.tokenizer.clone();

	// The length of just this part of the text
	let part_length = text.chars().count();

	// calculate the full length of the current word
	let mut length = part_length;
	for item in find_end_iter {
	match item {
	TextToken::DontBreak(text) \|
	TextToken::Text(text) => {
	length += text.chars().count()
	},
	_ => break
	}
	}
	// So we have a part, which may be smaller than the word
	// we need to decide new line base on the whole word,
	// though allow_next_line changes that behaviour

	let at_start_of_line = self.current == 0;
	let end_line_index = self.current + length;
	let word_will_overflow = end_line_index > self.width;
	let part_will_overflow =
	self.current + part_length > self.width;

	match (
	at_start_of_line,
	word_will_overflow,
	allow_next_line,
	part_will_overflow,
	) {
	(false, true, false, false) \| (true, true, _, false) => {
	// Force that the next token won't allow being pushed
	// onto the next line, since we are only part of the
	// whole word
	self.push_back = match self.tokenizer.next() {
	Some(TextToken::Text(text))
	\| Some(TextToken::DontBreak(text)) => {
	PushBack::Token(TextToken::DontBreak(text))
	}
	Some(TextToken::Whitespace(text)) => {
	PushBack::Token(TextToken::Whitespace(text))
	}
	None => PushBack::None,
	};

	self.current += text.len();

	Some(LayoutToken::Text(text))
	}
	(false, true, false, true) \| (true, true, _, true) => {
	// allow_next_line is irrelevant here as we are
	// already at the start of a line
	//
	// however, if out part is smaller than the overflow
	// we just want to return that and make the next
	// token DontBreak

	// we know that we will over flow, so it's just
	// the remaining size of the line we need
	let valid_char_count = self.width - self.current;
	let (left, right) =
	split_at_char_index(text, valid_char_count);

	if right.len() > 0 {
	self.push_back =
	PushBack::NewlineThen(TextToken::Text(right));
	}

	self.current += valid_char_count;

	Some(LayoutToken::Text(left))
	}
	(_, false, _, _) => {
	// We are at start of the line, and we know we wont
	// overflow so just return text
	self.current += text.len();
	Some(LayoutToken::Text(text))
	}
	(false, true, true, _) => {
	// So we aren't at the start of the line, but the word
	// is allowed to be pushed to the next line, it will
	// overflow, so we need to return a newline so we can
	// just push back the token and output the newline
	//
	// Doing this should trigger the at start_of_line handlers
	self.push_back = PushBack::Token(TextToken::Text(text));
	self.current = 0;
	Some(LayoutToken::Newline)
	}
	}
	}
	TextToken::Whitespace(_) => {
	// we can assume getting this far that push_back is now None

	// consume all our whitespace tokens
	while let Some(TextToken::Whitespace(_)) =
	self.tokenizer.peek()
	{
	// fine returning early if we reach none while in
	// white space
	self.tokenizer.next()?;
	}


	// If we are at the end of the current line however, force
	// a new line
	let at_start_of_line = self.current == 0;

	// only increase current if not at start of line, so that
	// spaces are trimmed at start of line, but simulated otherwise
	self.current += if at_start_of_line { 0 } else { 1 };

	// if we get none next then we can short circuit return with
	// None, as it has the same result as trimming whitespace
	// at the end
	let next_token = self.next()?;

	match (at_start_of_line, next_token) {
	// If we have a new line as our next token we can just
	// return that.
	(_, LayoutToken::Newline) => Some(LayoutToken::Newline),
	// if we are at start of line then we can do the same
	// with text
	(true, LayoutToken::Text(text)) => {
	Some(LayoutToken::Text(text))
	}
	// if we aren't at start of line we should probably output
	// our space so it's no longer simulated, but we need to
	// make sure next run round is the text we got, so we
	// put that on the push back
	(false, LayoutToken::Text(text)) => {
	self.push_back =
	PushBack::LayoutToken(LayoutToken::Text(text));

	Some(LayoutToken::Space)
	}
	// if we get a second lot of white space, there might be
	// even more, so consume all the white space tokens from
	// the tokenizer before returning our space
	(_, LayoutToken::Space) => unreachable!()
	}
	}
	}
	}
	}

	/// Wraps the text from `iterator` at `width` characters per line and
	/// prints the result to standard output.
	///
	/// # Panics
	///
	/// Panics if `width` is zero.
	fn layout_and_print<'a, I>(iterator: I, width: usize)
	where
	    I: IntoIterator<Item = &'a str>,
	    I::IntoIter: Clone,
	{
	    let line_width = NonZeroUsize::new(width).unwrap();

	    TextLayout::new(iterator, line_width).for_each(|piece| match piece {
	        LayoutToken::Text(text) => print!("{}", text),
	        LayoutToken::Space => print!(" "),
	        LayoutToken::Newline => println!(""),
	    });
	}

	/// Runs three sample inputs through the layout at a width of five.
	fn main() {
	    let width = 5;

	    println!("first");
	    layout_and_print(["x---xy---yz--z"], width);

	    println!("\n\nsecond");
	    layout_and_print(["x---xy-", "--yz", "--z"], width);

	    println!("\n\nthird");
	    layout_and_print(["12345 ", " 67890", "12345", "1234567890"], width);

	    println!("\n\ndone");
	}