Skip to content

Instantly share code, notes, and snippets.

@sunshowers
Created September 28, 2023 20:32

Revisions

  1. sunshowers created this gist Sep 28, 2023.
    391 changes: 391 additions & 0 deletions wrap.rs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,391 @@
    // MIT License
    //
    // Copyright (c) 2023 Oxide Computer Company
    //
    // Permission is hereby granted, free of charge, to any person obtaining a copy
    // of this software and associated documentation files (the "Software"), to deal
    // in the Software without restriction, including without limitation the rights
    // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    // copies of the Software, and to permit persons to whom the Software is
    // furnished to do so, subject to the following conditions:
    //
    // The above copyright notice and this permission notice shall be included in all
    // copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    // SOFTWARE.

    //! This module is an adaptation of textwrap's `Word` library to work with
    //! `tui`'s `Span`s and lines.
    //!
    //! The code is mostly copy-pasted, with the following changes:
    //!
    //! * [`textwrap::core::Word`] is now [`StyledWord`].
    //! * Hyphenation is no longer supported.
    //!
    //! Currently, each element of `tui`'s [`Text::lines`] is assumed to be a
    //! separate line. We don't check for whether a line's content has embedded
    //! newlines in it, but we could in the future if necessary. (Embedded newlines
    //! won't break the output, but they might make the output look a bit weird.)
    use itertools::{Itertools, Position};
    use ratatui::text::{Line, Span, Text};
    use textwrap::{
    core::{display_width, Fragment},
    wrap_algorithms::{wrap_optimal_fit, Penalties},
    };

    pub struct Options<'a> {
    /// The width in columns at which the text will be wrapped.
    pub width: usize,
    /// Indentation used for the first line of output.
    pub initial_indent: Span<'a>,
    /// Indentation used for subsequent lines of output.
    pub subsequent_indent: Span<'a>,
    /// Allow long words to be broken if they cannot fit on a line.
    /// When set to `false`, some lines may be longer than
    /// `self.width`.
    pub break_words: bool,
    }

    /// Wraps a [`Text`] block.
    ///
    /// `text` should be broken up into lines at the time it's passed in.
    pub(crate) fn wrap_text<'a>(
    text: &'a Text<'_>,
    options: Options<'a>,
    ) -> Text<'a> {
    let mut lines = Vec::new();
    // We currently assume that lines in text don't have embedded newlines in
    // them. This assumption might need to be revisited.
    for line in &text.lines {
    wrap_single_line(line, &options, &mut lines);
    }

    Text::from(lines)
    }

    /// Wraps a [`Line`] representing a single line.
    ///
    /// If the text contains multiple lines, use [`wrap_text`] instead.
    pub(crate) fn wrap_line<'a>(
    line: &'a Line<'_>,
    options: Options<'a>,
    ) -> Text<'a> {
    let mut lines = Vec::new();
    wrap_single_line(line, &options, &mut lines);
    Text::from(lines)
    }

    fn wrap_single_line<'a>(
    line: &'a Line<'_>,
    options: &Options<'a>,
    lines: &mut Vec<Line<'a>>,
    ) {
    let indent = if lines.is_empty() {
    options.initial_indent.clone()
    } else {
    options.subsequent_indent.clone()
    };
    if line.width() < options.width && indent.content.is_empty() {
    lines.push(borrow_line(line));
    } else {
    wrap_single_line_slow_path(line, options, lines)
    }
    }

    fn borrow_line<'a>(line: &'a Line<'_>) -> Line<'a> {
    let spans = line
    .spans
    .iter()
    .map(|span| Span::styled(span.content.as_ref(), span.style))
    .collect::<Vec<_>>();
    Line::from(spans)
    }

    fn wrap_single_line_slow_path<'a>(
    line: &'a Line<'_>,
    options: &Options<'a>,
    lines: &mut Vec<Line<'a>>,
    ) {
    // Span::width (options.initial_indent.width() etc) use the Unicode display
    // width, which is what we expect.
    let initial_width =
    options.width.saturating_sub(options.initial_indent.width());
    let subsequent_width =
    options.width.saturating_sub(options.subsequent_indent.width());
    let line_widths = [initial_width, subsequent_width];

    let split_words = find_words_in_line(&line);

    // We don't perform any word splitting.
    let broken_words = if options.break_words {
    let mut broken_words = break_words(split_words, line_widths[1]);
    if !options.initial_indent.content.is_empty() {
    // Without this, the first word will always go into the
    // first line. However, since we break words based on the
    // _second_ line width, it can be wrong to unconditionally
    // put the first word onto the first line. An empty
    // zero-width word fixed this.
    broken_words.insert(0, StyledWord::empty());
    }
    broken_words
    } else {
    split_words.collect::<Vec<_>>()
    };

    let f64_line_widths =
    line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();

    // The optimal fit wrap looks nicer, and we're wrapping pretty small amounts
    // of text so performance is unlikely to be an issue.
    let wrapped_lines =
    wrap_optimal_fit(&broken_words, &f64_line_widths, &Penalties::new())
    .expect("computation cannot overflow with restricted line widths");

    for words in wrapped_lines {
    let mut output_line = Vec::new();

    if lines.is_empty() && !options.initial_indent.content.is_empty() {
    output_line.push(options.initial_indent.clone());
    } else if !lines.is_empty()
    && !options.subsequent_indent.content.is_empty()
    {
    output_line.push(options.subsequent_indent.clone());
    }

    for (position, word) in words.into_iter().with_position() {
    match position {
    Position::First | Position::Middle => {
    output_line.extend(word.word_span());
    output_line.extend(word.whitespace_span());
    }
    Position::Last | Position::Only => {
    // Don't add trailing whitespace, just the content.
    output_line.extend(word.word_span());
    // We don't support hyphenation at the moment, but if we
    // did, this is where they would go.
    }
    }
    }

    lines.push(Line::from(output_line));
    }
    }

    fn find_words_in_line<'a>(
    line: &'a Line<'_>,
    ) -> impl Iterator<Item = StyledWord<'a>> {
    line.spans.iter().flat_map(|span| find_words_in_span(span))
    }

    /// Breaks this span into smaller words.
    ///
    /// This assumes the only word breaks are ASCII spaces. In particular, it
    /// assume that there are no newlines anywhere within a span.
    fn find_words_in_span<'a>(
    span: &'a Span<'_>,
    ) -> impl Iterator<Item = StyledWord<'a>> {
    let mut start = 0;
    let mut in_whitespace = false;
    let mut char_indices = span.content.char_indices();

    std::iter::from_fn(move || {
    for (idx, ch) in char_indices.by_ref() {
    if in_whitespace && ch != ' ' {
    let word = StyledWord::new_sub_span(span, start, idx);
    start = idx;
    in_whitespace = ch == ' ';
    return Some(word);
    }

    in_whitespace = ch == ' ';
    }

    let content_len = span.content.len();
    if start < content_len {
    let word = StyledWord::new_sub_span(span, start, content_len);
    start = content_len;
    return Some(word);
    }

    None
    })
    }

    /// A word with a style associated with it.
    ///
    /// This is similar to a [`textwrap::core::Word`], except each word also has a
    /// style associated with it.
    #[derive(Copy, Clone, Debug)]
    struct StyledWord<'a> {
    word: &'a str,
    width: usize,
    whitespace: &'a str,
    style: ratatui::style::Style,
    }

    impl<'a> StyledWord<'a> {
    #[allow(unused)]
    fn new(span: &'a Span<'_>) -> Self {
    // We assume the whitespace consists of ' ' only. This allows us to
    // compute the display width in constant time.
    Self::new_impl(&span.content, span.style)
    }

    fn new_sub_span(span: &'a Span<'_>, start: usize, end: usize) -> Self {
    let content = &span.content[start..end];
    Self::new_impl(content, span.style)
    }

    fn new_impl(content: &'a str, style: ratatui::style::Style) -> Self {
    let trimmed = content.trim_end_matches(' ');
    Self {
    word: trimmed,
    width: display_width(trimmed),
    whitespace: &content[trimmed.len()..],
    style,
    }
    }

    fn empty() -> Self {
    Self {
    word: "",
    width: 0,
    whitespace: "",
    style: ratatui::style::Style::default(),
    }
    }

    fn word_span(&self) -> Option<Span<'a>> {
    (!self.word.is_empty()).then(|| Span::styled(self.word, self.style))
    }

    fn whitespace_span(&self) -> Option<Span<'a>> {
    (!self.whitespace.is_empty())
    .then(|| Span::styled(self.whitespace, self.style))
    }

    /// Break this span into smaller words with a width of at most `line_width`.
    /// The whitespace from this `SpanWord` is added to the last piece.
    fn break_apart<'b>(
    &'b self,
    line_width: usize,
    ) -> impl Iterator<Item = StyledWord<'a>> + 'b {
    let mut char_indices = self.word.char_indices();
    let mut offset = 0;
    let mut width = 0;

    std::iter::from_fn(move || {
    while let Some((idx, ch)) = char_indices.next() {
    if skip_ansi_escape_sequence(
    ch,
    &mut char_indices.by_ref().map(|(_, ch)| ch),
    ) {
    continue;
    }

    if width > 0 && width + ch_width(ch) > line_width {
    let word = StyledWord {
    word: &self.word[offset..idx],
    width,
    whitespace: "",
    style: self.style,
    };
    offset = idx;
    width = ch_width(ch);
    return Some(word);
    }

    width += ch_width(ch);
    }

    if offset < self.word.len() {
    let word = StyledWord {
    word: &self.word[offset..],
    width,
    whitespace: self.whitespace,
    style: self.style,
    };
    offset = self.word.len();
    return Some(word);
    }

    None
    })
    }
    }

    impl<'a> Fragment for StyledWord<'a> {
    fn width(&self) -> f64 {
    // self.width is the display width, which is what we care about here.
    self.width as f64
    }

    fn whitespace_width(&self) -> f64 {
    // Since whitespace is always ASCII spaces, this is equal to the number
    // of whitespace characters.
    self.whitespace.len() as f64
    }

    fn penalty_width(&self) -> f64 {
    // We don't insert hyphens or anything similar else -- just use 0.0
    // here.
    0.0
    }
    }

    /// Forcibly break spans wider than `line_width` into smaller spans.
    ///
    /// This simply calls [`Span::break_apart`] on spans that are too wide.
    fn break_words<'a, I>(spans: I, line_width: usize) -> Vec<StyledWord<'a>>
    where
    I: IntoIterator<Item = StyledWord<'a>>,
    {
    let mut shortened_spans = Vec::new();
    for span in spans {
    if span.width() > line_width as f64 {
    shortened_spans.extend(span.break_apart(line_width));
    } else {
    shortened_spans.push(span);
    }
    }
    shortened_spans
    }

    /// The CSI or “Control Sequence Introducer” introduces an ANSI escape
    /// sequence. This is typically used for colored text and will be
    /// ignored when computing the text width.
    const CSI: (char, char) = ('\x1b', '[');
    /// The final bytes of an ANSI escape sequence must be in this range.
    const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

    /// Skip ANSI escape sequences. The `ch` is the current `char`, the
    /// `chars` provide the following characters. The `chars` will be
    /// modified if `ch` is the start of an ANSI escape sequence.
    #[inline]
    fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(
    ch: char,
    chars: &mut I,
    ) -> bool {
    if ch == CSI.0 && chars.next() == Some(CSI.1) {
    // We have found the start of an ANSI escape code, typically
    // used for colored terminal text. We skip until we find a
    // "final byte" in the range 0x40–0x7E.
    for ch in chars {
    if ANSI_FINAL_BYTE.contains(&ch) {
    return true;
    }
    }
    }
    false
    }

    fn ch_width(ch: char) -> usize {
    unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
    }