Created
September 28, 2023 20:32
-
-
Save sunshowers/457102aabd9db3bb23d61f2b4259bdd0 to your computer and use it in GitHub Desktop.
ratatui wrapping with textwrap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// MIT License | |
// | |
// Copyright (c) 2023 Oxide Computer Company | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining a copy | |
// of this software and associated documentation files (the "Software"), to deal | |
// in the Software without restriction, including without limitation the rights | |
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
// copies of the Software, and to permit persons to whom the Software is | |
// furnished to do so, subject to the following conditions: | |
// | |
// The above copyright notice and this permission notice shall be included in all | |
// copies or substantial portions of the Software. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
// SOFTWARE. | |
//! This module is an adaptation of textwrap's `Word` library to work with | |
//! `tui`'s `Span`s and lines. | |
//! | |
//! The code is mostly copy-pasted, with the following changes: | |
//! | |
//! * [`textwrap::core::Word`] is now [`StyledWord`]. | |
//! * Hyphenation is no longer supported. | |
//! | |
//! Currently, each element of `tui`'s [`Text::lines`] is assumed to be a | |
//! separate line. We don't check for whether a line's content has embedded | |
//! newlines in it, but we could in the future if necessary. (Embedded newlines | |
//! won't break the output, but they might make the output look a bit weird.) | |
use itertools::{Itertools, Position}; | |
use ratatui::text::{Line, Span, Text}; | |
use textwrap::{ | |
core::{display_width, Fragment}, | |
wrap_algorithms::{wrap_optimal_fit, Penalties}, | |
}; | |
pub struct Options<'a> { | |
/// The width in columns at which the text will be wrapped. | |
pub width: usize, | |
/// Indentation used for the first line of output. | |
pub initial_indent: Span<'a>, | |
/// Indentation used for subsequent lines of output. | |
pub subsequent_indent: Span<'a>, | |
/// Allow long words to be broken if they cannot fit on a line. | |
/// When set to `false`, some lines may be longer than | |
/// `self.width`. | |
pub break_words: bool, | |
} | |
/// Wraps a [`Text`] block. | |
/// | |
/// `text` should be broken up into lines at the time it's passed in. | |
pub(crate) fn wrap_text<'a>( | |
text: &'a Text<'_>, | |
options: Options<'a>, | |
) -> Text<'a> { | |
let mut lines = Vec::new(); | |
// We currently assume that lines in text don't have embedded newlines in | |
// them. This assumption might need to be revisited. | |
for line in &text.lines { | |
wrap_single_line(line, &options, &mut lines); | |
} | |
Text::from(lines) | |
} | |
/// Wraps a [`Line`] representing a single line. | |
/// | |
/// If the text contains multiple lines, use [`wrap_text`] instead. | |
pub(crate) fn wrap_line<'a>( | |
line: &'a Line<'_>, | |
options: Options<'a>, | |
) -> Text<'a> { | |
let mut lines = Vec::new(); | |
wrap_single_line(line, &options, &mut lines); | |
Text::from(lines) | |
} | |
fn wrap_single_line<'a>( | |
line: &'a Line<'_>, | |
options: &Options<'a>, | |
lines: &mut Vec<Line<'a>>, | |
) { | |
let indent = if lines.is_empty() { | |
options.initial_indent.clone() | |
} else { | |
options.subsequent_indent.clone() | |
}; | |
if line.width() < options.width && indent.content.is_empty() { | |
lines.push(borrow_line(line)); | |
} else { | |
wrap_single_line_slow_path(line, options, lines) | |
} | |
} | |
fn borrow_line<'a>(line: &'a Line<'_>) -> Line<'a> { | |
let spans = line | |
.spans | |
.iter() | |
.map(|span| Span::styled(span.content.as_ref(), span.style)) | |
.collect::<Vec<_>>(); | |
Line::from(spans) | |
} | |
fn wrap_single_line_slow_path<'a>( | |
line: &'a Line<'_>, | |
options: &Options<'a>, | |
lines: &mut Vec<Line<'a>>, | |
) { | |
// Span::width (options.initial_indent.width() etc) use the Unicode display | |
// width, which is what we expect. | |
let initial_width = | |
options.width.saturating_sub(options.initial_indent.width()); | |
let subsequent_width = | |
options.width.saturating_sub(options.subsequent_indent.width()); | |
let line_widths = [initial_width, subsequent_width]; | |
let split_words = find_words_in_line(&line); | |
// We don't perform any word splitting. | |
let broken_words = if options.break_words { | |
let mut broken_words = break_words(split_words, line_widths[1]); | |
if !options.initial_indent.content.is_empty() { | |
// Without this, the first word will always go into the | |
// first line. However, since we break words based on the | |
// _second_ line width, it can be wrong to unconditionally | |
// put the first word onto the first line. An empty | |
// zero-width word fixed this. | |
broken_words.insert(0, StyledWord::empty()); | |
} | |
broken_words | |
} else { | |
split_words.collect::<Vec<_>>() | |
}; | |
let f64_line_widths = | |
line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>(); | |
// The optimal fit wrap looks nicer, and we're wrapping pretty small amounts | |
// of text so performance is unlikely to be an issue. | |
let wrapped_lines = | |
wrap_optimal_fit(&broken_words, &f64_line_widths, &Penalties::new()) | |
.expect("computation cannot overflow with restricted line widths"); | |
for words in wrapped_lines { | |
let mut output_line = Vec::new(); | |
if lines.is_empty() && !options.initial_indent.content.is_empty() { | |
output_line.push(options.initial_indent.clone()); | |
} else if !lines.is_empty() | |
&& !options.subsequent_indent.content.is_empty() | |
{ | |
output_line.push(options.subsequent_indent.clone()); | |
} | |
for (position, word) in words.into_iter().with_position() { | |
match position { | |
Position::First | Position::Middle => { | |
output_line.extend(word.word_span()); | |
output_line.extend(word.whitespace_span()); | |
} | |
Position::Last | Position::Only => { | |
// Don't add trailing whitespace, just the content. | |
output_line.extend(word.word_span()); | |
// We don't support hyphenation at the moment, but if we | |
// did, this is where they would go. | |
} | |
} | |
} | |
lines.push(Line::from(output_line)); | |
} | |
} | |
fn find_words_in_line<'a>( | |
line: &'a Line<'_>, | |
) -> impl Iterator<Item = StyledWord<'a>> { | |
line.spans.iter().flat_map(|span| find_words_in_span(span)) | |
} | |
/// Breaks this span into smaller words. | |
/// | |
/// This assumes the only word breaks are ASCII spaces. In particular, it | |
/// assume that there are no newlines anywhere within a span. | |
fn find_words_in_span<'a>( | |
span: &'a Span<'_>, | |
) -> impl Iterator<Item = StyledWord<'a>> { | |
let mut start = 0; | |
let mut in_whitespace = false; | |
let mut char_indices = span.content.char_indices(); | |
std::iter::from_fn(move || { | |
for (idx, ch) in char_indices.by_ref() { | |
if in_whitespace && ch != ' ' { | |
let word = StyledWord::new_sub_span(span, start, idx); | |
start = idx; | |
in_whitespace = ch == ' '; | |
return Some(word); | |
} | |
in_whitespace = ch == ' '; | |
} | |
let content_len = span.content.len(); | |
if start < content_len { | |
let word = StyledWord::new_sub_span(span, start, content_len); | |
start = content_len; | |
return Some(word); | |
} | |
None | |
}) | |
} | |
/// A word with a style associated with it. | |
/// | |
/// This is similar to a [`textwrap::core::Word`], except each word also has a | |
/// style associated with it. | |
#[derive(Copy, Clone, Debug)] | |
struct StyledWord<'a> { | |
word: &'a str, | |
width: usize, | |
whitespace: &'a str, | |
style: ratatui::style::Style, | |
} | |
impl<'a> StyledWord<'a> { | |
#[allow(unused)] | |
fn new(span: &'a Span<'_>) -> Self { | |
// We assume the whitespace consists of ' ' only. This allows us to | |
// compute the display width in constant time. | |
Self::new_impl(&span.content, span.style) | |
} | |
fn new_sub_span(span: &'a Span<'_>, start: usize, end: usize) -> Self { | |
let content = &span.content[start..end]; | |
Self::new_impl(content, span.style) | |
} | |
fn new_impl(content: &'a str, style: ratatui::style::Style) -> Self { | |
let trimmed = content.trim_end_matches(' '); | |
Self { | |
word: trimmed, | |
width: display_width(trimmed), | |
whitespace: &content[trimmed.len()..], | |
style, | |
} | |
} | |
fn empty() -> Self { | |
Self { | |
word: "", | |
width: 0, | |
whitespace: "", | |
style: ratatui::style::Style::default(), | |
} | |
} | |
fn word_span(&self) -> Option<Span<'a>> { | |
(!self.word.is_empty()).then(|| Span::styled(self.word, self.style)) | |
} | |
fn whitespace_span(&self) -> Option<Span<'a>> { | |
(!self.whitespace.is_empty()) | |
.then(|| Span::styled(self.whitespace, self.style)) | |
} | |
/// Break this span into smaller words with a width of at most `line_width`. | |
/// The whitespace from this `SpanWord` is added to the last piece. | |
fn break_apart<'b>( | |
&'b self, | |
line_width: usize, | |
) -> impl Iterator<Item = StyledWord<'a>> + 'b { | |
let mut char_indices = self.word.char_indices(); | |
let mut offset = 0; | |
let mut width = 0; | |
std::iter::from_fn(move || { | |
while let Some((idx, ch)) = char_indices.next() { | |
if skip_ansi_escape_sequence( | |
ch, | |
&mut char_indices.by_ref().map(|(_, ch)| ch), | |
) { | |
continue; | |
} | |
if width > 0 && width + ch_width(ch) > line_width { | |
let word = StyledWord { | |
word: &self.word[offset..idx], | |
width, | |
whitespace: "", | |
style: self.style, | |
}; | |
offset = idx; | |
width = ch_width(ch); | |
return Some(word); | |
} | |
width += ch_width(ch); | |
} | |
if offset < self.word.len() { | |
let word = StyledWord { | |
word: &self.word[offset..], | |
width, | |
whitespace: self.whitespace, | |
style: self.style, | |
}; | |
offset = self.word.len(); | |
return Some(word); | |
} | |
None | |
}) | |
} | |
} | |
impl<'a> Fragment for StyledWord<'a> { | |
fn width(&self) -> f64 { | |
// self.width is the display width, which is what we care about here. | |
self.width as f64 | |
} | |
fn whitespace_width(&self) -> f64 { | |
// Since whitespace is always ASCII spaces, this is equal to the number | |
// of whitespace characters. | |
self.whitespace.len() as f64 | |
} | |
fn penalty_width(&self) -> f64 { | |
// We don't insert hyphens or anything similar else -- just use 0.0 | |
// here. | |
0.0 | |
} | |
} | |
/// Forcibly break spans wider than `line_width` into smaller spans. | |
/// | |
/// This simply calls [`Span::break_apart`] on spans that are too wide. | |
fn break_words<'a, I>(spans: I, line_width: usize) -> Vec<StyledWord<'a>> | |
where | |
I: IntoIterator<Item = StyledWord<'a>>, | |
{ | |
let mut shortened_spans = Vec::new(); | |
for span in spans { | |
if span.width() > line_width as f64 { | |
shortened_spans.extend(span.break_apart(line_width)); | |
} else { | |
shortened_spans.push(span); | |
} | |
} | |
shortened_spans | |
} | |
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape | |
/// sequence. This is typically used for colored text and will be | |
/// ignored when computing the text width. | |
const CSI: (char, char) = ('\x1b', '['); | |
/// The final bytes of an ANSI escape sequence must be in this range. | |
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e'; | |
/// Skip ANSI escape sequences. The `ch` is the current `char`, the | |
/// `chars` provide the following characters. The `chars` will be | |
/// modified if `ch` is the start of an ANSI escape sequence. | |
#[inline] | |
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>( | |
ch: char, | |
chars: &mut I, | |
) -> bool { | |
if ch == CSI.0 && chars.next() == Some(CSI.1) { | |
// We have found the start of an ANSI escape code, typically | |
// used for colored terminal text. We skip until we find a | |
// "final byte" in the range 0x40–0x7E. | |
for ch in chars { | |
if ANSI_FINAL_BYTE.contains(&ch) { | |
return true; | |
} | |
} | |
} | |
false | |
} | |
fn ch_width(ch: char) -> usize { | |
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this gist!
It works well, but I noticed that
subsequent_indent
is prepended to the second line even if the first line was not broken up. This can be fixed as follows: https://gist.github.com/tindzk/489f7298eab43f4b174ee62777e93769/revisions