Skip to content

Instantly share code, notes, and snippets.

@sunshowers
Created September 28, 2023 20:32
Show Gist options
  • Save sunshowers/457102aabd9db3bb23d61f2b4259bdd0 to your computer and use it in GitHub Desktop.
Save sunshowers/457102aabd9db3bb23d61f2b4259bdd0 to your computer and use it in GitHub Desktop.
ratatui wrapping with textwrap
// MIT License
//
// Copyright (c) 2023 Oxide Computer Company
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//! This module is an adaptation of textwrap's `Word` library to work with
//! `ratatui`'s `Span`s and lines.
//!
//! The code is mostly copy-pasted, with the following changes:
//!
//! * [`textwrap::core::Word`] is now [`StyledWord`].
//! * Hyphenation is no longer supported.
//!
//! Currently, each element of `ratatui`'s [`Text::lines`] is assumed to be a
//! separate line. We don't check whether a line's content has embedded
//! newlines in it, but we could in the future if necessary. (Embedded newlines
//! won't break the output, but they might make the output look a bit weird.)
use itertools::{Itertools, Position};
use ratatui::text::{Line, Span, Text};
use textwrap::{
core::{display_width, Fragment},
wrap_algorithms::{wrap_optimal_fit, Penalties},
};
/// Settings that control how styled text is wrapped.
///
/// The indents are [`Span`]s, so they can carry their own style. They are
/// cloned into every output line that needs them.
#[derive(Clone, Debug)]
pub struct Options<'a> {
    /// The width in columns at which the text will be wrapped.
    pub width: usize,
    /// Indentation used for the first line of output.
    pub initial_indent: Span<'a>,
    /// Indentation used for subsequent lines of output.
    pub subsequent_indent: Span<'a>,
    /// Allow long words to be broken if they cannot fit on a line.
    /// When set to `false`, some lines may be longer than
    /// `self.width`.
    pub break_words: bool,
}
/// Wraps a [`Text`] block.
///
/// `text` should be broken up into lines at the time it's passed in.
pub(crate) fn wrap_text<'a>(
text: &'a Text<'_>,
options: Options<'a>,
) -> Text<'a> {
let mut lines = Vec::new();
// We currently assume that lines in text don't have embedded newlines in
// them. This assumption might need to be revisited.
for line in &text.lines {
wrap_single_line(line, &options, &mut lines);
}
Text::from(lines)
}
/// Wraps one [`Line`] and returns the resulting lines as a [`Text`].
///
/// For input that consists of several lines, use [`wrap_text`] instead.
pub(crate) fn wrap_line<'a>(
    line: &'a Line<'_>,
    options: Options<'a>,
) -> Text<'a> {
    let mut wrapped: Vec<Line<'a>> = Vec::new();
    wrap_single_line(line, &options, &mut wrapped);
    wrapped.into()
}
/// Wraps `line` and appends the produced lines to `lines`.
///
/// A line that is strictly narrower than `options.width` and needs no indent
/// is pushed as-is (borrowed) without running the wrapping algorithm.
/// Everything else goes through [`wrap_single_line_slow_path`].
fn wrap_single_line<'a>(
    line: &'a Line<'_>,
    options: &Options<'a>,
    lines: &mut Vec<Line<'a>>,
) {
    // Only the very first output line uses the initial indent.
    let indent = if lines.is_empty() {
        &options.initial_indent
    } else {
        &options.subsequent_indent
    };

    let needs_wrapping =
        line.width() >= options.width || !indent.content.is_empty();
    if needs_wrapping {
        wrap_single_line_slow_path(line, options, lines);
    } else {
        lines.push(borrow_line(line));
    }
}
/// Builds a new [`Line`] whose spans borrow their content from `line`.
///
/// Each span keeps its style; only the spans are carried over into the new
/// line.
fn borrow_line<'a>(line: &'a Line<'_>) -> Line<'a> {
    let mut borrowed = Vec::with_capacity(line.spans.len());
    for span in &line.spans {
        borrowed.push(Span::styled(span.content.as_ref(), span.style));
    }
    Line::from(borrowed)
}
fn wrap_single_line_slow_path<'a>(
line: &'a Line<'_>,
options: &Options<'a>,
lines: &mut Vec<Line<'a>>,
) {
// Span::width (options.initial_indent.width() etc) use the Unicode display
// width, which is what we expect.
let initial_width =
options.width.saturating_sub(options.initial_indent.width());
let subsequent_width =
options.width.saturating_sub(options.subsequent_indent.width());
let line_widths = [initial_width, subsequent_width];
let split_words = find_words_in_line(&line);
// We don't perform any word splitting.
let broken_words = if options.break_words {
let mut broken_words = break_words(split_words, line_widths[1]);
if !options.initial_indent.content.is_empty() {
// Without this, the first word will always go into the
// first line. However, since we break words based on the
// _second_ line width, it can be wrong to unconditionally
// put the first word onto the first line. An empty
// zero-width word fixed this.
broken_words.insert(0, StyledWord::empty());
}
broken_words
} else {
split_words.collect::<Vec<_>>()
};
let f64_line_widths =
line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
// The optimal fit wrap looks nicer, and we're wrapping pretty small amounts
// of text so performance is unlikely to be an issue.
let wrapped_lines =
wrap_optimal_fit(&broken_words, &f64_line_widths, &Penalties::new())
.expect("computation cannot overflow with restricted line widths");
for words in wrapped_lines {
let mut output_line = Vec::new();
if lines.is_empty() && !options.initial_indent.content.is_empty() {
output_line.push(options.initial_indent.clone());
} else if !lines.is_empty()
&& !options.subsequent_indent.content.is_empty()
{
output_line.push(options.subsequent_indent.clone());
}
for (position, word) in words.into_iter().with_position() {
match position {
Position::First | Position::Middle => {
output_line.extend(word.word_span());
output_line.extend(word.whitespace_span());
}
Position::Last | Position::Only => {
// Don't add trailing whitespace, just the content.
output_line.extend(word.word_span());
// We don't support hyphenation at the moment, but if we
// did, this is where they would go.
}
}
}
lines.push(Line::from(output_line));
}
}
/// Splits every span of `line` into [`StyledWord`]s, preserving span order.
fn find_words_in_line<'a>(
    line: &'a Line<'_>,
) -> impl Iterator<Item = StyledWord<'a>> {
    let spans = line.spans.iter();
    spans.flat_map(|styled| find_words_in_span(styled))
}
/// Splits a span into words, each carrying the whitespace that follows it.
///
/// The only word separator recognised is the ASCII space. In particular, this
/// assumes that there are no newlines anywhere within the span. A new word
/// starts at the first non-space character that follows a run of spaces.
fn find_words_in_span<'a>(
    span: &'a Span<'_>,
) -> impl Iterator<Item = StyledWord<'a>> {
    let total_len = span.content.len();
    let mut word_start = 0;
    let mut prev_was_space = false;
    let mut chars = span.content.char_indices();

    std::iter::from_fn(move || {
        for (idx, ch) in chars.by_ref() {
            let is_space = ch == ' ';
            if prev_was_space && !is_space {
                // `idx` is where the next word begins, so everything from
                // `word_start` up to here is one word plus its spaces.
                let word = StyledWord::new_sub_span(span, word_start, idx);
                word_start = idx;
                prev_was_space = false;
                return Some(word);
            }
            prev_was_space = is_space;
        }

        // The characters are exhausted: emit whatever is left exactly once.
        (word_start < total_len).then(|| {
            let word = StyledWord::new_sub_span(span, word_start, total_len);
            word_start = total_len;
            word
        })
    })
}
/// A word with a style associated with it.
///
/// This is similar to a [`textwrap::core::Word`], except each word also has a
/// style associated with it.
#[derive(Copy, Clone, Debug)]
struct StyledWord<'a> {
/// The word's text, without the trailing spaces.
word: &'a str,
/// The display width of `word` in columns.
width: usize,
/// The spaces that follow the word (may be empty).
whitespace: &'a str,
/// The style applied to both the word and its whitespace when they are
/// turned back into spans.
style: ratatui::style::Style,
}
impl<'a> StyledWord<'a> {
    /// Builds a word from the whole content of `span`.
    #[allow(unused)]
    fn new(span: &'a Span<'_>) -> Self {
        // The whitespace is assumed to be made of ' ' only, which lets its
        // display width be computed in constant time.
        Self::new_impl(&span.content, span.style)
    }

    /// Builds a word from the byte range `start..end` of `span`'s content.
    fn new_sub_span(span: &'a Span<'_>, start: usize, end: usize) -> Self {
        Self::new_impl(&span.content[start..end], span.style)
    }

    /// Splits `content` into the word and its trailing spaces.
    fn new_impl(content: &'a str, style: ratatui::style::Style) -> Self {
        let word_len = content.trim_end_matches(' ').len();
        let (word, whitespace) = content.split_at(word_len);
        Self {
            word,
            width: display_width(word),
            whitespace,
            style,
        }
    }

    /// A zero-width word with no text, no whitespace and the default style.
    fn empty() -> Self {
        Self {
            word: "",
            width: 0,
            whitespace: "",
            style: ratatui::style::Style::default(),
        }
    }

    /// The word as a styled span, or `None` if the word is empty.
    fn word_span(&self) -> Option<Span<'a>> {
        if self.word.is_empty() {
            None
        } else {
            Some(Span::styled(self.word, self.style))
        }
    }

    /// The trailing whitespace as a styled span, or `None` if there is none.
    fn whitespace_span(&self) -> Option<Span<'a>> {
        if self.whitespace.is_empty() {
            None
        } else {
            Some(Span::styled(self.whitespace, self.style))
        }
    }

    /// Break this word into smaller words with a width of at most
    /// `line_width`. The whitespace from this `StyledWord` is added to the
    /// last piece.
    ///
    /// A piece always contains at least one character with non-zero width,
    /// even if that character alone is wider than `line_width`.
    fn break_apart<'b>(
        &'b self,
        line_width: usize,
    ) -> impl Iterator<Item = StyledWord<'a>> + 'b {
        // `StyledWord` is `Copy`, so take the fields out up front.
        let Self { word: text, whitespace, style, .. } = *self;
        let mut chars = text.char_indices();
        let mut piece_start = 0;
        let mut piece_width = 0;

        std::iter::from_fn(move || {
            while let Some((idx, ch)) = chars.next() {
                // Escape sequences take up no columns; step over them.
                if skip_ansi_escape_sequence(
                    ch,
                    &mut chars.by_ref().map(|(_, c)| c),
                ) {
                    continue;
                }

                let advance = ch_width(ch);
                if piece_width > 0 && piece_width + advance > line_width {
                    // Adding `ch` would overflow: close the current piece and
                    // start the next one with `ch`.
                    let piece = StyledWord {
                        word: &text[piece_start..idx],
                        width: piece_width,
                        whitespace: "",
                        style,
                    };
                    piece_start = idx;
                    piece_width = advance;
                    return Some(piece);
                }
                piece_width += advance;
            }

            if piece_start >= text.len() {
                return None;
            }
            // Final piece: it inherits the original trailing whitespace.
            let tail = StyledWord {
                word: &text[piece_start..],
                width: piece_width,
                whitespace,
                style,
            };
            piece_start = text.len();
            Some(tail)
        })
    }
}
impl Fragment for StyledWord<'_> {
    /// Display width of the word itself, in columns.
    fn width(&self) -> f64 {
        self.width as f64
    }

    /// Width of the trailing whitespace. The whitespace is always made of
    /// ASCII spaces, so its byte length equals its column count.
    fn whitespace_width(&self) -> f64 {
        self.whitespace.len() as f64
    }

    /// Always zero: nothing (such as a hyphen) is ever inserted at a break.
    fn penalty_width(&self) -> f64 {
        0.0
    }
}
/// Forcibly break words wider than `line_width` into smaller words.
///
/// Words that already fit are passed through unchanged; wider ones are
/// replaced by the pieces returned from [`StyledWord::break_apart`].
fn break_words<'a, I>(spans: I, line_width: usize) -> Vec<StyledWord<'a>>
where
    I: IntoIterator<Item = StyledWord<'a>>,
{
    spans.into_iter().fold(Vec::new(), |mut pieces, word| {
        if word.width() > line_width as f64 {
            pieces.extend(word.break_apart(line_width));
        } else {
            pieces.push(word);
        }
        pieces
    })
}
/// The two characters of the CSI ("Control Sequence Introducer") that starts
/// an ANSI escape sequence. Such sequences are typically used for colored
/// text and are ignored when computing the text width.
const CSI: (char, char) = ('\x1b', '[');

/// Range that the final byte of an ANSI escape sequence must fall in.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skips over an ANSI escape sequence, if one starts at `ch`.
///
/// `ch` is the current character and `chars` yields the characters after it.
/// Returns `true` only when a complete sequence (CSI up to and including a
/// final byte) was consumed from `chars`.
///
/// `chars` is left untouched unless `ch` is the escape character. If it is,
/// at least one character is consumed, even when this returns `false`.
#[inline]
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(
    ch: char,
    chars: &mut I,
) -> bool {
    let (escape, bracket) = CSI;
    if ch != escape {
        return false;
    }
    if chars.next() != Some(bracket) {
        return false;
    }
    // This is the start of an ANSI escape code. Consume characters up to and
    // including the first "final byte" in the range 0x40–0x7E; `any` stops
    // right after that byte, or reports `false` if the input runs out first.
    chars.any(|c| ANSI_FINAL_BYTE.contains(&c))
}
fn ch_width(ch: char) -> usize {
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
}
@tindzk
Copy link

tindzk commented Jul 25, 2024

Thanks for this gist!

It works well, but I noticed that subsequent_indent is prepended to the second line even if the first line was not broken up. This can be fixed as follows: https://gist.github.com/tindzk/489f7298eab43f4b174ee62777e93769/revisions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment