Wrap text in a unicode-aware way
This commit is contained in:
parent
8fae7d2bf1
commit
37634139b0
5 changed files with 164 additions and 0 deletions
|
|
@ -5,5 +5,6 @@ edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
crossterm = "0.23.2"
|
crossterm = "0.23.2"
|
||||||
|
unicode-linebreak = "0.1.2"
|
||||||
unicode-segmentation = "1.9.0"
|
unicode-segmentation = "1.9.0"
|
||||||
unicode-width = "0.1.9"
|
unicode-width = "0.1.9"
|
||||||
|
|
|
||||||
60
examples/text_wrapping.rs
Normal file
60
examples/text_wrapping.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
use crossterm::event::Event;
|
||||||
|
use crossterm::style::ContentStyle;
|
||||||
|
use toss::frame::{Frame, Pos};
|
||||||
|
use toss::terminal::{Redraw, Terminal};
|
||||||
|
|
||||||
|
fn draw(f: &mut Frame) {
|
||||||
|
let text = concat!(
|
||||||
|
"This is a short paragraph in order to demonstrate unicode-aware word wrapping. ",
|
||||||
|
"Resize your terminal to different widths to try it out. ",
|
||||||
|
"After this sentence come two newlines, so it should always break here.\n",
|
||||||
|
"\n",
|
||||||
|
"Since the wrapping algorithm is aware of the Unicode Standard Annex #14, ",
|
||||||
|
"it understands things like nonbreaking spaces: ",
|
||||||
|
"This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}nonbreaking\u{00a0}spaces.\n",
|
||||||
|
"\n",
|
||||||
|
"It can also properly handle wide graphemes (like emoji 🤔), ",
|
||||||
|
"including ones usually displayed incorrectly by terminal emulators, like 👩🔬 (a female scientist emoji).",
|
||||||
|
);
|
||||||
|
// TODO Actually use nbsp
|
||||||
|
|
||||||
|
let breaks = f.wrap(text, f.size().width.into());
|
||||||
|
let lines = toss::split_at_indices(text, &breaks);
|
||||||
|
for (i, line) in lines.iter().enumerate() {
|
||||||
|
f.write(
|
||||||
|
Pos::new(0, i as i32),
|
||||||
|
line.trim_end(),
|
||||||
|
ContentStyle::default(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_frame(term: &mut Terminal) {
|
||||||
|
loop {
|
||||||
|
// Must be called before rendering, otherwise the terminal has out-of-date
|
||||||
|
// size information and will present garbage.
|
||||||
|
term.autoresize().unwrap();
|
||||||
|
|
||||||
|
draw(term.frame());
|
||||||
|
|
||||||
|
if term.present().unwrap() == Redraw::NotRequired {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Automatically enters alternate screen and enables raw mode
|
||||||
|
let mut term = Terminal::new().unwrap();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Render and display a frame. A full frame is displayed on the terminal
|
||||||
|
// once this function exits.
|
||||||
|
render_frame(&mut term);
|
||||||
|
|
||||||
|
// Exit if the user presses any buttons
|
||||||
|
if !matches!(crossterm::event::read().unwrap(), Event::Resize(_, _)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -5,6 +5,7 @@ use crossterm::style::ContentStyle;
|
||||||
use crate::buffer::Buffer;
|
use crate::buffer::Buffer;
|
||||||
pub use crate::buffer::{Pos, Size};
|
pub use crate::buffer::{Pos, Size};
|
||||||
use crate::widthdb::WidthDB;
|
use crate::widthdb::WidthDB;
|
||||||
|
use crate::wrap;
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct Frame {
|
pub struct Frame {
|
||||||
|
|
@ -55,6 +56,10 @@ impl Frame {
|
||||||
self.widthdb.width(s)
|
self.widthdb.width(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> {
|
||||||
|
wrap::wrap(text, width, &mut self.widthdb)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn write(&mut self, pos: Pos, content: &str, style: ContentStyle) {
|
pub fn write(&mut self, pos: Pos, content: &str, style: ContentStyle) {
|
||||||
self.buffer.write(&mut self.widthdb, pos, content, style);
|
self.buffer.write(&mut self.widthdb, pos, content, style);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,3 +2,6 @@ mod buffer;
|
||||||
pub mod frame;
|
pub mod frame;
|
||||||
pub mod terminal;
|
pub mod terminal;
|
||||||
mod widthdb;
|
mod widthdb;
|
||||||
|
mod wrap;
|
||||||
|
|
||||||
|
pub use wrap::split_at_indices;
|
||||||
|
|
|
||||||
95
src/wrap.rs
Normal file
95
src/wrap.rs
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
//! Word wrapping for text.
|
||||||
|
|
||||||
|
use unicode_linebreak::BreakOpportunity;
|
||||||
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
|
use crate::widthdb::WidthDB;
|
||||||
|
|
||||||
|
// TODO Handle tabs separately?
|
||||||
|
// TODO Convert into an iterator?
|
||||||
|
pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
|
||||||
|
let mut breaks = vec![];
|
||||||
|
|
||||||
|
let mut break_options = unicode_linebreak::linebreaks(text).peekable();
|
||||||
|
|
||||||
|
// The last valid break point encountered and its width
|
||||||
|
let mut valid_break = None;
|
||||||
|
let mut valid_break_width = 0;
|
||||||
|
|
||||||
|
// Width of the line at the current grapheme
|
||||||
|
let mut current_width = 0;
|
||||||
|
|
||||||
|
for (gi, g) in text.grapheme_indices(true) {
|
||||||
|
// Advance break options
|
||||||
|
let (bi, b) = loop {
|
||||||
|
let (bi, b) = break_options.peek().expect("not at end of string yet");
|
||||||
|
if *bi < gi {
|
||||||
|
break_options.next();
|
||||||
|
} else {
|
||||||
|
break (*bi, b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Evaluate break options at the current position
|
||||||
|
if bi == gi {
|
||||||
|
match b {
|
||||||
|
BreakOpportunity::Mandatory => {
|
||||||
|
breaks.push(bi);
|
||||||
|
valid_break = None;
|
||||||
|
valid_break_width = 0;
|
||||||
|
current_width = 0;
|
||||||
|
}
|
||||||
|
BreakOpportunity::Allowed => {
|
||||||
|
valid_break = Some(bi);
|
||||||
|
valid_break_width = current_width;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let grapheme_width: usize = widthdb.grapheme_width(g).into();
|
||||||
|
if current_width + grapheme_width > width {
|
||||||
|
if current_width == 0 {
|
||||||
|
// The grapheme is wider than the maximum width, so we'll allow
|
||||||
|
// it, thereby forcing the following grapheme to break no matter
|
||||||
|
// what (either because of a mandatory or allowed break, or via
|
||||||
|
// a forced break).
|
||||||
|
} else if let Some(bi) = valid_break {
|
||||||
|
// We can't fit the grapheme onto the current line, so we'll
|
||||||
|
// just break at the last valid break point.
|
||||||
|
breaks.push(bi);
|
||||||
|
current_width -= valid_break_width;
|
||||||
|
valid_break = None;
|
||||||
|
valid_break_width = 0;
|
||||||
|
} else {
|
||||||
|
// Forced break in the midde of a normally non-breakable chunk
|
||||||
|
// because there have been no valid break points yet.
|
||||||
|
breaks.push(gi);
|
||||||
|
valid_break = None;
|
||||||
|
valid_break_width = 0;
|
||||||
|
current_width = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
current_width += grapheme_width;
|
||||||
|
}
|
||||||
|
|
||||||
|
breaks
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits `s` into consecutive slices at the given byte offsets.
///
/// `indices` are absolute byte offsets into `s` and must be sorted in
/// ascending order. The result always contains `indices.len() + 1` slices
/// which, concatenated, yield `s` again. Repeated indices and indices at the
/// very start or end of `s` produce empty slices.
///
/// # Panics
///
/// Panics if `indices` is not sorted in ascending order, if an index lies
/// beyond the end of `s`, or if an index is not on a UTF-8 character boundary.
pub fn split_at_indices<'a>(s: &'a str, indices: &[usize]) -> Vec<&'a str> {
    // One slice per index plus the remainder after the last index.
    let mut slices = Vec::with_capacity(indices.len() + 1);

    let mut rest = s;
    let mut offset = 0;

    for &i in indices {
        // `rest` starts at byte `offset` of `s`, so the absolute index has to
        // be made relative to it before splitting.
        let mid = i
            .checked_sub(offset)
            .expect("indices must be sorted in ascending order");
        let (left, right) = rest.split_at(mid);
        slices.push(left);
        rest = right;
        offset = i;
    }

    slices.push(rest);

    slices
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue