-
-
Notifications
You must be signed in to change notification settings - Fork 137
Add utility function ansi::slice_ansi_str
#206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| use alloc::borrow::Cow; | ||
| use core::ops::Range; | ||
| use core::{ | ||
| fmt::{self, Debug, Formatter}, | ||
| sync::atomic::{AtomicBool, Ordering}, | ||
|
|
@@ -890,78 +891,124 @@ pub(crate) fn char_width(_c: char) -> usize { | |
| 1 | ||
| } | ||
|
|
||
| /// Truncates a string to a certain number of characters. | ||
| /// Slice a `&str` in terms of text width. This means that only the text | ||
| /// columns from `bounds.start` (inclusive) to `bounds.end` (exclusive) will be kept. | ||
| /// | ||
| /// This ensures that escape codes are not screwed up in the process. | ||
| /// If the maximum length is hit the string will be truncated but | ||
| /// escape codes will still be honored. If truncation takes place | ||
| /// the tail string will be appended. | ||
| pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { | ||
| if measure_text_width(s) <= width { | ||
| return Cow::Borrowed(s); | ||
| } | ||
|
|
||
| /// If a multi-column character overlaps with the end of the interval it will | ||
| /// not be included. In such a case, the result will be less than `end - start` | ||
| /// columns wide. | ||
| /// | ||
| /// This ensures that escape codes are not screwed up in the process. And if | ||
| /// non-empty head and tail are specified, they are inserted between the ANSI | ||
| /// codes from truncated bounds and the slice. | ||
| pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> { | ||
| #[cfg(feature = "ansi-parsing")] | ||
| { | ||
| use core::cmp::Ordering; | ||
| let mut iter = AnsiCodeIterator::new(s); | ||
| let mut length = 0; | ||
| let mut rv = None; | ||
|
|
||
| while let Some(item) = iter.next() { | ||
| match item { | ||
| (s, false) => { | ||
| if rv.is_none() { | ||
| if str_width(s) + length > width.saturating_sub(str_width(tail)) { | ||
| let ts = iter.current_slice(); | ||
|
|
||
| let mut s_byte = 0; | ||
| let mut s_width = 0; | ||
| let rest_width = | ||
| width.saturating_sub(str_width(tail)).saturating_sub(length); | ||
| for c in s.chars() { | ||
| s_byte += c.len_utf8(); | ||
| s_width += char_width(c); | ||
| match s_width.cmp(&rest_width) { | ||
| Ordering::Equal => break, | ||
| Ordering::Greater => { | ||
| s_byte -= c.len_utf8(); | ||
| break; | ||
| } | ||
| Ordering::Less => continue, | ||
| } | ||
| } | ||
|
|
||
| let idx = ts.len() - s.len() + s_byte; | ||
| let mut buf = ts[..idx].to_string(); | ||
| buf.push_str(tail); | ||
| rv = Some(buf); | ||
| } | ||
| length += str_width(s); | ||
| let mut pos = 0; // Current search index by width | ||
| let mut code_iter = AnsiCodeIterator::new(s).peekable(); | ||
|
|
||
| // Search for the beginning of the slice while collecting heading ANSI | ||
| // codes | ||
| let mut front_ansi = String::new(); // ANSI codes found before bound start | ||
| let mut slice_start = 0; // Current search index by bytes | ||
|
|
||
| // Extract the leading slice, which *may be mutated* to drop the characters that fall before the start bound. | ||
| 'search_slice_start: while pos < bounds.start { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm counting 5 loops here, two of which are nested. Fundamentally, why is that necessary? It seems like there is more complexity here than is warranted.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This feels a bit complex, but I remember it came from my choice not to compromise on performance, because this function would be called in most hot loops. It could be written a bit more simply, but this is the best way I found to use a single "pos" cursor that can only go forward.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But you are now allocating for |
||
| let Some((sub, is_ansi)) = code_iter.peek_mut() else { | ||
| break; | ||
| }; | ||
|
|
||
| if *is_ansi { | ||
| // Keep track of leading ANSI for later output. | ||
| front_ansi.push_str(sub); | ||
| slice_start += sub.len(); | ||
| } else { | ||
| for (c_idx, c) in sub.char_indices() { | ||
| if pos >= bounds.start { | ||
| // Ensure we don't drop the remainder of the slice before searching for the | ||
| // end bound. | ||
| *sub = &sub[c_idx..]; | ||
| break 'search_slice_start; | ||
| } | ||
|
|
||
| pos += char_width(c); | ||
| slice_start += c.len_utf8(); | ||
| } | ||
| (s, true) => { | ||
| if let Some(ref mut rv) = rv { | ||
| rv.push_str(s); | ||
| } | ||
| } | ||
|
|
||
| code_iter.next(); | ||
| } | ||
|
|
||
| // Search for the end of the slice. This loop is a bit simpler because we don't need to | ||
| // keep track of remaining characters if we cut in the middle of a non-ANSI slice. | ||
| let mut slice_end = slice_start; | ||
|
|
||
| 'search_slice_end: for (sub, is_ansi) in &mut code_iter { | ||
| if is_ansi { | ||
| // Keep ANSI in the output slice but don't account for them in the total width. | ||
| slice_end += sub.len(); | ||
| continue; | ||
| } | ||
|
|
||
| for c in sub.chars() { | ||
| let c_width = char_width(c); | ||
|
|
||
| if pos + c_width > bounds.end { | ||
| // We will only search for ANSI codes after breaking this | ||
| // loop, so we can safely drop the remainder of `sub` | ||
| break 'search_slice_end; | ||
| } | ||
|
|
||
| pos += c_width; | ||
| slice_end += c.len_utf8(); | ||
| } | ||
| } | ||
|
|
||
| if let Some(buf) = rv { | ||
| Cow::Owned(buf) | ||
| } else { | ||
| Cow::Borrowed(s) | ||
| // Initialise the result (before appending remaining ANSI slices) | ||
| let slice = &s[slice_start..slice_end]; | ||
|
|
||
| let mut result = { | ||
| if front_ansi.is_empty() && head.is_empty() && tail.is_empty() { | ||
| // No allocation is needed when there are no leading ANSI codes, head, or tail to add around the slice. | ||
| Cow::Borrowed(slice) | ||
| } else { | ||
| Cow::Owned(front_ansi + head + slice + tail) | ||
| } | ||
| }; | ||
|
|
||
| // Push back remaining ANSI codes to result | ||
| for (sub, is_ansi) in code_iter { | ||
| if is_ansi { | ||
| result.to_mut().push_str(sub); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| result | ||
| } | ||
| #[cfg(not(feature = "ansi-parsing"))] | ||
| { | ||
| Cow::Owned(format!( | ||
| "{}{}", | ||
| &s[..width.saturating_sub(tail.len())], | ||
| tail | ||
| )) | ||
| let slice = s.get(bounds).unwrap_or(""); | ||
|
|
||
| if head.is_empty() && tail.is_empty() { | ||
| Cow::Borrowed(slice) | ||
| } else { | ||
| Cow::Owned(format!("{head}{slice}{tail}")) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Truncates a string to a certain text width (in columns). | ||
| /// | ||
| /// This ensures that escape codes are not screwed up in the process. | ||
| /// If the maximum width is exceeded the string will be truncated but | ||
| /// escape codes will still be honored. If truncation takes place | ||
| /// the tail string will be appended. | ||
| pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { | ||
| if measure_text_width(s) <= width { | ||
| Cow::Borrowed(s) | ||
| } else { | ||
| let tail_width = measure_text_width(tail); | ||
| slice_str(s, "", 0..width.saturating_sub(tail_width), tail) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1089,6 +1136,57 @@ fn test_truncate_str() { | |
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_slice_ansi_str() { | ||
| // Note that 🐶 is two columns wide | ||
| let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; | ||
| assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, ">>>", 0..test_str.len(), "<<<"), | ||
| format!(">>>{test_str}<<<"), | ||
| ); | ||
|
|
||
| if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") { | ||
| assert_eq!(measure_text_width(test_str), 16); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 5..5, ""), | ||
| "\u{1b}[31m\u{1b}[1m\u{1b}[0m" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 0..5, ""), | ||
| "Hello\x1b[31m\x1b[1m\x1b[0m" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 0..6, ""), | ||
| "Hello\x1b[31m\x1b[1m\x1b[0m" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 0..7, ""), | ||
| "Hello\x1b[31m🐶\x1b[1m\x1b[0m" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 4..9, ""), | ||
| "o\x1b[31m🐶\x1b[1m🐶\x1b[0m" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, "", 7..21, ""), | ||
| "\x1b[31m\x1b[1m🐶\x1b[0m world!" | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| slice_str(test_str, ">>>", 7..21, "<<<"), | ||
| "\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<" | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_truncate_str_no_ansi() { | ||
| assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar"); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not `stop` but `end`, presumably.
I think this docstring could use a few examples to make the expected behavior clearer.