Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 117 additions & 2 deletions crates/forge_infra/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,20 +190,62 @@ async fn stream<A: AsyncReadExt + Unpin, W: Write>(
let mut output = Vec::new();
if let Some(io) = io.as_mut() {
let mut buff = [0; 1024];
// Carry incomplete trailing UTF-8 codepoint bytes across reads — Windows
// console stdio rejects even one byte of a split codepoint.
let mut pending = Vec::<u8>::new();
loop {
let n = io.read(&mut buff).await?;
if n == 0 {
break;
}
writer.write_all(buff.get(..n).unwrap_or(&[]))?;
let chunk = buff.get(..n).unwrap_or(&[]);
output.extend_from_slice(chunk);

let mut working = std::mem::take(&mut pending);
working.extend_from_slice(chunk);
pending = write_lossy_utf8(&mut writer, &working)?;
// note: flush is necessary else we get the cursor could not be found error.
writer.flush()?;
output.extend_from_slice(buff.get(..n).unwrap_or(&[]));
}
// Flush dangling bytes from a stream that ended mid-codepoint.
if !pending.is_empty() {
writer.write_all(String::from_utf8_lossy(&pending).as_bytes())?;
writer.flush()?;
}
}
Ok(output)
}

/// Writes `buf` as valid UTF-8 (invalid bytes → `U+FFFD`) and returns any
/// incomplete trailing codepoint bytes for the caller to carry into the next
/// chunk.
fn write_lossy_utf8<W: Write>(writer: &mut W, buf: &[u8]) -> io::Result<Vec<u8>> {
let mut cursor = 0;
while cursor < buf.len() {
match std::str::from_utf8(buf.get(cursor..).unwrap_or(&[])) {
Ok(tail) => {
writer.write_all(tail.as_bytes())?;
return Ok(Vec::new());
}
Err(e) => {
let valid_up_to = e.valid_up_to();
let valid = buf.get(cursor..cursor + valid_up_to).unwrap_or(&[]);
writer.write_all(valid)?;
match e.error_len() {
Some(len) => {
writer.write_all("\u{FFFD}".as_bytes())?;
cursor += valid_up_to + len;
}
None => {
return Ok(buf.get(cursor + valid_up_to..).unwrap_or(&[]).to_vec());
}
}
}
}
}
Ok(Vec::new())
}

/// The implementation for CommandExecutorService
#[async_trait::async_trait]
impl CommandInfra for ForgeCommandExecutorService {
Expand Down Expand Up @@ -429,4 +471,77 @@ mod tests {
assert_eq!(actual.stderr, expected.stderr);
assert_eq!(actual.success(), expected.success());
}

mod write_lossy_utf8 {
use pretty_assertions::assert_eq;

use super::super::write_lossy_utf8;

fn run(buf: &[u8]) -> (Vec<u8>, Vec<u8>) {
let mut out = Vec::<u8>::new();
let pending = write_lossy_utf8(&mut out, buf).unwrap();
(out, pending)
}

#[test]
fn valid_ascii_passes_through() {
let (out, pending) = run(b"hello");
assert_eq!(out, b"hello");
assert!(pending.is_empty());
}

#[test]
fn valid_multibyte_passes_through() {
// "héllo ✓" — mixed 2-byte and 3-byte codepoints.
let input = "héllo ✓".as_bytes();
let (out, pending) = run(input);
assert_eq!(out, input);
assert!(pending.is_empty());
}

#[test]
fn incomplete_trailing_codepoint_is_buffered() {
// "é" is 0xC3 0xA9 — leading byte alone must be held back.
let (out, pending) = run(&[b'a', 0xC3]);
assert_eq!(out, b"a");
assert_eq!(pending, vec![0xC3]);
}

#[test]
fn multibyte_split_across_two_chunks_emits_once_whole() {
let mut out = Vec::<u8>::new();
let pending = write_lossy_utf8(&mut out, &[b'a', 0xC3]).unwrap();
assert_eq!(pending, vec![0xC3]);
assert_eq!(out, b"a");

let mut working = pending;
working.push(0xA9);
let pending = write_lossy_utf8(&mut out, &working).unwrap();
assert!(pending.is_empty());
assert_eq!(out, "aé".as_bytes());
}

#[test]
fn invalid_byte_in_middle_becomes_replacement() {
let (out, pending) = run(&[b'a', 0xFF, b'b']);
assert_eq!(out, "a\u{FFFD}b".as_bytes());
assert!(pending.is_empty());
}

#[test]
fn lone_continuation_byte_becomes_replacement() {
let (out, pending) = run(&[b'a', 0x80, b'b']);
assert_eq!(out, "a\u{FFFD}b".as_bytes());
assert!(pending.is_empty());
}

#[test]
fn windows_1252_smart_quote_becomes_replacement() {
// Regression: 0x91/0x92 land as bare continuation bytes and broke
// console stdio on Windows before this fix.
let (out, pending) = run(b"quote: \x91hi\x92");
assert_eq!(out, "quote: \u{FFFD}hi\u{FFFD}".as_bytes());
assert!(pending.is_empty());
}
}
}
Loading