From c4f7bef7d6dd09d7388ebcd624d664f029987848 Mon Sep 17 00:00:00 2001 From: Nathan Tranquilla Date: Sun, 21 Jun 2026 19:28:02 -0300 Subject: [PATCH 1/3] test: show the build panics on non-utf8 compiler output bsc can print a code frame that cuts a multi-byte character in half, so the captured stderr is not always valid utf-8. The Ok branch decodes it with from_utf8().expect(), which panics and takes down the whole build with a byte offset into stdout and no file name. This test feeds a truncated em dash and reproduces the panic. --- rewatch/src/build/compile.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/rewatch/src/build/compile.rs b/rewatch/src/build/compile.rs index 668da7857ab..0ff89a734a9 100644 --- a/rewatch/src/build/compile.rs +++ b/rewatch/src/build/compile.rs @@ -25,6 +25,13 @@ use std::sync::mpsc; use std::time::SystemTime; use tracing::{info_span, instrument}; +/// Decode captured compiler output (stdout or stderr) into a String. +fn compiler_output_to_string(bytes: &[u8]) -> String { + std::str::from_utf8(bytes) + .expect("stdout should be non-null") + .to_string() +} + /// Execute js-post-build command for a compiled JavaScript file. /// The command runs in the directory containing the rescript.json that defines it. /// The absolute path to the JS file is passed as an argument. @@ -1039,9 +1046,7 @@ fn compile_file( "Could not compile file. Error: {e}. Path to AST: {ast_path:?}" )), Ok(x) => { - let err = std::str::from_utf8(&x.stderr) - .expect("stdout should be non-null") - .to_string(); + let err = compiler_output_to_string(&x.stderr); let dir = Path::new(implementation_file_path).parent().unwrap(); @@ -1346,6 +1351,16 @@ mod tests { use std::time::SystemTime; use tempfile::TempDir; + // The compiler can write a code frame that truncates a multi-byte character, so the + // captured output is not always valid UTF-8. Decoding it must not panic. + #[test] + fn compiler_output_to_string_handles_invalid_utf8() { + // 0xe2 0x80 is the start of an em dash (U+2014); the third byte is missing. + let truncated = [b'W', b'a', b'r', b'n', b'i', b'n', b'g', b' ', 0xe2, 0x80]; + let decoded = compiler_output_to_string(&truncated); + assert!(decoded.starts_with("Warning ")); + } + fn test_project_context(root: &Path) -> ProjectContext { let config = config::tests::create_config(config::tests::CreateConfigArgs { name: "test-root".to_string(), From f62e9ff9aa7fb8ac160663714976ea9adcc444c6 Mon Sep 17 00:00:00 2001 From: Nathan Tranquilla Date: Sun, 21 Jun 2026 19:28:44 -0300 Subject: [PATCH 2/3] fix: decode compiler output with from_utf8_lossy The Ok branch used from_utf8().expect() on the captured stderr, which panics when the output is not valid utf-8. The two sibling branches right above it already use from_utf8_lossy. Use it here too so a truncated character turns into a replacement character and the build keeps going instead of crashing. --- rewatch/src/build/compile.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rewatch/src/build/compile.rs b/rewatch/src/build/compile.rs index 0ff89a734a9..b7da110ae87 100644 --- a/rewatch/src/build/compile.rs +++ b/rewatch/src/build/compile.rs @@ -26,10 +26,12 @@ use std::time::SystemTime; use tracing::{info_span, instrument}; /// Decode captured compiler output (stdout or stderr) into a String. +/// +/// The output is not guaranteed to be valid UTF-8: a code frame can truncate a +/// multi-byte character. Decode lossily so a bad byte becomes a replacement +/// character instead of crashing the build. fn compiler_output_to_string(bytes: &[u8]) -> String { - std::str::from_utf8(bytes) - .expect("stdout should be non-null") - .to_string() + String::from_utf8_lossy(bytes).to_string() } /// Execute js-post-build command for a compiled JavaScript file. From 87b33197172f7bfe2ccee28f7d0a14f70cfc39d4 Mon Sep 17 00:00:00 2001 From: Nathan Tranquilla Date: Mon, 22 Jun 2026 11:55:57 -0300 Subject: [PATCH 3/3] Add changelog entry for the non-UTF-8 build crash fix --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7782095395..490e204adea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ - Fix analysis namespace parsing after the Yojson migration. https://github.com/rescript-lang/rescript/pull/8454 - Fix namespaced reference lookup in editor analysis. https://github.com/rescript-lang/rescript/pull/8455 - Fix analysis segmentation fault for references after https://github.com/rescript-lang/rescript/pull/7887. https://github.com/rescript-lang/rescript/pull/8477 +- Fix build crash when the compiler emits output that is not valid UTF-8, such as a truncated multibyte character in a code frame. https://github.com/rescript-lang/rescript/pull/8482 #### :memo: Documentation