/// Decode captured compiler output (stdout or stderr) into a `String`.
///
/// The output is not guaranteed to be valid UTF-8: a code frame can truncate a
/// multi-byte character. The bytes are therefore decoded lossily, so each
/// invalid sequence becomes U+FFFD (the replacement character) instead of
/// crashing the build.
///
/// Returns an owned `String`; well-formed input is returned unchanged.
fn compiler_output_to_string(bytes: &[u8]) -> String {
    // `from_utf8_lossy` yields a `Cow<str>`: borrowed when the input is already
    // valid, owned when a replacement had to be inserted. `into_owned` reuses
    // the owned buffer in the latter case, whereas `to_string` would copy it a
    // second time.
    String::from_utf8_lossy(bytes).into_owned()
}
// The compiler can write a code frame that truncates a multi-byte character, so the
// captured output is not always valid UTF-8. Decoding it must not panic, and the
// readable prefix must survive with the broken tail turned into a replacement character.
#[test]
fn compiler_output_to_string_handles_invalid_utf8() {
    // 0xe2 0x80 is the start of an em dash (U+2014, encoded as e2 80 94); the third byte
    // is missing.
    let truncated = b"Warning \xe2\x80";
    let decoded = compiler_output_to_string(truncated);
    // Pin the exact result rather than only the prefix, so that silently dropping the
    // invalid bytes would be caught as well.
    assert_eq!(decoded, "Warning \u{FFFD}");
}