From 0e1be596760e53856ef41ab1e7980dd7b984793b Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Fri, 24 Oct 2025 01:58:48 +0200 Subject: [PATCH 1/3] util(cksum): Avoid collecting a file vector preemptively --- src/uu/cksum/src/cksum.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 7d595e625ac..ce5b9316206 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -267,6 +267,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); } + let files = matches.get_many::(options::FILE).map_or_else( + // No files given, read from stdin. + || Box::new(iter::once(OsStr::new("-"))) as Box>, + // At least one file given, read from them. + |files| Box::new(files.map(OsStr::new)) as Box>, + ); + if check { let text_flag = matches.get_flag(options::TEXT); let binary_flag = matches.get_flag(options::BINARY); @@ -290,11 +297,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Execute the checksum validation based on the presence of files or the use of stdin - let files = matches.get_many::(options::FILE).map_or_else( - || iter::once(OsStr::new("-")).collect::>(), - |files| files.map(OsStr::new).collect::>(), - ); - let verbose = ChecksumVerbose::new(status, quiet, warn); let opts = ChecksumOptions { binary: binary_flag, @@ -303,7 +305,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { verbose, }; - return perform_checksum_validation(files.iter().copied(), algo_option, length, opts); + return perform_checksum_validation(files, algo_option, length, opts); } let (tag, asterisk) = handle_tag_text_binary_flags(std::env::args_os())?; @@ -330,10 +332,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { line_ending, }; - match matches.get_many::(options::FILE) { - Some(files) => cksum(opts, files.map(OsStr::new))?, - None => cksum(opts, iter::once(OsStr::new("-")))?, - } + cksum(opts, files)?; Ok(()) } From 485fd8e207024fae6c799861011d4af8ba7a6e7b Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Sat, 25 Oct 2025 21:56:21 +0200 Subject: [PATCH 2/3] util(cksum): Fix non-idiomatic comments --- src/uu/cksum/src/cksum.rs | 8 +++----- src/uucore/src/lib/features/checksum.rs | 4 +--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index ce5b9316206..405412911fc 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -203,11 +203,9 @@ mod options { pub const ZERO: &str = "zero"; } -/*** - * cksum has a bunch of legacy behavior. - * We handle this in this function to make sure they are self contained - * and "easier" to understand - */ +/// cksum has a bunch of legacy behavior. +/// We handle this in this function to make sure they are self contained +/// and "easier" to understand fn handle_tag_text_binary_flags>( args: impl Iterator, ) -> UResult<(bool, bool)> { diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 44293ca0deb..1ce88cf5984 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -1115,9 +1115,7 @@ fn process_checksum_file( Ok(()) } -/*** - * Do the checksum validation (can be strict or not) -*/ +/// Do the checksum validation (can be strict or not) pub fn perform_checksum_validation<'a, I>( files: I, algo_name_input: Option<&str>, From a4f4542467200ab4acb18bc2ae7997c2706675ae Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Sun, 26 Oct 2025 00:50:35 +0200 Subject: [PATCH 3/3] util(cksum): Fix behavior with --text and --untagged, and prepare hashsum/cksum merge refactor --- src/uu/cksum/src/cksum.rs | 342 ++++++++++++++++-------- src/uucore/src/lib/features/checksum.rs | 9 + tests/by-util/test_cksum.rs | 71 ++++- 3 files changed, 304 insertions(+), 118 deletions(-) diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 405412911fc..bdf94a4997d 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -15,8 +15,8 @@ use std::path::Path; use uucore::checksum::{ ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_SYSV, ChecksumError, ChecksumOptions, - ChecksumVerbose, SUPPORTED_ALGORITHMS, calculate_blake2b_length, detect_algo, digest_reader, - perform_checksum_validation, + ChecksumVerbose, HashAlgorithm, LEGACY_ALGORITHMS, SUPPORTED_ALGORITHMS, + calculate_blake2b_length, detect_algo, digest_reader, perform_checksum_validation, }; use uucore::translate; @@ -29,63 +29,189 @@ use uucore::{ sum::Digest, }; -#[derive(Debug, PartialEq)] -enum OutputFormat { - Hexadecimal, - Raw, - Base64, -} - struct Options { algo_name: &'static str, digest: Box, output_bits: usize, - tag: bool, // will cover the --untagged option length: Option, output_format: OutputFormat, - asterisk: bool, // if we display an asterisk or not (--binary/--text) line_ending: LineEnding, } +/// Reading mode used to compute digest. +/// +/// On most linux systems, this is irrelevant, as there is no distinction +/// between text and binary files. Refer to GNU's cksum documentation for more +/// information. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ReadingMode { + Binary, + Text, +} + +impl ReadingMode { + #[inline] + fn as_char(&self) -> char { + match self { + Self::Binary => '*', + Self::Text => ' ', + } + } +} + +/// Whether to write the digest as hexadecimal or encoded in base64. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum DigestFormat { + Hexadecimal, + Base64, +} + +impl DigestFormat { + #[inline] + fn is_base64(&self) -> bool { + *self == Self::Base64 + } +} + +/// Holds the representation that shall be used for printing a checksum line +#[derive(Debug, PartialEq, Eq)] +enum OutputFormat { + /// Raw digest + Raw, + + /// Selected for older algorithms which had their custom formatting + /// + /// Default for crc, sysv, bsd + Legacy, + + /// `$ALGO_NAME ($FILENAME) = $DIGEST` + Tagged(DigestFormat), + + /// '$DIGEST $FLAG$FILENAME' + /// where 'flag' depends on the reading mode + /// + /// Default for standalone checksum utilities + Untagged(DigestFormat, ReadingMode), +} + +impl OutputFormat { + #[inline] + fn is_raw(&self) -> bool { + *self == Self::Raw + } +} + +fn print_legacy_checksum( + options: &Options, + filename: &OsStr, + sum: &str, + size: usize, +) -> UResult<()> { + debug_assert!(LEGACY_ALGORITHMS.contains(&options.algo_name)); + + // Print the sum + match options.algo_name { + ALGORITHM_OPTIONS_SYSV => print!( + "{} {}", + sum.parse::().unwrap(), + size.div_ceil(options.output_bits), + ), + ALGORITHM_OPTIONS_BSD => { + // The BSD checksum output is 5 digit integer + let bsd_width = 5; + print!( + "{:0bsd_width$} {:bsd_width$}", + sum.parse::().unwrap(), + size.div_ceil(options.output_bits), + ); + } + ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => { + print!("{sum} {size}"); + } + _ => unreachable!("Not a legacy algorithm"), + }; + + // Print the filename after a space if not stdin + if filename != "-" { + print!(" "); + let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); + } + + Ok(()) +} + +fn print_tagged_checksum(options: &Options, filename: &OsStr, sum: &String) -> UResult<()> { + // Print algo name and opening parenthesis. + print!( + "{} (", + match (options.algo_name, options.length) { + // Multiply the length by 8, as we want to print the length in bits. + (ALGORITHM_OPTIONS_BLAKE2B, Some(l)) => format!("BLAKE2b-{}", l * 8), + (ALGORITHM_OPTIONS_BLAKE2B, None) => "BLAKE2b".into(), + (name, _) => name.to_ascii_uppercase(), + } + ); + + // Print filename + let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); + + // Print closing parenthesis and sum + print!(") = {sum}"); + + Ok(()) +} + +fn print_untagged_checksum( + filename: &OsStr, + sum: &String, + reading_mode: ReadingMode, +) -> UResult<()> { + // Print checksum and reading mode flag + print!("{sum} {}", reading_mode.as_char()); + + // Print filename + let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); + + Ok(()) +} + /// Calculate checksum /// /// # Arguments /// /// * `options` - CLI options for the assigning checksum algorithm /// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum -#[allow(clippy::cognitive_complexity)] fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()> where I: Iterator, { let files: Vec<_> = files.collect(); - if options.output_format == OutputFormat::Raw && files.len() > 1 { + + if options.output_format.is_raw() && files.len() > 1 { return Err(Box::new(ChecksumError::RawMultipleFiles)); } for filename in files { - let filename = Path::new(filename); + let filepath = Path::new(filename); let stdin_buf; let file_buf; - let is_stdin = filename == OsStr::new("-"); - - if filename.is_dir() { + if filepath.is_dir() { show!(USimpleError::new( 1, - translate!("cksum-error-is-directory", "file" => filename.display()) + translate!("cksum-error-is-directory", "file" => filepath.display()) )); continue; } // Handle the file input - let mut file = BufReader::new(if is_stdin { + let mut file = BufReader::new(if filename == "-" { stdin_buf = stdin(); Box::new(stdin_buf) as Box } else { - file_buf = match File::open(filename) { + file_buf = match File::open(filepath) { Ok(file) => file, Err(err) => { - show!(err.map_err_context(|| filename.to_string_lossy().to_string())); + show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); continue; } }; @@ -96,7 +222,16 @@ where digest_reader(&mut options.digest, &mut file, false, options.output_bits) .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; - let sum = match options.output_format { + // Encodes the sum if df is Base64, leaves as-is otherwise. + let encode_sum = |sum: String, df: DigestFormat| { + if df.is_base64() { + encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) + } else { + sum + } + }; + + match options.output_format { OutputFormat::Raw => { let bytes = match options.algo_name { ALGORITHM_OPTIONS_CRC => sum_hex.parse::().unwrap().to_be_bytes().to_vec(), @@ -109,77 +244,22 @@ where stdout().write_all(&bytes)?; return Ok(()); } - OutputFormat::Hexadecimal => sum_hex, - OutputFormat::Base64 => match options.algo_name { - ALGORITHM_OPTIONS_CRC - | ALGORITHM_OPTIONS_CRC32B - | ALGORITHM_OPTIONS_SYSV - | ALGORITHM_OPTIONS_BSD => sum_hex, - _ => encoding::for_cksum::BASE64.encode(&hex::decode(sum_hex).unwrap()), - }, - }; - - // The BSD checksum output is 5 digit integer - let bsd_width = 5; - let (before_filename, should_print_filename, after_filename) = match options.algo_name { - ALGORITHM_OPTIONS_SYSV => ( - format!( - "{} {}{}", - sum.parse::().unwrap(), - sz.div_ceil(options.output_bits), - if is_stdin { "" } else { " " } - ), - !is_stdin, - String::new(), - ), - ALGORITHM_OPTIONS_BSD => ( - format!( - "{:0bsd_width$} {:bsd_width$}{}", - sum.parse::().unwrap(), - sz.div_ceil(options.output_bits), - if is_stdin { "" } else { " " } - ), - !is_stdin, - String::new(), - ), - ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_CRC32B => ( - format!("{sum} {sz}{}", if is_stdin { "" } else { " " }), - !is_stdin, - String::new(), - ), - ALGORITHM_OPTIONS_BLAKE2B if options.tag => { - ( - if let Some(length) = options.length { - // Multiply by 8 here, as we want to print the length in bits. - format!("BLAKE2b-{} (", length * 8) - } else { - "BLAKE2b (".to_owned() - }, - true, - format!(") = {sum}"), - ) + OutputFormat::Legacy => { + print_legacy_checksum(&options, filename, &sum_hex, sz)?; } - _ => { - if options.tag { - ( - format!("{} (", options.algo_name.to_ascii_uppercase()), - true, - format!(") = {sum}"), - ) - } else { - let prefix = if options.asterisk { "*" } else { " " }; - (format!("{sum} {prefix}"), true, String::new()) - } + OutputFormat::Tagged(digest_format) => { + print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; + } + OutputFormat::Untagged(digest_format, reading_mode) => { + print_untagged_checksum( + filename, + &encode_sum(sum_hex, digest_format), + reading_mode, + )?; } - }; - - print!("{before_filename}"); - if should_print_filename { - // The filename might not be valid UTF-8, and filename.display() would mangle the names. - // Therefore, emit the bytes directly to stdout, without any attempt at encoding them. - let _dropped_result = stdout().write_all(os_str_as_bytes(filename.as_os_str())?); } - print!("{after_filename}{}", options.line_ending); + + print!("{}", options.line_ending); } Ok(()) } @@ -203,31 +283,83 @@ mod options { pub const ZERO: &str = "zero"; } -/// cksum has a bunch of legacy behavior. -/// We handle this in this function to make sure they are self contained -/// and "easier" to understand +/// cksum has a bunch of legacy behavior. We handle this in this function to +/// make sure they are self contained and "easier" to understand. +/// +/// Returns a pair of boolean. The first one indicates if we should use tagged +/// output format, the second one indicates if we should use the binary flag in +/// the untagged case. fn handle_tag_text_binary_flags>( args: impl Iterator, ) -> UResult<(bool, bool)> { let mut tag = true; let mut binary = false; + let mut text = false; // --binary, --tag and --untagged are tight together: none of them - // conflicts with each other but --tag will reset "binary" and set "tag". + // conflicts with each other but --tag will reset "binary" and "text" and + // set "tag". for arg in args { let arg = arg.as_ref(); if arg == "-b" || arg == "--binary" { + text = false; binary = true; + } else if arg == "--text" { + text = true; + binary = false; } else if arg == "--tag" { tag = true; binary = false; + text = false; } else if arg == "--untagged" { tag = false; } } - Ok((tag, !tag && binary)) + // Specifying --text without ever mentioning --untagged fails. + if text && tag { + return Err(ChecksumError::TextWithoutUntagged.into()); + } + + Ok((tag, binary)) +} + +/// Use already-processed arguments to decide the output format. +fn figure_out_output_format( + algo: &HashAlgorithm, + tag: bool, + binary: bool, + raw: bool, + base64: bool, +) -> OutputFormat { + // Raw output format takes precedence over anything else. + if raw { + return OutputFormat::Raw; + } + + // Then, if the algo is legacy, takes precedence over the rest + if LEGACY_ALGORITHMS.contains(&algo.name) { + return OutputFormat::Legacy; + } + + let digest_format = if base64 { + DigestFormat::Base64 + } else { + DigestFormat::Hexadecimal + }; + + // After that, decide between tagged and untagged output + if tag { + OutputFormat::Tagged(digest_format) + } else { + let reading_mode = if binary { + ReadingMode::Binary + } else { + ReadingMode::Text + }; + OutputFormat::Untagged(digest_format, reading_mode) + } } #[uucore::main] @@ -261,7 +393,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { None => None, }; - if ["bsd", "crc", "sysv", "crc32b"].contains(&algo_name) && check { + if LEGACY_ALGORITHMS.contains(&algo_name) && check { return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); } @@ -306,27 +438,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { return perform_checksum_validation(files, algo_option, length, opts); } - let (tag, asterisk) = handle_tag_text_binary_flags(std::env::args_os())?; + let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; let algo = detect_algo(algo_name, length)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); - let output_format = if matches.get_flag(options::RAW) { - OutputFormat::Raw - } else if matches.get_flag(options::BASE64) { - OutputFormat::Base64 - } else { - OutputFormat::Hexadecimal - }; + let output_format = figure_out_output_format( + &algo, + tag, + binary, + matches.get_flag(options::RAW), + matches.get_flag(options::BASE64), + ); let opts = Options { algo_name: algo.name, digest: (algo.create_fn)(), output_bits: algo.bits, length, - tag, output_format, - asterisk, line_ending, }; diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 1ce88cf5984..eae482bb274 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -66,6 +66,13 @@ pub const SUPPORTED_ALGORITHMS: [&str; 16] = [ ALGORITHM_OPTIONS_SHAKE256, ]; +pub const LEGACY_ALGORITHMS: [&str; 4] = [ + ALGORITHM_OPTIONS_SYSV, + ALGORITHM_OPTIONS_BSD, + ALGORITHM_OPTIONS_CRC, + ALGORITHM_OPTIONS_CRC32B, +]; + pub struct HashAlgorithm { pub name: &'static str, pub create_fn: Box Box>, @@ -224,6 +231,8 @@ pub enum ChecksumError { LengthOnlyForBlake2b, #[error("the --binary and --text options are meaningless when verifying checksums")] BinaryTextConflict, + #[error("--text mode is only supported with --untagged")] + TextWithoutUntagged, #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] AlgorithmNotSupportedWithCheck, #[error("You cannot combine multiple hash algorithms!")] diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 3e84a2d04e6..ef2f8d86fca 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -615,20 +615,67 @@ fn test_reset_binary_but_set() { .stdout_contains("d41d8cd98f00b204e9800998ecf8427e *"); } -#[test] -fn test_text_tag() { - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; +/// Test legacy behaviors with --tag, --untagged, --binary and --text +mod output_format { + use super::*; - at.touch("f"); + #[test] + fn test_text_tag() { + let (at, mut ucmd) = at_and_ucmd!(); + at.touch("f"); - scene - .ucmd() - .arg("--text") // should disappear because of the following option - .arg("--tag") - .arg(at.subdir.join("f")) - .succeeds() - .stdout_contains("4294967295 0 "); + ucmd.arg("--text") // should disappear because of the following option + .arg("--tag") + .args(&["-a", "md5"]) + .arg(at.subdir.join("f")) + .succeeds() + // Tagged output is used + .stdout_contains("f) = d41d8cd98f00b204e9800998ecf8427e"); + } + + #[test] + fn test_text_no_untagged() { + let (at, mut ucmd) = at_and_ucmd!(); + at.touch("f"); + + // --text without --untagged fails + ucmd.arg("--text") + .args(&["-a", "md5"]) + .arg(at.subdir.join("f")) + .fails_with_code(1) + .stderr_contains("--text mode is only supported with --untagged"); + } + + #[test] + fn test_text_binary() { + let (at, mut ucmd) = at_and_ucmd!(); + at.touch("f"); + + // --binary overwrites --text, thus no error is raised + ucmd.arg("--text") + .arg("--binary") + .args(&["-a", "md5"]) + .arg(at.subdir.join("f")) + .succeeds() + // No --untagged, tagged output is used + .stdout_contains("f) = d41d8cd98f00b204e9800998ecf8427e"); + } + + #[test] + fn test_text_binary_untagged() { + let (at, mut ucmd) = at_and_ucmd!(); + at.touch("f"); + + // --binary overwrites --text + ucmd.arg("--text") + .arg("--binary") + .arg("--untagged") + .args(&["-a", "md5"]) + .arg(at.subdir.join("f")) + .succeeds() + // Untagged output is used + .stdout_contains("d41d8cd98f00b204e9800998ecf8427e *"); + } } #[test]