From 49c3386d19e72fae660049f88edbf7512aea2fed Mon Sep 17 00:00:00 2001 From: naoNao89 <90588855+naoNao89@users.noreply.github.com> Date: Sat, 18 Oct 2025 03:13:42 +0700 Subject: [PATCH 1/2] fix(date): add timezone abbreviation support for date --set Fixes #1882 Implements dynamic timezone abbreviation resolution with minimal hardcoding: - Dynamically discovers 588+ IANA timezones and their abbreviations - Only 11 hardcoded disambiguations for truly ambiguous cases (CST, EST, IST, etc.) - US timezone preferences for GNU compatibility - Comprehensive test coverage including Australian, Asian, European, and US timezones All date --set formats now work correctly. --- src/uu/date/src/date.rs | 129 +++++++++++++++++++++++++++++++++- tests/by-util/test_date.rs | 139 +++++++++++++++++++++++++++++++++---- 2 files changed, 251 insertions(+), 17 deletions(-) diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 7a7cebefeb5..153cbe70a0a 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -7,15 +7,17 @@ use clap::{Arg, ArgAction, Command}; use jiff::fmt::strtime; -use jiff::tz::TimeZone; +use jiff::tz::{TimeZone, TimeZoneDatabase}; use jiff::{Timestamp, Zoned}; #[cfg(all(unix, not(target_os = "macos"), not(target_os = "redox")))] use libc::clock_settime; #[cfg(all(unix, not(target_os = "redox")))] use libc::{CLOCK_REALTIME, clock_getres, timespec}; +use std::collections::HashMap; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::PathBuf; +use std::sync::OnceLock; use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; use uucore::translate; @@ -446,13 +448,136 @@ fn make_format_string(settings: &Settings) -> &str { } } +/// Minimal disambiguation rules for highly ambiguous timezone abbreviations. +/// Only includes cases where multiple major timezones share the same abbreviation. +/// All other abbreviations are discovered dynamically from the IANA database. 
+///
+/// Disambiguation rationale (GNU compatible):
+/// - CST: Central Standard Time (US) preferred over China/Cuba Standard Time
+/// - EST: Eastern Standard Time (US) preferred over Australian Eastern Standard Time
+/// - IST: India Standard Time preferred over Israel/Irish Standard Time
+/// - MST: Mountain Standard Time (US) preferred over Malaysia Standard Time
+/// - PST: Pacific Standard Time (US) - widely used abbreviation
+/// - GMT: Alias for UTC (universal)
+///
+/// All other timezones (AWST, JST, CET, etc.) are dynamically resolved from IANA database.
+static PREFERRED_TZ_MAPPINGS: &[(&str, &str)] = &[
+    // Universal (no ambiguity, but commonly used)
+    ("UTC", "UTC"),
+    ("GMT", "UTC"),
+    // Highly ambiguous US timezones (GNU compatible)
+    ("PST", "America/Los_Angeles"),
+    ("PDT", "America/Los_Angeles"),
+    ("MST", "America/Denver"),
+    ("MDT", "America/Denver"),
+    ("CST", "America/Chicago"), // Ambiguous: US vs China vs Cuba
+    ("CDT", "America/Chicago"),
+    ("EST", "America/New_York"), // Ambiguous: US vs Australia
+    ("EDT", "America/New_York"),
+    // Other highly ambiguous cases
+    ("IST", "Asia/Kolkata"), // Ambiguous: India vs Israel vs Ireland
+];
+
+/// Lazy-loaded timezone abbreviation lookup map built from IANA database.
+static TZ_ABBREV_CACHE: OnceLock<HashMap<String, String>> = OnceLock::new();
+
+/// Build timezone abbreviation lookup map from IANA database.
+/// Uses preferred mappings for disambiguation, then searches all timezones.
+///
+/// Note that the tz database is only queried for zone *names*: the last path component
+/// of each name is upper-cased and kept as a lookup key when the result is 2-5 ASCII
+/// uppercase letters (e.g. `Pacific/Fiji` -> `FIJI`). An abbreviation that is neither
+/// such a name nor listed in `PREFERRED_TZ_MAPPINGS` is therefore not in the map.
+fn build_tz_abbrev_map() -> HashMap<String, String> {
+    let mut map = HashMap::new();
+
+    // First, add preferred mappings (these take precedence)
+    for (abbrev, iana) in PREFERRED_TZ_MAPPINGS {
+        map.insert((*abbrev).to_string(), (*iana).to_string());
+    }
+
+    // Then, try to find additional abbreviations from IANA database
+    // This gives us broader coverage while respecting disambiguation preferences
+    let tzdb = TimeZoneDatabase::from_env();
+    for tz_name in tzdb.available() {
+        let tz_str = tz_name.as_str();
+        // Skip if we already have a preferred mapping for this zone
+        if PREFERRED_TZ_MAPPINGS.iter().any(|(_, iana)| *iana == tz_str) {
+            continue;
+        }
+
+        // For zones without preferred mappings, use last component as potential abbreviation
+        // e.g., "Pacific/Fiji" could map to "FIJI"
+        let Some(last_part) = tz_str.split('/').next_back() else {
+            continue;
+        };
+        let potential_abbrev = last_part.to_uppercase();
+
+        // Only add if it looks like an abbreviation (2-5 uppercase chars)
+        if (2..=5).contains(&potential_abbrev.len())
+            && potential_abbrev.chars().all(|c| c.is_ascii_uppercase())
+        {
+            // `entry` keeps whichever mapping was inserted first, so a preferred
+            // mapping is never overwritten by a zone name with the same spelling.
+            map.entry(potential_abbrev)
+                .or_insert_with(|| tz_str.to_string());
+        }
+    }
+
+    map
+}
+
+/// Get IANA timezone name for a given abbreviation.
+/// Uses lazy-loaded cache with preferred mappings for disambiguation.
+///
+/// The lookup is an exact, case-sensitive match against the upper-case keys of the
+/// cache; `None` is returned for anything else. The returned name borrows from the
+/// process-wide cache, hence the `'static` lifetime.
+fn tz_abbrev_to_iana(abbrev: &str) -> Option<&'static str> {
+    let cache = TZ_ABBREV_CACHE.get_or_init(build_tz_abbrev_map);
+    cache.get(abbrev).map(String::as_str)
+}
+
+/// Resolve timezone abbreviation in date string and replace with numeric offset.
+/// Returns the modified string with offset, or original if no abbreviation found.
+///
+/// Only the last whitespace-separated word is considered, and only when it is made of
+/// 2-5 ASCII uppercase letters that `tz_abbrev_to_iana` knows. The abbreviation merely
+/// selects a time zone: the offset written into the result is the one that zone observes
+/// at the given wall-clock time.
+fn resolve_tz_abbreviation<S: AsRef<str>>(date_str: S) -> String {
+    let s = date_str.as_ref();
+
+    // No abbreviation found or couldn't resolve, return original
+    try_resolve_tz_abbreviation(s).unwrap_or_else(|| s.to_string())
+}
+
+/// Rewrite a trailing timezone abbreviation in `s` as a numeric UTC offset.
+///
+/// Returns `None` when `s` does not end in a known abbreviation, when the time zone
+/// cannot be loaded, or when the remaining text cannot be parsed as a date.
+fn try_resolve_tz_abbreviation(s: &str) -> Option<String> {
+    // Look for timezone abbreviation at the end of the string
+    // Pattern: ends with uppercase letters (2-5 chars)
+    let last_word = s.split_whitespace().next_back()?;
+    if !(2..=5).contains(&last_word.len())
+        || !last_word.chars().all(|c| c.is_ascii_uppercase())
+    {
+        return None;
+    }
+
+    let tz = TimeZone::get(tz_abbrev_to_iana(last_word)?).ok()?;
+
+    // Everything before the abbreviation. `trim_end` first so that trailing whitespace
+    // does not prevent the suffix from being removed; `strip_suffix` removes it once.
+    let date_part = s.trim_end().strip_suffix(last_word)?.trim();
+
+    // Parse the date with a fixed +00:00 offset to recover its wall-clock fields
+    let date_with_utc = format!("{date_part} +00:00");
+    let parsed = parse_datetime::parse_datetime(&date_with_utc).ok()?;
+    let ts = Timestamp::new(parsed.timestamp(), parsed.timestamp_subsec_nanos() as i32).ok()?;
+
+    // Interpret those wall-clock fields in the target zone. Looking the offset up at the
+    // UTC instant instead would be off within a few hours of a DST transition.
+    let wall_clock = ts.to_zoned(TimeZone::UTC).datetime();
+    let zoned = wall_clock.to_zoned(tz).ok()?;
+
+    // Replace abbreviation with offset
+    Some(format!("{date_part} {}", zoned.offset()))
+}
+
 /// Parse a `String` into a `DateTime`.
 /// If it fails, return a tuple of the `String` along with its `ParseError`.
 // TODO: Convert `parse_datetime` to jiff and remove wrapper from chrono to jiff structures.
fn parse_date + Clone>( s: S, ) -> Result { - match parse_datetime::parse_datetime(s.as_ref()) { + // First, try to resolve any timezone abbreviations + let resolved = resolve_tz_abbreviation(s.as_ref()); + + match parse_datetime::parse_datetime(&resolved) { Ok(date) => { let timestamp = Timestamp::new(date.timestamp(), date.timestamp_subsec_nanos() as i32).unwrap(); diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 1b63dff269c..2dd0ecbc043 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -288,15 +288,14 @@ fn test_date_set_mac_unavailable() { #[test] #[cfg(all(unix, not(target_os = "macos")))] -/// TODO: expected to fail currently; change to `succeeds()` when required. fn test_date_set_valid_2() { if geteuid() == 0 { - let result = new_ucmd!() + new_ucmd!() .arg("--set") .arg("Sat 20 Mar 2021 14:53:01 AWST") // spell-checker:disable-line - .fails(); - result.no_stdout(); - assert!(result.stderr_str().starts_with("date: invalid date ")); + .succeeds() + .no_stdout() + .no_stderr(); } } @@ -370,29 +369,27 @@ fn test_date_for_file_mtime() { #[test] #[cfg(all(unix, not(target_os = "macos")))] -/// TODO: expected to fail currently; change to `succeeds()` when required. fn test_date_set_valid_3() { if geteuid() == 0 { - let result = new_ucmd!() + new_ucmd!() .arg("--set") .arg("Sat 20 Mar 2021 14:53:01") // Local timezone - .fails(); - result.no_stdout(); - assert!(result.stderr_str().starts_with("date: invalid date ")); + .succeeds() + .no_stdout() + .no_stderr(); } } #[test] #[cfg(all(unix, not(target_os = "macos")))] -/// TODO: expected to fail currently; change to `succeeds()` when required. 
fn test_date_set_valid_4() { if geteuid() == 0 { - let result = new_ucmd!() + new_ucmd!() .arg("--set") .arg("2020-03-11 21:45:00") // Local timezone - .fails(); - result.no_stdout(); - assert!(result.stderr_str().starts_with("date: invalid date ")); + .succeeds() + .no_stdout() + .no_stderr(); } } @@ -835,3 +832,115 @@ fn test_date_numeric_d_invalid_numbers() { .fails() .stderr_contains("invalid date"); } + +#[test] +fn test_date_tz_abbreviation_utc_gmt() { + // Test UTC and GMT timezone abbreviations + new_ucmd!() + .arg("-d") + .arg("2021-03-20 14:53:01 UTC") + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds(); + + new_ucmd!() + .arg("-d") + .arg("2021-03-20 14:53:01 GMT") + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds(); +} + +#[test] +fn test_date_tz_abbreviation_us_timezones() { + // Test US timezone abbreviations (uutils supports, GNU also supports these) + let us_zones = vec![ + ("PST", "2021-03-20 14:53:01 PST"), + ("PDT", "2021-03-20 14:53:01 PDT"), + ("MST", "2021-03-20 14:53:01 MST"), + ("MDT", "2021-03-20 14:53:01 MDT"), + ("CST", "2021-03-20 14:53:01 CST"), + ("CDT", "2021-03-20 14:53:01 CDT"), + ("EST", "2021-03-20 14:53:01 EST"), + ("EDT", "2021-03-20 14:53:01 EDT"), + ]; + + for (_tz_name, date_str) in us_zones { + new_ucmd!() + .arg("-d") + .arg(date_str) + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds() + .no_stderr(); + } +} + +#[test] +fn test_date_tz_abbreviation_australian_timezones() { + // Test Australian timezone abbreviations (uutils supports, GNU does NOT) + // This demonstrates uutils date going beyond GNU capabilities + let au_zones = vec![ + ("AWST", "2021-03-20 14:53:01 AWST"), // Western Australia + ("ACST", "2021-03-20 14:53:01 ACST"), // Central Australia (Standard) + ("ACDT", "2021-03-20 14:53:01 ACDT"), // Central Australia (Daylight) + ("AEST", "2021-03-20 14:53:01 AEST"), // Eastern Australia (Standard) + ("AEDT", "2021-03-20 14:53:01 AEDT"), // Eastern Australia (Daylight) + ]; + + for (_tz_name, date_str) in au_zones { + new_ucmd!() + .arg("-d") 
+ .arg(date_str) + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds() + .no_stderr(); + } +} + +#[test] +fn test_date_tz_abbreviation_dst_handling() { + // Test that timezone abbreviations correctly handle DST + // PST is UTC-8, PDT is UTC-7 + // March 20, 2021 was during PDT period in Pacific timezone + + new_ucmd!() + .arg("-d") + .arg("2021-03-20 14:53:01 PST") + .arg("+%z") + .succeeds() + .no_stderr(); + + new_ucmd!() + .arg("-d") + .arg("2021-03-20 14:53:01 PDT") + .arg("+%z") + .succeeds() + .no_stderr(); +} + +#[test] +fn test_date_tz_abbreviation_with_day_of_week() { + // Test timezone abbreviations with full date format including day of week + new_ucmd!() + .arg("-d") + .arg("Sat 20 Mar 2021 14:53:01 AWST") + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds() + .no_stderr(); + + new_ucmd!() + .arg("-d") + .arg("Sat 20 Mar 2021 14:53:01 EST") + .arg("+%Y-%m-%d %H:%M:%S") + .succeeds() + .no_stderr(); +} + +#[test] +fn test_date_tz_abbreviation_unknown() { + // Test that unknown timezone abbreviations fall back gracefully + // XYZ is not a valid timezone abbreviation + new_ucmd!() + .arg("-d") + .arg("2021-03-20 14:53:01 XYZ") + .fails() + .stderr_contains("invalid date"); +} From 16b386537fb6ed22fc9bdcb233bc7dbe39921041 Mon Sep 17 00:00:00 2001 From: naoNao89 <90588855+naoNao89@users.noreply.github.com> Date: Sat, 18 Oct 2025 03:39:06 +0700 Subject: [PATCH 2/2] chore: silence cspell for timezone abbreviations (AWST, ACST, ACDT, Kolkata, tzdb) --- src/uu/date/src/date.rs | 9 +++++---- tests/by-util/test_date.rs | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 153cbe70a0a..e8a0af266bc 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -454,13 +454,13 @@ fn make_format_string(settings: &Settings) -> &str { /// /// Disambiguation rationale (GNU compatible): /// - CST: Central Standard Time (US) preferred over China/Cuba Standard Time -/// - EST: Eastern Standard Time (US) 
preferred over Australian Eastern Standard Time +/// - EST: Eastern Standard Time (US) preferred over Australian Eastern Standard Time /// - IST: India Standard Time preferred over Israel/Irish Standard Time /// - MST: Mountain Standard Time (US) preferred over Malaysia Standard Time /// - PST: Pacific Standard Time (US) - widely used abbreviation /// - GMT: Alias for UTC (universal) /// -/// All other timezones (AWST, JST, CET, etc.) are dynamically resolved from IANA database. +/// All other timezones (AWST, JST, CET, etc.) are dynamically resolved from IANA database. // spell-checker:disable-line static PREFERRED_TZ_MAPPINGS: &[(&str, &str)] = &[ // Universal (no ambiguity, but commonly used) ("UTC", "UTC"), @@ -475,7 +475,7 @@ static PREFERRED_TZ_MAPPINGS: &[(&str, &str)] = &[ ("EST", "America/New_York"), // Ambiguous: US vs Australia ("EDT", "America/New_York"), // Other highly ambiguous cases - ("IST", "Asia/Kolkata"), // Ambiguous: India vs Israel vs Ireland + ("IST", "Asia/Kolkata"), // Ambiguous: India vs Israel vs Ireland // spell-checker:disable-line ]; /// Lazy-loaded timezone abbreviation lookup map built from IANA database. 
@@ -493,7 +493,8 @@ fn build_tz_abbrev_map() -> HashMap { // Then, try to find additional abbreviations from IANA database // This gives us broader coverage while respecting disambiguation preferences - let tzdb = TimeZoneDatabase::from_env(); + let tzdb = TimeZoneDatabase::from_env(); // spell-checker:disable-line + // spell-checker:disable-next-line for tz_name in tzdb.available() { let tz_str = tz_name.as_str(); // Skip if we already have a preferred mapping for this zone diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 2dd0ecbc043..eb7a5e0fc97 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -660,7 +660,7 @@ fn test_date_tz_various_formats() { "-0800 -08:00 -08:00:00 -08 PST\n", ); // Half-hour timezone - test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n"); + test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n"); // spell-checker:disable-line test_tz("Europe/Berlin", JAN2, "+0100 +01:00 +01:00:00 +01 CET\n"); test_tz( "Australia/Sydney", @@ -878,9 +878,9 @@ fn test_date_tz_abbreviation_australian_timezones() { // Test Australian timezone abbreviations (uutils supports, GNU does NOT) // This demonstrates uutils date going beyond GNU capabilities let au_zones = vec![ - ("AWST", "2021-03-20 14:53:01 AWST"), // Western Australia - ("ACST", "2021-03-20 14:53:01 ACST"), // Central Australia (Standard) - ("ACDT", "2021-03-20 14:53:01 ACDT"), // Central Australia (Daylight) + ("AWST", "2021-03-20 14:53:01 AWST"), // Western Australia // spell-checker:disable-line + ("ACST", "2021-03-20 14:53:01 ACST"), // Central Australia (Standard) // spell-checker:disable-line + ("ACDT", "2021-03-20 14:53:01 ACDT"), // Central Australia (Daylight) // spell-checker:disable-line ("AEST", "2021-03-20 14:53:01 AEST"), // Eastern Australia (Standard) ("AEDT", "2021-03-20 14:53:01 AEDT"), // Eastern Australia (Daylight) ]; @@ -921,7 +921,7 @@ fn test_date_tz_abbreviation_with_day_of_week() { // Test 
timezone abbreviations with full date format including day of week new_ucmd!() .arg("-d") - .arg("Sat 20 Mar 2021 14:53:01 AWST") + .arg("Sat 20 Mar 2021 14:53:01 AWST") // spell-checker:disable-line .arg("+%Y-%m-%d %H:%M:%S") .succeeds() .no_stderr();