Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 128 additions & 2 deletions src/uu/date/src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@

use clap::{Arg, ArgAction, Command};
use jiff::fmt::strtime;
use jiff::tz::TimeZone;
use jiff::tz::{TimeZone, TimeZoneDatabase};
use jiff::{Timestamp, Zoned};
#[cfg(all(unix, not(target_os = "macos"), not(target_os = "redox")))]
use libc::clock_settime;
#[cfg(all(unix, not(target_os = "redox")))]
use libc::{CLOCK_REALTIME, clock_getres, timespec};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::sync::OnceLock;
use uucore::error::FromIo;
use uucore::error::{UResult, USimpleError};
use uucore::translate;
Expand Down Expand Up @@ -446,13 +448,137 @@ fn make_format_string(settings: &Settings) -> &str {
}
}

/// Minimal disambiguation rules for highly ambiguous timezone abbreviations.
/// Only includes cases where multiple major timezones share the same abbreviation.
/// Every other key is derived at runtime from IANA zone identifiers (see `build_tz_abbrev_map`).
///
/// Disambiguation rationale (GNU compatible):
/// - CST: Central Standard Time (US) preferred over China/Cuba Standard Time
/// - EST: Eastern Standard Time (US) preferred over Australian Eastern Standard Time
/// - IST: India Standard Time preferred over Israel/Irish Standard Time
/// - MST: Mountain Standard Time (US) preferred over Malaysia Standard Time
/// - PST: Pacific Standard Time (US) - widely used abbreviation
/// - GMT: Alias for UTC (universal)
///
/// Names outside this table (AWST, JST, CET, etc.) are resolved by this lookup only if the map derived from the IANA database has a matching key. // spell-checker:disable-line
static PREFERRED_TZ_MAPPINGS: &[(&str, &str)] = &[
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure we should maintain such list.
Seems that @BurntSushi has this in jiff
https://github.com/BurntSushi/jiff/blob/fdf6ab2978f49e0ac1154d78dd1c440965020506/src/fmt/rfc2822.rs#L875

maybe use that instead, no ?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that for Jiff, the mapping comes straight from RFC 2822. It isn't necessarily GNU compatible and it doesn't map the time zone abbreviations back to IANA time zone identifiers. Just offsets.

// Universal (no ambiguity, but commonly used)
("UTC", "UTC"),
("GMT", "UTC"),
// Highly ambiguous US timezones (GNU compatible)
("PST", "America/Los_Angeles"),
("PDT", "America/Los_Angeles"),
("MST", "America/Denver"),
("MDT", "America/Denver"),
("CST", "America/Chicago"), // Ambiguous: US vs China vs Cuba
("CDT", "America/Chicago"),
("EST", "America/New_York"), // Ambiguous: US vs Australia
("EDT", "America/New_York"),
// Other highly ambiguous cases
("IST", "Asia/Kolkata"), // Ambiguous: India vs Israel vs Ireland // spell-checker:disable-line
];

/// Lazy-loaded timezone abbreviation lookup map built from IANA database.
static TZ_ABBREV_CACHE: OnceLock<HashMap<String, String>> = OnceLock::new();

/// Build the abbreviation -> IANA zone name lookup table.
///
/// The table is seeded with `PREFERRED_TZ_MAPPINGS`, whose entries always win.
/// It is then extended from the zone identifiers reported by the time zone
/// database: the last `/`-separated component of each identifier is upper-cased
/// and, when the result is 2-5 ASCII uppercase letters, registered as a key for
/// that zone (e.g. "Pacific/Fiji" becomes reachable as "FIJI"). The first zone
/// to claim a key keeps it.
///
/// Keys added in the second step come from zone *identifiers*; this function
/// does not read the abbreviations stored inside the zone data.
fn build_tz_abbrev_map() -> HashMap<String, String> {
    // Seed with the preferred mappings; later `entry` calls never override them.
    let mut map: HashMap<String, String> = PREFERRED_TZ_MAPPINGS
        .iter()
        .map(|&(abbrev, iana)| (abbrev.to_string(), iana.to_string()))
        .collect();

    let tzdb = TimeZoneDatabase::from_env(); // spell-checker:disable-line
    // spell-checker:disable-next-line
    for tz_name in tzdb.available() {
        let tz_str = tz_name.as_str();

        // Zones that are the target of a preferred mapping contribute no extra key.
        // Consulting the small static table keeps this check cheap; scanning all
        // values of the growing map for every zone is unnecessary because a zone's
        // derived key depends only on its own identifier.
        if PREFERRED_TZ_MAPPINGS.iter().any(|&(_, iana)| iana == tz_str) {
            continue;
        }

        let Some(last_part) = tz_str.split('/').next_back() else {
            continue;
        };

        // Only keep candidates that look like an abbreviation (2-5 uppercase letters).
        let candidate = last_part.to_uppercase();
        if (2..=5).contains(&candidate.len()) && candidate.chars().all(|c| c.is_ascii_uppercase())
        {
            map.entry(candidate).or_insert_with(|| tz_str.to_string());
        }
    }

    map
}

/// Look up the IANA zone name registered for `abbrev`.
///
/// The lookup table is produced by `build_tz_abbrev_map` on first use and kept
/// in `TZ_ABBREV_CACHE` afterwards. Keys are matched exactly as given; `None`
/// means the table has no entry for `abbrev`.
fn tz_abbrev_to_iana(abbrev: &str) -> Option<&str> {
    TZ_ABBREV_CACHE
        .get_or_init(build_tz_abbrev_map)
        .get(abbrev)
        .map(String::as_str)
}

/// Replace a trailing timezone abbreviation in a date string with a numeric offset.
///
/// The last whitespace-separated word is treated as an abbreviation when it is
/// 2-5 ASCII uppercase letters and `tz_abbrev_to_iana` knows it. In that case the
/// word is swapped for the offset of the mapped zone and the rewritten string is
/// returned. In every other situation (no such word, unknown abbreviation, zone
/// not loadable, remaining text not parseable) the input is returned unchanged.
fn resolve_tz_abbreviation<S: AsRef<str>>(date_str: S) -> String {
    let s = date_str.as_ref();
    rewrite_trailing_tz_abbreviation(s).unwrap_or_else(|| s.to_string())
}

/// Build the offset-suffixed form of `s`, or `None` when no rewrite applies.
fn rewrite_trailing_tz_abbreviation(s: &str) -> Option<String> {
    // Drop trailing whitespace first: the abbreviation must be a true suffix for
    // `strip_suffix` below to remove it.
    let trimmed = s.trim_end();
    let last_word = trimmed.split_whitespace().next_back()?;

    // Potential abbreviation: all uppercase ASCII letters, 2-5 characters.
    let looks_like_abbrev =
        (2..=5).contains(&last_word.len()) && last_word.chars().all(|c| c.is_ascii_uppercase());
    if !looks_like_abbrev {
        return None;
    }

    let iana_name = tz_abbrev_to_iana(last_word)?;
    let tz = TimeZone::get(iana_name).ok()?;

    // Everything before the abbreviation is the date/time text.
    let date_part = trimmed.strip_suffix(last_word)?.trim();

    // Parse the date/time as if it were UTC to obtain an instant at which the
    // zone's offset can be looked up.
    // NOTE(review): this reads the wall-clock time as UTC, so inputs within a few
    // hours of an offset transition may get the neighbouring offset — confirm
    // whether that is acceptable.
    let date_with_utc = format!("{date_part} +00:00");
    let parsed = parse_datetime::parse_datetime(&date_with_utc).ok()?;
    let nanos = i32::try_from(parsed.timestamp_subsec_nanos()).ok()?;
    let ts = Timestamp::new(parsed.timestamp(), nanos).ok()?;

    // The offset is whatever the mapped zone observes at that instant, so the
    // same abbreviation can yield different offsets for different dates.
    let zoned = ts.to_zoned(tz);
    Some(format!("{date_part} {}", zoned.offset()))
}

/// Parse a `String` into a `DateTime`.
/// If it fails, return a tuple of the `String` along with its `ParseError`.
// TODO: Convert `parse_datetime` to jiff and remove wrapper from chrono to jiff structures.
fn parse_date<S: AsRef<str> + Clone>(
s: S,
) -> Result<Zoned, (String, parse_datetime::ParseDateTimeError)> {
match parse_datetime::parse_datetime(s.as_ref()) {
// First, try to resolve any timezone abbreviations
let resolved = resolve_tz_abbreviation(s.as_ref());

match parse_datetime::parse_datetime(&resolved) {
Ok(date) => {
let timestamp =
Timestamp::new(date.timestamp(), date.timestamp_subsec_nanos() as i32).unwrap();
Expand Down
141 changes: 125 additions & 16 deletions tests/by-util/test_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,14 @@ fn test_date_set_mac_unavailable() {

#[test]
#[cfg(all(unix, not(target_os = "macos")))]
/// TODO: expected to fail currently; change to `succeeds()` when required.
fn test_date_set_valid_2() {
if geteuid() == 0 {
let result = new_ucmd!()
new_ucmd!()
.arg("--set")
.arg("Sat 20 Mar 2021 14:53:01 AWST") // spell-checker:disable-line
.fails();
result.no_stdout();
assert!(result.stderr_str().starts_with("date: invalid date "));
.succeeds()
.no_stdout()
.no_stderr();
}
}

Expand Down Expand Up @@ -370,29 +369,27 @@ fn test_date_for_file_mtime() {

#[test]
#[cfg(all(unix, not(target_os = "macos")))]
/// TODO: expected to fail currently; change to `succeeds()` when required.
fn test_date_set_valid_3() {
if geteuid() == 0 {
let result = new_ucmd!()
new_ucmd!()
.arg("--set")
.arg("Sat 20 Mar 2021 14:53:01") // Local timezone
.fails();
result.no_stdout();
assert!(result.stderr_str().starts_with("date: invalid date "));
.succeeds()
.no_stdout()
.no_stderr();
}
}

#[test]
#[cfg(all(unix, not(target_os = "macos")))]
/// TODO: expected to fail currently; change to `succeeds()` when required.
fn test_date_set_valid_4() {
if geteuid() == 0 {
let result = new_ucmd!()
new_ucmd!()
.arg("--set")
.arg("2020-03-11 21:45:00") // Local timezone
.fails();
result.no_stdout();
assert!(result.stderr_str().starts_with("date: invalid date "));
.succeeds()
.no_stdout()
.no_stderr();
}
}

Expand Down Expand Up @@ -663,7 +660,7 @@ fn test_date_tz_various_formats() {
"-0800 -08:00 -08:00:00 -08 PST\n",
);
// Half-hour timezone
test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n");
test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n"); // spell-checker:disable-line
test_tz("Europe/Berlin", JAN2, "+0100 +01:00 +01:00:00 +01 CET\n");
test_tz(
"Australia/Sydney",
Expand Down Expand Up @@ -835,3 +832,115 @@ fn test_date_numeric_d_invalid_numbers() {
.fails()
.stderr_contains("invalid date");
}

#[test]
fn test_date_tz_abbreviation_utc_gmt() {
    // Both universal abbreviations must be accepted as a trailing zone name.
    // Only the exit status is checked here.
    for date_str in ["2021-03-20 14:53:01 UTC", "2021-03-20 14:53:01 GMT"] {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds();
    }
}

#[test]
fn test_date_tz_abbreviation_us_timezones() {
    // Test US timezone abbreviations (uutils supports, GNU also supports these).
    // Each input must parse successfully without writing anything to stderr.
    let us_zone_inputs = [
        "2021-03-20 14:53:01 PST",
        "2021-03-20 14:53:01 PDT",
        "2021-03-20 14:53:01 MST",
        "2021-03-20 14:53:01 MDT",
        "2021-03-20 14:53:01 CST",
        "2021-03-20 14:53:01 CDT",
        "2021-03-20 14:53:01 EST",
        "2021-03-20 14:53:01 EDT",
    ];

    for date_str in us_zone_inputs {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}

#[test]
fn test_date_tz_abbreviation_australian_timezones() {
    // Test Australian timezone abbreviations (uutils supports, GNU does NOT)
    // This demonstrates uutils date going beyond GNU capabilities.
    // Each input must parse successfully without writing anything to stderr.
    let au_zone_inputs = [
        "2021-03-20 14:53:01 AWST", // Western Australia // spell-checker:disable-line
        "2021-03-20 14:53:01 ACST", // Central Australia (Standard) // spell-checker:disable-line
        "2021-03-20 14:53:01 ACDT", // Central Australia (Daylight) // spell-checker:disable-line
        "2021-03-20 14:53:01 AEST", // Eastern Australia (Standard)
        "2021-03-20 14:53:01 AEDT", // Eastern Australia (Daylight)
    ];

    for date_str in au_zone_inputs {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}

#[test]
fn test_date_tz_abbreviation_dst_handling() {
    // Exercise a standard/daylight abbreviation pair for the same wall-clock time.
    // PST is UTC-8, PDT is UTC-7; March 20, 2021 was during the PDT period in the
    // Pacific timezone.
    // Only success and an empty stderr are asserted; the printed offset is not checked.
    for date_str in ["2021-03-20 14:53:01 PST", "2021-03-20 14:53:01 PDT"] {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%z")
            .succeeds()
            .no_stderr();
    }
}

#[test]
fn test_date_tz_abbreviation_with_day_of_week() {
    // Abbreviations must also be accepted after the long date form that starts
    // with a day-of-week name.
    let inputs = [
        "Sat 20 Mar 2021 14:53:01 AWST", // spell-checker:disable-line
        "Sat 20 Mar 2021 14:53:01 EST",
    ];

    for date_str in inputs {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}

#[test]
fn test_date_tz_abbreviation_unknown() {
    // XYZ is not a valid timezone abbreviation: the command must fail and
    // report an invalid date on stderr.
    let result = new_ucmd!()
        .arg("-d")
        .arg("2021-03-20 14:53:01 XYZ")
        .fails();
    result.stderr_contains("invalid date");
}
Loading