uutils · ChrisDryden · Feb 2, 2026 · Dec 19, 2025 · Jan 14, 2026 · Jan 26, 2026
diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml
@@ -89,6 +89,7 @@ jobs:
         sudo locale-gen --keep-existing am_ET.UTF-8 # Ethiopia
         sudo locale-gen --keep-existing th_TH.UTF-8 # Thailand
         sudo locale-gen --keep-existing zh_CN.GB18030 # China
+        sudo locale-gen --keep-existing ja_JP.UTF-8 # Japan
 
         sudo update-locale
         echo "After:"

diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt
@@ -1,3 +1,4 @@
+janv
 AFAICT
 asimd
 ASIMD

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/deny.toml b/deny.toml
@@ -88,7 +88,7 @@ skip = [
   { name = "itertools", version = "0.13.0" },
   # ordered-multimap
   { name = "hashbrown", version = "0.14.5" },
-  # lru (via num-prime)
+  # lru (via num-prime), icu4x
   { name = "hashbrown", version = "0.15.5" },
   # cexpr (via bindgen)
   { name = "nom", version = "7.1.3" },

diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml
@@ -42,6 +42,7 @@ uucore = { workspace = true, features = [
   "version-cmp",
   "i18n-decimal",
   "i18n-collator",
+  "i18n-month",
 ] }
 fluent = { workspace = true }
 
@@ -60,6 +61,7 @@ uucore = { workspace = true, features = [
   "parser-size",
   "version-cmp",
   "i18n-collator",
+  "i18n-month",
 ] }
 
 [[bin]]

diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs
@@ -50,6 +50,7 @@ use uucore::extendedbigdecimal::ExtendedBigDecimal;
 #[cfg(feature = "i18n-collator")]
 use uucore::i18n::collator::locale_cmp;
 use uucore::i18n::decimal::locale_decimal_separator;
+use uucore::i18n::month::month_parse as locale_month_parse;
 use uucore::line_ending::LineEnding;
 use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
 use uucore::parser::parse_size::{ParseSizeError, Parser};
@@ -779,7 +780,7 @@ impl<'a> Line<'a> {
                         .enumerate()
                         .skip_while(|(_, c)| c.is_ascii_whitespace());
 
-                    let month = if month_parse(initial_selection) == Month::Unknown {
+                    let month = if locale_month_parse(initial_selection) == 0 {
                         // We failed to parse a month, which is equivalent to matching nothing.
                         // Add the "no match for key" marker to the first non-whitespace character.
                         let first_non_whitespace = month_chars.next();
@@ -2967,49 +2968,8 @@ fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering {
     da.cmp(&db)
 }
 
-#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
-enum Month {
-    Unknown,
-    January,
-    February,
-    March,
-    April,
-    May,
-    June,
-    July,
-    August,
-    September,
-    October,
-    November,
-    December,
-}
-
-/// Parse the beginning string into a Month, returning [`Month::Unknown`] on errors.
-fn month_parse(line: &[u8]) -> Month {
-    let line = line.trim_ascii_start();
-
-    match line.get(..3).map(|x| x.to_ascii_uppercase()).as_deref() {
-        Some(b"JAN") => Month::January,
-        Some(b"FEB") => Month::February,
-        Some(b"MAR") => Month::March,
-        Some(b"APR") => Month::April,
-        Some(b"MAY") => Month::May,
-        Some(b"JUN") => Month::June,
-        Some(b"JUL") => Month::July,
-        Some(b"AUG") => Month::August,
-        Some(b"SEP") => Month::September,
-        Some(b"OCT") => Month::October,
-        Some(b"NOV") => Month::November,
-        Some(b"DEC") => Month::December,
-        _ => Month::Unknown,
-    }
-}
-
 fn month_compare(a: &[u8], b: &[u8]) -> Ordering {
-    let ma = month_parse(a);
-    let mb = month_parse(b);
-
-    ma.cmp(&mb)
+    Ord::cmp(&locale_month_parse(a), &locale_month_parse(b))
 }
 
 fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(

diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml
@@ -150,7 +150,7 @@ format = [
   "quoting-style",
   "unit-prefix",
 ]
-i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime"]
+i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime", "i18n-month"]
 i18n-common = ["icu_locale"]
 i18n-collator = ["i18n-common", "icu_collator"]
 i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"]
@@ -161,6 +161,7 @@ i18n-datetime = [
   "jiff-icu",
   "jiff",
 ]
+i18n-month = ["i18n-common", "icu_datetime", "icu_provider", "libc"]
 mode = ["libc"]
 perms = ["entries", "libc", "walkdir"]
 buf-copy = []

diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs
@@ -13,6 +13,8 @@ pub mod collator;
 pub mod datetime;
 #[cfg(feature = "i18n-decimal")]
 pub mod decimal;
+#[cfg(feature = "i18n-month")]
+pub mod month;
 
 /// The encoding specified by the locale, if specified
 /// Currently only supports ASCII and UTF-8 for the sake of simplicity.
@@ -86,6 +88,13 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
     NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC"))
 }
 
+/// Return the locale/encoding pair that `get_locale_from_env("LC_TIME")` yields
+/// (used for month names, etc.). It is computed on the first call and then cached.
+pub fn get_time_locale() -> &'static (Locale, UEncoding) {
+    static CACHED: OnceLock<(Locale, UEncoding)> = OnceLock::new();
+    CACHED.get_or_init(|| get_locale_from_env("LC_TIME"))
+}
+
 /// Return the encoding deduced from the locale environment variable.
 pub fn get_locale_encoding() -> UEncoding {
     get_collating_locale().1

diff --git a/src/uucore/src/lib/features/i18n/month.rs b/src/uucore/src/lib/features/i18n/month.rs
@@ -0,0 +1,85 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::sync::OnceLock;
+
+use icu_datetime::provider::neo::{DatetimeNamesMonthGregorianV1, MonthNames};
+use icu_locale::{Locale, locale};
+use icu_provider::prelude::*;
+
+use crate::i18n::get_time_locale;
+
+/// Load the abbreviated Gregorian month names for `loc` from ICU's baked data.
+/// Returns uppercased `(name, month)` pairs (month in 1..=12), longest name first so a
+/// name that prefixes another cannot shadow it; `None` if the data is unavailable.
+fn load_month_names(loc: &Locale) -> Option<Vec<(String, u8)>> {
+    let data_locale = DataLocale::from(loc.clone());
+    let abbr_attr = DataMarkerAttributes::from_str_or_panic("3");
+    let request = DataRequest {
+        id: DataIdentifierBorrowed::for_marker_attributes_and_locale(abbr_attr, &data_locale),
+        metadata: DataRequestMetadata::default(),
+    };
+    let response: DataResponse<DatetimeNamesMonthGregorianV1> =
+        icu_datetime::provider::Baked.load(request).ok()?;
+    let MonthNames::Linear(names) = response.payload.get() else {
+        return None;
+    };
+    let mut result: Vec<(String, u8)> = Vec::with_capacity(24);
+    for (month, name) in (1u8..=12).zip(names.iter()) {
+        let upper = name.to_uppercase();
+        // Some locales use a trailing period (e.g. "janv." in French): keep both forms.
+        let stripped = upper.trim_end_matches('.').to_string();
+        result.extend([(stripped, month), (upper, month)]);
+    }
+    // Drop empty names (they would match any input) and the adjacent duplicates.
+    result.retain(|(name, _)| !name.is_empty());
+    result.dedup();
+    result.sort_by_key(|(name, _)| std::cmp::Reverse(name.len()));
+    Some(result)
+}
+
+/// Month-name table for the time locale, built on first use and cached afterwards.
+///
+/// English names are used when the locale is undefined or its data cannot be loaded.
+fn get_month_names() -> &'static Vec<(String, u8)> {
+    static MONTH_NAMES: OnceLock<Vec<(String, u8)>> = OnceLock::new();
+    MONTH_NAMES.get_or_init(|| {
+        let time_locale = &get_time_locale().0;
+        // For the undefined locale (C/POSIX), ICU returns generic month names like
+        // "M01", "M02", which aren't useful for matching, so go straight to English.
+        Some(time_locale)
+            .filter(|loc| **loc != locale!("und"))
+            .and_then(load_month_names)
+            .or_else(|| load_month_names(&locale!("en")))
+            .expect("ICU should always have English month data")
+    })
+}
+
+/// Parse a month name from the beginning of the input bytes.
+///
+/// Leading ASCII whitespace is skipped and the comparison is done on uppercased text.
+/// Returns the month number (1-12), or 0 if no month name is recognized.
+pub fn month_parse(input: &[u8]) -> u8 {
+    let trimmed = input.trim_ascii_start();
+
+    // Valid UTF-8 is used directly. Anything else (e.g. Latin-1 locales) is decoded one
+    // byte at a time, each byte becoming the Unicode codepoint of the same value, so
+    // legacy encodings like ISO-8859-1 (byte 0xE9 = 'é') can still match.
+    let haystack = match std::str::from_utf8(trimmed) {
+        Ok(text) => text.to_uppercase(),
+        Err(_) => trimmed
+            .iter()
+            .copied()
+            .map(char::from)
+            .collect::<String>()
+            .to_uppercase(),
+    };
+
+    // The first table entry that is a prefix of the input decides the month.
+    get_month_names()
+        .iter()
+        .find(|(name, _)| haystack.starts_with(name.as_str()))
+        .map_or(0, |(_, month)| *month)
+}
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    janv
     AFAICT
     asimd
     ASIMD
@@ Expand Down @@