Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/GnuTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ jobs:
sudo locale-gen --keep-existing am_ET.UTF-8 # Ethiopia
sudo locale-gen --keep-existing th_TH.UTF-8 # Thailand
sudo locale-gen --keep-existing zh_CN.GB18030 # China
sudo locale-gen --keep-existing ja_JP.UTF-8 # Japan

sudo update-locale
echo "After:"
Expand Down
1 change: 1 addition & 0 deletions .vscode/cspell.dictionaries/jargon.wordlist.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
janv
AFAICT
asimd
ASIMD
Expand Down
42 changes: 21 additions & 21 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ skip = [
{ name = "itertools", version = "0.13.0" },
# ordered-multimap
{ name = "hashbrown", version = "0.14.5" },
# lru (via num-prime)
# lru (via num-prime), icu4x
{ name = "hashbrown", version = "0.15.5" },
# cexpr (via bindgen)
{ name = "nom", version = "7.1.3" },
Expand Down
2 changes: 2 additions & 0 deletions src/uu/sort/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ uucore = { workspace = true, features = [
"version-cmp",
"i18n-decimal",
"i18n-collator",
"i18n-month",
] }
fluent = { workspace = true }

Expand All @@ -60,6 +61,7 @@ uucore = { workspace = true, features = [
"parser-size",
"version-cmp",
"i18n-collator",
"i18n-month",
] }

[[bin]]
Expand Down
46 changes: 3 additions & 43 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ use uucore::extendedbigdecimal::ExtendedBigDecimal;
#[cfg(feature = "i18n-collator")]
use uucore::i18n::collator::locale_cmp;
use uucore::i18n::decimal::locale_decimal_separator;
use uucore::i18n::month::month_parse as locale_month_parse;
use uucore::line_ending::LineEnding;
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
use uucore::parser::parse_size::{ParseSizeError, Parser};
Expand Down Expand Up @@ -779,7 +780,7 @@ impl<'a> Line<'a> {
.enumerate()
.skip_while(|(_, c)| c.is_ascii_whitespace());

let month = if month_parse(initial_selection) == Month::Unknown {
let month = if locale_month_parse(initial_selection) == 0 {
// We failed to parse a month, which is equivalent to matching nothing.
// Add the "no match for key" marker to the first non-whitespace character.
let first_non_whitespace = month_chars.next();
Expand Down Expand Up @@ -2967,49 +2968,8 @@ fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering {
da.cmp(&db)
}

/// Calendar month recognised by month sorting.
///
/// Variants are declared in calendar order with ascending discriminants, so the
/// derived ordering places `Unknown` before `January` and `January` before
/// every later month.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Month {
    /// No month could be recognised; compares lower than every real month.
    Unknown = 0,
    January = 1,
    February = 2,
    March = 3,
    April = 4,
    May = 5,
    June = 6,
    July = 7,
    August = 8,
    September = 9,
    October = 10,
    November = 11,
    December = 12,
}

/// Recognise a three-letter English month abbreviation at the start of `line`.
///
/// Leading ASCII whitespace is skipped and the comparison ignores ASCII case.
/// Returns [`Month::Unknown`] when fewer than three bytes remain or when the
/// first three bytes are not a known abbreviation.
fn month_parse(line: &[u8]) -> Month {
    const ABBREVIATIONS: [(&[u8; 3], Month); 12] = [
        (b"JAN", Month::January),
        (b"FEB", Month::February),
        (b"MAR", Month::March),
        (b"APR", Month::April),
        (b"MAY", Month::May),
        (b"JUN", Month::June),
        (b"JUL", Month::July),
        (b"AUG", Month::August),
        (b"SEP", Month::September),
        (b"OCT", Month::October),
        (b"NOV", Month::November),
        (b"DEC", Month::December),
    ];

    // Anything shorter than three bytes cannot be a month abbreviation.
    let Some(prefix) = line.trim_ascii_start().get(..3) else {
        return Month::Unknown;
    };

    ABBREVIATIONS
        .iter()
        .find(|(abbreviation, _)| prefix.eq_ignore_ascii_case(&abbreviation[..]))
        .map_or(Month::Unknown, |&(_, month)| month)
}

/// Orders two byte strings by the month value parsed from each of them.
// NOTE(review): this span shows the removed and the added lines of a diff hunk
// back to back; only one of the two implementations exists in the real file.
fn month_compare(a: &[u8], b: &[u8]) -> Ordering {
let ma = month_parse(a);
let mb = month_parse(b);

ma.cmp(&mb)
locale_month_parse(a).cmp(&locale_month_parse(b))
}

fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
Expand Down
3 changes: 2 additions & 1 deletion src/uucore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ format = [
"quoting-style",
"unit-prefix",
]
i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime"]
i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime", "i18n-month"]
i18n-common = ["icu_locale"]
i18n-collator = ["i18n-common", "icu_collator"]
i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"]
Expand All @@ -161,6 +161,7 @@ i18n-datetime = [
"jiff-icu",
"jiff",
]
i18n-month = ["i18n-common", "icu_datetime", "icu_provider", "libc"]
mode = ["libc"]
perms = ["entries", "libc", "walkdir"]
buf-copy = []
Expand Down
9 changes: 9 additions & 0 deletions src/uucore/src/lib/features/i18n/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ pub mod collator;
pub mod datetime;
#[cfg(feature = "i18n-decimal")]
pub mod decimal;
#[cfg(feature = "i18n-month")]
pub mod month;

/// The encoding specified by the locale, if specified
/// Currently only supports ASCII and UTF-8 for the sake of simplicity.
Expand Down Expand Up @@ -86,6 +88,13 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC"))
}

/// Returns the locale/encoding pair resolved from `LC_TIME` (used for month names, etc.).
///
/// The lookup runs once, on the first call; later calls return the cached value.
pub fn get_time_locale() -> &'static (Locale, UEncoding) {
    static TIME_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();

    let resolve = || get_locale_from_env("LC_TIME");
    TIME_LOCALE.get_or_init(resolve)
}

/// Return the encoding deduced from the locale environment variable.
pub fn get_locale_encoding() -> UEncoding {
get_collating_locale().1
Expand Down
85 changes: 85 additions & 0 deletions src/uucore/src/lib/features/i18n/month.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use std::sync::OnceLock;

use icu_datetime::provider::neo::{DatetimeNamesMonthGregorianV1, MonthNames};
use icu_locale::{Locale, locale};
use icu_provider::prelude::*;

use crate::i18n::get_time_locale;

/// Build the table of upper-cased abbreviated month names for `loc`.
///
/// Each entry pairs a name with its month number (1-12). Names that end in one
/// or more periods (e.g. "janv." in French) are listed twice: first without the
/// trailing periods, then verbatim, so either spelling can be matched.
///
/// Returns `None` when the baked ICU data cannot be loaded for `loc`, or when
/// the month names are not stored in the linear layout.
fn load_month_names(loc: &Locale) -> Option<Vec<(String, u8)>> {
    let data_locale = DataLocale::from(loc.clone());
    let request = DataRequest {
        id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
            // Attribute "3" selects the abbreviated month names.
            DataMarkerAttributes::from_str_or_panic("3"),
            &data_locale,
        ),
        metadata: DataRequestMetadata::default(),
    };

    let response: DataResponse<DatetimeNamesMonthGregorianV1> =
        icu_datetime::provider::Baked.load(request).ok()?;

    let MonthNames::Linear(names) = response.payload.get() else {
        return None;
    };

    let mut table = Vec::new();
    // Pair month numbers 1..=12 with the first twelve names.
    for (number, name) in (1u8..=12).zip(names.iter()) {
        let full = name.to_uppercase();
        let bare = full.trim_end_matches('.');
        if bare.len() < full.len() {
            table.push((bare.to_string(), number));
        }
        table.push((full, number));
    }
    Some(table)
}

/// Returns the process-wide table of month names for the time locale.
///
/// The table is built on first use and cached. English names are used when the
/// time locale is undefined or when no month data can be loaded for it.
///
/// # Panics
///
/// Panics if the English month data cannot be loaded either.
fn get_month_names() -> &'static Vec<(String, u8)> {
    static MONTH_NAMES: OnceLock<Vec<(String, u8)>> = OnceLock::new();

    MONTH_NAMES.get_or_init(|| {
        let time_locale = &get_time_locale().0;

        // For the undefined locale (C/POSIX) ICU yields generic names such as
        // "M01", "M02", which are useless for matching, so do not even try it.
        let localized = if *time_locale == locale!("und") {
            None
        } else {
            load_month_names(time_locale)
        };

        match localized {
            Some(names) => names,
            None => load_month_names(&locale!("en"))
                .expect("ICU should always have English month data"),
        }
    })
}

/// Parse a month name from the beginning of the input bytes.
///
/// Leading ASCII whitespace is skipped. Both the input and the known month
/// names are compared in upper case, so matching is case-insensitive.
///
/// When several known names are a prefix of the input, the longest one wins.
/// Returning the first match instead would let a short abbreviation shadow a
/// longer one that starts with it (e.g. "thg 1" versus "thg 10"), mapping the
/// later month onto the earlier one.
///
/// Returns the month number (1-12), or 0 if no month name is recognized.
pub fn month_parse(input: &[u8]) -> u8 {
    let input = input.trim_ascii_start();

    // Convert bytes to a string for comparison. Valid UTF-8 is used directly.
    // For non-UTF-8 input (e.g., Latin-1 locales) each byte is treated as a
    // Unicode code point, which handles legacy encodings such as ISO-8859-1
    // where byte 0xE9 is 'é'.
    let input_upper = match std::str::from_utf8(input) {
        Ok(text) => text.to_uppercase(),
        Err(_) => input
            .iter()
            .map(|&byte| char::from(byte))
            .collect::<String>()
            .to_uppercase(),
    };

    // Longest-prefix match. The strict `>` keeps the earliest table entry on
    // ties and also ignores empty names, which would otherwise match anything.
    let mut best_len = 0;
    let mut best_month = 0;
    for (name, month) in get_month_names() {
        if name.len() > best_len && input_upper.starts_with(name.as_str()) {
            best_len = name.len();
            best_month = *month;
        }
    }
    best_month
}
Loading
Loading