Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[String] New locale aware casing methods #52198

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions src/Symfony/Component/String/AbstractUnicodeString.php
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,21 @@ public function lower(): static
return $str;
}

/**
 * Lowercases the string using the casing rules of the given locale.
 *
 * Falls back to the locale-independent lower() when no transliterator can be
 * obtained for the locale, or when the transliteration itself fails.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeLower(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Lower');
    if (null === $transliterator) {
        return $this->lower();
    }

    // transliterate() returns false on failure; never store that in the clone.
    $lowered = $transliterator->transliterate($this->string);
    if (false === $lowered) {
        return $this->lower();
    }

    $str = clone $this;
    $str->string = $lowered;

    return $str;
}

public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
$match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
Expand Down Expand Up @@ -363,6 +378,21 @@ public function title(bool $allWords = false): static
return $str;
}

/**
 * Title-cases the string using the casing rules of the given locale.
 *
 * Falls back to title() (called with its default arguments) when no
 * transliterator can be obtained for the locale, or when the transliteration
 * itself fails.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeTitle(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Title');
    if (null === $transliterator) {
        return $this->title();
    }

    // transliterate() returns false on failure; never store that in the clone.
    $titled = $transliterator->transliterate($this->string);
    if (false === $titled) {
        return $this->title();
    }

    $str = clone $this;
    $str->string = $titled;

    return $str;
}

public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
Expand Down Expand Up @@ -450,6 +480,21 @@ public function upper(): static
return $str;
}

/**
 * Uppercases the string using the casing rules of the given locale.
 *
 * Falls back to the locale-independent upper() when no transliterator can be
 * obtained for the locale, or when the transliteration itself fails.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeUpper(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Upper');
    if (null === $transliterator) {
        return $this->upper();
    }

    // transliterate() returns false on failure; never store that in the clone.
    $uppered = $transliterator->transliterate($this->string);
    if (false === $uppered) {
        return $this->upper();
    }

    $str = clone $this;
    $str->string = $uppered;

    return $str;
}

public function width(bool $ignoreAnsiDecoration = true): int
{
$width = 0;
Expand Down Expand Up @@ -587,4 +632,33 @@ private function wcswidth(string $string): int

return $width;
}

/**
 * Resolves the transliterator for "<locale>-<id>", retrying with the part of
 * the locale before the first underscore (nl_BE -> nl) when the full locale
 * yields nothing.
 *
 * Every lookup result, including null, is stored in self::$transliterators
 * under the rule it was requested with, so each rule is created at most once.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 * @param string $id     Casing operation appended to the locale (the callers pass 'Lower', 'Upper' or 'Title')
 */
private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
{
    $rule = $locale.'-'.$id;
    if (\array_key_exists($rule, self::$transliterators)) {
        return self::$transliterators[$rule];
    }

    $exact = \Transliterator::create($rule);
    self::$transliterators[$rule] = $exact;
    if (null !== $exact) {
        return $exact;
    }

    // Without a region part there is no parent locale to fall back to
    $separator = strpos($locale, '_');
    if (false === $separator) {
        return null;
    }

    $parentRule = substr($locale, 0, $separator).'-'.$id;

    // Create the parent transliterator only once, then alias it under the requested rule
    if (!\array_key_exists($parentRule, self::$transliterators)) {
        self::$transliterators[$parentRule] = \Transliterator::create($parentRule);
    }

    return self::$transliterators[$rule] = self::$transliterators[$parentRule];
}
}
5 changes: 5 additions & 0 deletions src/Symfony/Component/String/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGELOG
=========

7.1
---

* Add `localeLower()`, `localeUpper()`, `localeTitle()` methods to `AbstractUnicodeString`

6.2
---

Expand Down
114 changes: 114 additions & 0 deletions src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,48 @@ public function testAsciiClosureRule()
$this->assertSame('Dieser Wert sollte grOEsser oder gleich', (string) $s->ascii([$rule]));
}

/**
 * Checks that localeLower() returns a new, correctly lowercased instance and
 * leaves the receiver untouched.
 *
 * @dataProvider provideLocaleLower
 *
 * @requires extension intl
 */
public function testLocaleLower(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeLower($locale);

    // Compare against the receiver itself: a freshly created object could never be identical to $instance
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

/**
 * Checks that localeUpper() returns a new, correctly uppercased instance and
 * leaves the receiver untouched.
 *
 * @dataProvider provideLocaleUpper
 *
 * @requires extension intl
 */
public function testLocaleUpper(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeUpper($locale);

    // Compare against the receiver itself: a freshly created object could never be identical to $instance
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

/**
 * Checks that localeTitle() returns a new, correctly title-cased instance and
 * leaves the receiver untouched.
 *
 * @dataProvider provideLocaleTitle
 *
 * @requires extension intl
 */
public function testLocaleTitle(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeTitle($locale);

    // Compare against the receiver itself: a freshly created object could never be identical to $instance
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

public function provideCreateFromCodePoint(): array
{
return [
Expand Down Expand Up @@ -291,6 +333,78 @@ public static function provideLower(): array
);
}

/**
* Data sets for testLocaleLower().
*
* @return list<array{string, string, string}> Each entry is [locale, expected, origin], matching the parameter order of testLocaleLower()
*/
public static function provideLocaleLower(): array
{
return [
// Lithuanian
// Introduce an explicit dot above when lowercasing capital I's and J's
// whenever there are more accents above.
// LATIN CAPITAL LETTER I WITH OGONEK -> LATIN SMALL LETTER I WITH OGONEK
['lt', 'į', 'Į'],
// LATIN CAPITAL LETTER I WITH GRAVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING GRAVE ACCENT
['lt', 'i̇̀', 'Ì'],
// LATIN CAPITAL LETTER I WITH ACUTE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING ACUTE ACCENT
['lt', 'i̇́', 'Í'],
// LATIN CAPITAL LETTER I WITH TILDE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING TILDE
['lt', 'i̇̃', 'Ĩ'],

// Turkish and Azeri
// When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into 'i'.
// LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I
['tr', 'i', 'İ'],
['tr_TR', 'i', 'İ'],
['az', 'i', 'İ'],

// Default casing rules
// LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE
['en_US', 'i̇', 'İ'],
['en', 'i̇', 'İ'],
];
}

/**
* Data sets for testLocaleUpper().
*
* @return list<array{string, string, string}> Each entry is [locale, expected, origin], matching the parameter order of testLocaleUpper()
*/
public static function provideLocaleUpper(): array
{
return [
// Turkish and Azeri
// When uppercasing, i turns into a dotted capital I
// LATIN SMALL LETTER I -> LATIN CAPITAL LETTER I WITH DOT ABOVE
['tr', 'İ', 'i'],
['tr_TR', 'İ', 'i'],
['az', 'İ', 'i'],

// Greek
// Remove accents when uppercasing
// GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA
['el', 'Α', 'ά'],
['el_GR', 'Α', 'ά'],

// Default casing rules
// GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA WITH TONOS
['en_US', 'Ά', 'ά'],
['en', 'Ά', 'ά'],
];
}

/**
* Data sets for testLocaleTitle().
*
* @return list<array{string, string, string}> Each entry is [locale, expected, origin], matching the parameter order of testLocaleTitle()
*/
public static function provideLocaleTitle(): array
{
return [
// Greek
// Title-casing a word should keep the accent on the first letter
['el', 'Άδικος', 'άδικος'],
['el_GR', 'Άδικος', 'άδικος'],
['en', 'Άδικος', 'άδικος'],

// Dutch
// Title-casing should treat 'ij' as one character
['nl_NL', 'IJssel', 'ijssel'],
['nl_BE', 'IJssel', 'ijssel'],
['nl', 'IJssel', 'ijssel'],

// Default casing rules ('ij' is not treated as one character)
['en', 'Ijssel', 'ijssel'],
];
}

public static function provideUpper(): array
{
return array_merge(
Expand Down