diff --git a/src/Symfony/Component/String/AbstractUnicodeString.php b/src/Symfony/Component/String/AbstractUnicodeString.php
index df7265f3ebd15..af0532999018b 100644
--- a/src/Symfony/Component/String/AbstractUnicodeString.php
+++ b/src/Symfony/Component/String/AbstractUnicodeString.php
@@ -220,6 +220,21 @@ public function lower(): static
         return $str;
     }
 
+    /**
+     * @param string $locale In the format language_region (e.g. tr_TR)
+     */
+    public function localeLower(string $locale): static
+    {
+        if (null !== $transliterator = $this->getLocaleTransliterator($locale, 'Lower')) {
+            $str = clone $this;
+            $str->string = $transliterator->transliterate($str->string);
+
+            return $str;
+        }
+
+        return $this->lower();
+    }
+
     public function match(string $regexp, int $flags = 0, int $offset = 0): array
     {
         $match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
@@ -363,6 +378,21 @@ public function title(bool $allWords = false): static
         return $str;
     }
 
+    /**
+     * @param string $locale In the format language_region (e.g. tr_TR)
+     */
+    public function localeTitle(string $locale): static
+    {
+        if (null !== $transliterator = $this->getLocaleTransliterator($locale, 'Title')) {
+            $str = clone $this;
+            $str->string = $transliterator->transliterate($str->string);
+
+            return $str;
+        }
+
+        return $this->title();
+    }
+
     public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
     {
         if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
@@ -450,6 +480,21 @@ public function upper(): static
         return $str;
     }
 
+    /**
+     * @param string $locale In the format language_region (e.g. tr_TR)
+     */
+    public function localeUpper(string $locale): static
+    {
+        if (null !== $transliterator = $this->getLocaleTransliterator($locale, 'Upper')) {
+            $str = clone $this;
+            $str->string = $transliterator->transliterate($str->string);
+
+            return $str;
+        }
+
+        return $this->upper();
+    }
+
     public function width(bool $ignoreAnsiDecoration = true): int
     {
         $width = 0;
@@ -587,4 +632,43 @@ private function wcswidth(string $string): int
 
         return $width;
     }
+
+    /**
+     * Returns the cached transliterator for the "<locale>-<id>" rule, trying the parent locale (nl_BE -> nl) as a fallback.
+     *
+     * Returns null when the Transliterator class is unavailable (ext-intl not loaded) or when no transliterator can be created for the locale or its parent.
+     */
+    private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
+    {
+        // Without ext-intl, calling \Transliterator::create() is a fatal error; return null so callers use their fallback
+        if (!class_exists(\Transliterator::class)) {
+            return null;
+        }
+
+        $rule = $locale.'-'.$id;
+        if (\array_key_exists($rule, self::$transliterators)) {
+            return self::$transliterators[$rule];
+        }
+
+        if (null !== $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule)) {
+            return $transliterator;
+        }
+
+        // Try to find a parent locale (nl_BE -> nl)
+        if (false === $i = strpos($locale, '_')) {
+            return null;
+        }
+
+        $parentRule = substr_replace($locale, '-'.$id, $i);
+
+        // Parent locale was already cached, return and store as current locale
+        if (\array_key_exists($parentRule, self::$transliterators)) {
+            return self::$transliterators[$rule] = self::$transliterators[$parentRule];
+        }
+
+        // Create transliterator based on parent locale and cache the result on both initial and parent locale values
+        $transliterator = \Transliterator::create($parentRule);
+
+        return self::$transliterators[$rule] = self::$transliterators[$parentRule] = $transliterator;
+    }
 }
diff --git a/src/Symfony/Component/String/CHANGELOG.md b/src/Symfony/Component/String/CHANGELOG.md
index 31a3b54dbf911..621cedfcddedf 100644
--- a/src/Symfony/Component/String/CHANGELOG.md
+++ b/src/Symfony/Component/String/CHANGELOG.md
@@ -1,6 +1,11 @@
 CHANGELOG
 =========
 
+7.1
+---
+
+ * Add `localeLower()`, `localeUpper()`, `localeTitle()` methods to `AbstractUnicodeString`
+
 6.2
 ---
 
diff --git a/src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php b/src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php
index 1ed16bca1cd6a..17461fc6388df 100644
--- a/src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php
+++ b/src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php
@@ -50,6 +50,48 @@ public function testAsciiClosureRule()
         $this->assertSame('Dieser Wert sollte grOEsser oder gleich', (string) $s->ascii([$rule]));
     }
 
+    /**
+     * @dataProvider provideLocaleLower
+     *
+     * @requires extension intl
+     */
+    public function testLocaleLower(string $locale, string $expected, string $origin)
+    {
+        $instance = static::createFromString($origin)->localeLower($locale);
+
+        $this->assertNotSame(static::createFromString($origin), $instance);
+        $this->assertEquals(static::createFromString($expected), $instance);
+        $this->assertSame($expected, (string) $instance);
+    }
+
+    /**
+     * @dataProvider provideLocaleUpper
+     *
+     * @requires extension intl
+     */
+    public function testLocaleUpper(string $locale, string $expected, string $origin)
+    {
+        $instance = static::createFromString($origin)->localeUpper($locale);
+
+        $this->assertNotSame(static::createFromString($origin), $instance);
+        $this->assertEquals(static::createFromString($expected), $instance);
+        $this->assertSame($expected, (string) $instance);
+    }
+
+    /**
+     * @dataProvider provideLocaleTitle
+     *
+     * @requires extension intl
+     */
+    public function testLocaleTitle(string $locale, string $expected, string $origin)
+    {
+        $instance = static::createFromString($origin)->localeTitle($locale);
+
+        $this->assertNotSame(static::createFromString($origin), $instance);
+        $this->assertEquals(static::createFromString($expected), $instance);
+        $this->assertSame($expected, (string) $instance);
+    }
+
     public function provideCreateFromCodePoint(): array
     {
         return [
@@ -291,6 +333,78 @@ public static function provideLower(): array
         );
     }
 
+    public static function provideLocaleLower(): array
+    {
+        return [
+            // Lithuanian
+            // Introduce an explicit dot above when lowercasing capital I's and J's
+            // whenever there are more accents above.
+            // LATIN CAPITAL LETTER I WITH OGONEK -> LATIN SMALL LETTER I WITH OGONEK
+            ['lt', 'į', 'Į'],
+            // LATIN CAPITAL LETTER I WITH GRAVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE
+            ['lt', 'i̇̀', 'Ì'],
+            // LATIN CAPITAL LETTER I WITH ACUTE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING ACUTE ACCENT
+            ['lt', 'i̇́', 'Í'],
+            // LATIN CAPITAL LETTER I WITH TILDE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING TILDE
+            ['lt', 'i̇̃', 'Ĩ'],
+
+            // Turkish and Azeri
+            // When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into 'i'.
+            // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I
+            ['tr', 'i', 'İ'],
+            ['tr_TR', 'i', 'İ'],
+            ['az', 'i', 'İ'],
+
+            // Default casing rules
+            // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE
+            ['en_US', 'i̇', 'İ'],
+            ['en', 'i̇', 'İ'],
+        ];
+    }
+
+    public static function provideLocaleUpper(): array
+    {
+        return [
+            // Turkish and Azeri
+            // When uppercasing, i turns into a dotted capital I
+            // LATIN SMALL LETTER I -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+            ['tr', 'İ', 'i'],
+            ['tr_TR', 'İ', 'i'],
+            ['az', 'İ', 'i'],
+
+            // Greek
+            // Remove accents when uppercasing
+            // GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA
+            ['el', 'Α', 'ά'],
+            ['el_GR', 'Α', 'ά'],
+
+            // Default casing rules
+            // GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+            ['en_US', 'Ά', 'ά'],
+            ['en', 'Ά', 'ά'],
+        ];
+    }
+
+    public static function provideLocaleTitle(): array
+    {
+        return [
+            // Greek
+            // Titlecasing words, should keep the accents on the first letter
+            ['el', 'Άδικος', 'άδικος'],
+            ['el_GR', 'Άδικος', 'άδικος'],
+            ['en', 'Άδικος', 'άδικος'],
+
+            // Dutch
+            // Title casing should treat 'ij' as one character
+            ['nl_NL', 'IJssel', 'ijssel'],
+            ['nl_BE', 'IJssel', 'ijssel'],
+            ['nl', 'IJssel', 'ijssel'],
+
+            // Default casing rules
+            ['en', 'Ijssel', 'ijssel'],
+        ];
+    }
+
     public static function provideUpper(): array
     {
         return array_merge(