|
| 1 | +#!/usr/bin/env php |
| 2 | +<?php |
| 3 | + |
| 4 | +/* |
| 5 | + * This file is part of the Symfony package. |
| 6 | + * |
| 7 | + * (c) Fabien Potencier <[email protected]> |
| 8 | + * |
| 9 | + * For the full copyright and license information, please view the LICENSE |
| 10 | + * file that was distributed with this source code. |
| 11 | + */ |
| 12 | + |
// Entry point: make sure the Unicode emoji test data is available locally,
// convert it into transliterator rules and write them to the data directory.
$sourceFilename = __DIR__.'/emoji-test.txt';

Builder::downloadSource($sourceFilename);
Builder::saveRules(Builder::buildRules($sourceFilename));

| 18 | + |
/**
 * Builds the emoji <-> name transliteration rules from the Unicode "emoji-test.txt" data file.
 */
class Builder
{
    private const SOURCE_URL = 'https://www.unicode.org/Public/emoji/15.0/emoji-test.txt';

    /**
     * Downloads the emoji test data to $sourceFilename unless that file already exists.
     *
     * @throws \RuntimeException When the download fails
     */
    public static function downloadSource(string $sourceFilename): void
    {
        if (file_exists($sourceFilename)) {
            return;
        }

        if (!copy(self::SOURCE_URL, $sourceFilename)) {
            throw new \RuntimeException(sprintf('Could not download "%s" to "%s".', self::SOURCE_URL, $sourceFilename));
        }
    }

    /**
     * Converts the emoji test data into transliterator rules, one "<emoji> <> <name> ;" rule per line.
     *
     * @param string $filename Path of the emoji test data file
     * @param array  $errored  Reset on every call, then filled with one ['line' => ..., 'matches' => ...]
     *                         entry for each emoji that Transliterator::createFromRules() rejected
     *
     * @throws \RuntimeException When the file cannot be read or a name cannot be escaped
     * @throws \DomainException  When a data line does not match the expected format
     */
    public static function buildRules(string $filename, array &$errored = []): string
    {
        $errored = [];

        $lines = file($filename);
        if (false === $lines) {
            // Fail loudly: continuing would produce an empty rule set that overwrites the real one.
            throw new \RuntimeException(sprintf('Could not read source file "%s".', $filename));
        }

        $maps = [];

        foreach ($lines as $line) {
            $line = trim($line);
            // Strict comparison so that a line such as "0" is not mistaken for a blank line.
            if ('' === $line || str_starts_with($line, '#')) {
                continue;
            }

            // 263A FE0F ; fully-qualified # ☺️ E0.6 smiling face
            if (!preg_match('{^(?<codePoints>[\w ]+) +; [\w-]+ +# (?<emoji>.+) E\d+\.\d+ ?(?<name>.+)$}Uu', $line, $matches)) {
                throw new \DomainException("Could not parse line: \"$line\".");
            }

            ['codePoints' => $codePoints, 'emoji' => $emoji, 'name' => $name] = $matches;

            // Record emojis that cannot be used as a rule source instead of emitting a broken rule.
            if (!\Transliterator::createFromRules("$emoji <> test ;")) {
                $errored[] = ['line' => $line, 'matches' => $matches];
                continue;
            }

            // Backslash-escape every character of the name that is neither alphanumeric nor whitespace.
            $escapedName = preg_replace('/([^[:alnum:][:space:]])/u', '\\\\$1', $name);
            if (null === $escapedName) {
                throw new \RuntimeException(sprintf('Could not escape the name of line: "%s".', $line));
            }

            $codePointsCount = count(explode(' ', $codePoints));

            $maps[$codePointsCount][$emoji] = $escapedName;
        }

        // We must sort the maps by the number of code points, because the order really matters:
        // 🫶🏼 must be before 🫶
        krsort($maps);
        $maps = array_merge(...$maps);

        $rules = '';
        foreach ($maps as $emoji => $name) {
            $rules .= "$emoji <> $name ;\n";
        }

        return $rules;
    }

    /**
     * Writes the rules to data/transliterator/emoji-rules.txt, relative to the parent of this directory.
     *
     * @throws \RuntimeException When the rules file cannot be written
     */
    public static function saveRules(string $rules): void
    {
        $target = __DIR__.'/../data/transliterator/emoji-rules.txt';

        if (false === file_put_contents($target, $rules)) {
            throw new \RuntimeException(sprintf('Could not write rules to "%s".', $target));
        }
    }
}
0 commit comments