Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0357501

Browse files
committed
ICU-22942 MF2 ICU4C: NFC-normalize names and keys according to spec
Includes adding !UCONFIG_NO_NORMALIZATION guards to all MF2 files
1 parent 376da67 commit 0357501

32 files changed

+283
-20
lines changed

icu4c/source/i18n/messageformat2.cpp

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
@@ -11,8 +13,10 @@
1113
#include "unicode/messageformat2_data_model.h"
1214
#include "unicode/messageformat2_formattable.h"
1315
#include "unicode/messageformat2.h"
16+
#include "unicode/normalizer2.h"
1417
#include "unicode/unistr.h"
1518
#include "messageformat2_allocation.h"
19+
#include "messageformat2_checker.h"
1620
#include "messageformat2_evaluation.h"
1721
#include "messageformat2_macros.h"
1822

@@ -37,7 +41,7 @@ static Formattable evalLiteral(const Literal& lit) {
3741
// The fallback for a variable name is itself.
3842
UnicodeString str(DOLLAR);
3943
str += var;
40-
const Formattable* val = context.getGlobal(var, errorCode);
44+
const Formattable* val = context.getGlobal(*this, var, errorCode);
4145
if (U_SUCCESS(errorCode)) {
4246
return (FormattedPlaceholder(*val, str));
4347
}
@@ -52,9 +56,9 @@ static Formattable evalLiteral(const Literal& lit) {
5256
}
5357

5458
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
55-
const Operand& rand,
56-
MessageContext& context,
57-
UErrorCode &status) const {
59+
const Operand& rand,
60+
MessageContext& context,
61+
UErrorCode &status) const {
5862
if (U_FAILURE(status)) {
5963
return {};
6064
}
@@ -71,15 +75,19 @@ static Formattable evalLiteral(const Literal& lit) {
7175
// Eager vs. lazy evaluation is an open issue:
7276
// see https://github.com/unicode-org/message-format-wg/issues/299
7377

78+
// NFC-normalize the variable name. See
79+
// https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
80+
const VariableName normalized = normalizeNFC(var);
81+
7482
// Look up the variable in the environment
75-
if (env.has(var)) {
83+
if (env.has(normalized)) {
7684
// `var` is a local -- look it up
77-
const Closure& rhs = env.lookup(var);
85+
const Closure& rhs = env.lookup(normalized);
7886
// Format the expression using the environment from the closure
7987
return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
8088
}
8189
// Variable wasn't found in locals -- check if it's global
82-
FormattedPlaceholder result = evalArgument(var, context, status);
90+
FormattedPlaceholder result = evalArgument(normalized, context, status);
8391
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
8492
status = U_ZERO_ERROR;
8593
// Unbound variable -- set a resolution error
@@ -761,6 +769,7 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen
761769
UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
762770
EMPTY_ON_ERROR(status);
763771

772+
764773
// Create a new environment that will store closures for all local variables
765774
Environment* env = Environment::create(status);
766775
// Create a new context with the given arguments and the `errors` structure
@@ -813,12 +822,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn
813822

814823
// Check that variable is in scope
815824
const VariableName& var = rand.asVariable();
825+
UnicodeString normalized = normalizeNFC(var);
826+
816827
// Check local scope
817-
if (localEnv.has(var)) {
828+
if (localEnv.has(normalized)) {
818829
return;
819830
}
820831
// Check global scope
821-
context.getGlobal(var, status);
832+
context.getGlobal(*this, normalized, status);
822833
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
823834
status = U_ZERO_ERROR;
824835
context.getErrors().setUnresolvedVariable(var, status);
@@ -855,7 +866,10 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
855866
// memoizing the value of localEnv up to this point
856867

857868
// Add the LHS to the environment for checking the next declaration
858-
env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
869+
env = Environment::create(normalizeNFC(decl.getVariable()),
870+
Closure(rhs, *env),
871+
env,
872+
status);
859873
CHECK_ERROR(status);
860874
}
861875
}
@@ -866,3 +880,5 @@ U_NAMESPACE_END
866880
#endif /* #if !UCONFIG_NO_MF2 */
867881

868882
#endif /* #if !UCONFIG_NO_FORMATTING */
883+
884+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_allocation.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
#if U_SHOW_CPLUSPLUS_API
1212

13+
#if !UCONFIG_NO_NORMALIZATION
14+
1315
#if !UCONFIG_NO_FORMATTING
1416

1517
#if !UCONFIG_NO_MF2
@@ -139,6 +141,8 @@ U_NAMESPACE_END
139141

140142
#endif /* #if !UCONFIG_NO_FORMATTING */
141143

144+
#endif /* #if !UCONFIG_NO_NORMALIZATION */
145+
142146
#endif /* U_SHOW_CPLUSPLUS_API */
143147

144148
#endif // MESSAGEFORMAT2_UTILS_H

icu4c/source/i18n/messageformat2_arguments.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,16 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
911

12+
#include "unicode/messageformat2.h"
1013
#include "unicode/messageformat2_arguments.h"
1114
#include "unicode/messageformat2_data_model_names.h"
15+
#include "messageformat2_evaluation.h"
1216
#include "uvector.h" // U_ASSERT
1317

1418
U_NAMESPACE_BEGIN
@@ -22,11 +26,15 @@ namespace message2 {
2226

2327
using Arguments = MessageArguments;
2428

25-
const Formattable* Arguments::getArgument(const VariableName& arg, UErrorCode& errorCode) const {
29+
const Formattable* Arguments::getArgument(const MessageFormatter& context,
30+
const VariableName& arg,
31+
UErrorCode& errorCode) const {
2632
if (U_SUCCESS(errorCode)) {
2733
U_ASSERT(argsLen == 0 || arguments.isValid());
2834
for (int32_t i = 0; i < argsLen; i++) {
29-
if (argumentNames[i] == arg) {
35+
UnicodeString normalized = context.normalizeNFC(argumentNames[i]);
36+
// arg already assumed to be normalized
37+
if (normalized == arg) {
3038
return &arguments[i];
3139
}
3240
}
@@ -57,3 +65,5 @@ U_NAMESPACE_END
5765
#endif /* #if !UCONFIG_NO_MF2 */
5866

5967
#endif /* #if !UCONFIG_NO_FORMATTING */
68+
69+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_checker.cpp

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,16 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
911

12+
#include "unicode/messageformat2.h"
1013
#include "messageformat2_allocation.h"
1114
#include "messageformat2_checker.h"
15+
#include "messageformat2_evaluation.h"
1216
#include "messageformat2_macros.h"
1317
#include "uvector.h" // U_ASSERT
1418

@@ -104,6 +108,13 @@ TypeEnvironment::~TypeEnvironment() {}
104108

105109
// ---------------------
106110

111+
UnicodeString Checker::normalizeNFC(const Key& k) const {
112+
if (k.isWildcard()) {
113+
return UnicodeString("*");
114+
}
115+
return context.normalizeNFC(k.asLiteral().unquoted());
116+
}
117+
107118
static bool areDefaultKeys(const Key* keys, int32_t len) {
108119
U_ASSERT(len > 0);
109120
for (int32_t i = 0; i < len; i++) {
@@ -185,7 +196,7 @@ void Checker::checkVariants(UErrorCode& status) {
185196
// This variant was already checked,
186197
// so we know keys1.len == len
187198
for (int32_t kk = 0; kk < len; kk++) {
188-
if (!(keys[kk] == keys1[kk])) {
199+
if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) {
189200
allEqual = false;
190201
break;
191202
}
@@ -312,3 +323,5 @@ U_NAMESPACE_END
312323
#endif /* #if !UCONFIG_NO_MF2 */
313324

314325
#endif /* #if !UCONFIG_NO_FORMATTING */
326+
327+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_checker.h

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
#if U_SHOW_CPLUSPLUS_API
1212

13+
#if !UCONFIG_NO_NORMALIZATION
14+
1315
#if !UCONFIG_NO_FORMATTING
1416

1517
#if !UCONFIG_NO_MF2
@@ -56,14 +58,19 @@ namespace message2 {
5658
// an explicit declaration
5759
}; // class TypeEnvironment
5860

61+
class MessageFormatter;
62+
5963
// Checks a data model for semantic errors
6064
// (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md )
6165
class Checker {
6266
public:
6367
void check(UErrorCode&);
64-
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
68+
Checker(const MFDataModel& d, StaticErrors& e, const MessageFormatter& mf)
69+
: dataModel(d), errors(e), context(mf) {}
6570
private:
6671

72+
UnicodeString normalizeNFC(const Key&) const;
73+
6774
void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
6875
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
6976
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
@@ -78,6 +85,9 @@ namespace message2 {
7885
void check(const Pattern&);
7986
const MFDataModel& dataModel;
8087
StaticErrors& errors;
88+
89+
// Used for NFC normalization
90+
const MessageFormatter& context;
8191
}; // class Checker
8292

8393
} // namespace message2
@@ -88,6 +98,8 @@ U_NAMESPACE_END
8898

8999
#endif /* #if !UCONFIG_NO_FORMATTING */
90100

101+
#endif /* #if !UCONFIG_NO_NORMALIZATION */
102+
91103
#endif /* U_SHOW_CPLUSPLUS_API */
92104

93105
#endif // MESSAGEFORMAT_CHECKER_H

icu4c/source/i18n/messageformat2_data_model.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
@@ -918,3 +920,5 @@ U_NAMESPACE_END
918920
#endif /* #if !UCONFIG_NO_MF2 */
919921

920922
#endif /* #if !UCONFIG_NO_FORMATTING */
923+
924+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_errors.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
@@ -290,3 +292,5 @@ U_NAMESPACE_END
290292
#endif /* #if !UCONFIG_NO_MF2 */
291293

292294
#endif /* #if !UCONFIG_NO_FORMATTING */
295+
296+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_errors.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@
1515
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
1616
*/
1717

18+
#if !UCONFIG_NO_NORMALIZATION
19+
1820
#if !UCONFIG_NO_FORMATTING
1921

2022
#if !UCONFIG_NO_MF2
@@ -151,6 +153,8 @@ U_NAMESPACE_END
151153

152154
#endif /* #if !UCONFIG_NO_FORMATTING */
153155

156+
#endif /* #if !UCONFIG_NO_NORMALIZATION */
157+
154158
#endif /* U_SHOW_CPLUSPLUS_API */
155159

156160
#endif // MESSAGEFORMAT2_ERRORS_H

icu4c/source/i18n/messageformat2_evaluation.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33

44
#include "unicode/utypes.h"
55

6+
#if !UCONFIG_NO_NORMALIZATION
7+
68
#if !UCONFIG_NO_FORMATTING
79

810
#if !UCONFIG_NO_MF2
@@ -190,13 +192,16 @@ PrioritizedVariant::~PrioritizedVariant() {}
190192
errors.checkErrors(status);
191193
}
192194

193-
const Formattable* MessageContext::getGlobal(const VariableName& v, UErrorCode& errorCode) const {
194-
return arguments.getArgument(v, errorCode);
195+
const Formattable* MessageContext::getGlobal(const MessageFormatter& context,
196+
const VariableName& v,
197+
UErrorCode& errorCode) const {
198+
return arguments.getArgument(context, v, errorCode);
195199
}
196200

197201
MessageContext::MessageContext(const MessageArguments& args,
198202
const StaticErrors& e,
199203
UErrorCode& status) : arguments(args), errors(e, status) {}
204+
200205
MessageContext::~MessageContext() {}
201206

202207
} // namespace message2
@@ -205,3 +210,5 @@ U_NAMESPACE_END
205210
#endif /* #if !UCONFIG_NO_MF2 */
206211

207212
#endif /* #if !UCONFIG_NO_FORMATTING */
213+
214+
#endif /* #if !UCONFIG_NO_NORMALIZATION */

icu4c/source/i18n/messageformat2_evaluation.h

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
* \file
1515
* \brief C++ API: Formats messages using the draft MessageFormat 2.0.
1616
*/
17+
#if !UCONFIG_NO_NORMALIZATION
1718

1819
#if !UCONFIG_NO_FORMATTING
1920

@@ -174,11 +175,15 @@ namespace message2 {
174175
// The context contains all the information needed to process
175176
// an entire message: arguments, formatter cache, and error list
176177

178+
class MessageFormatter;
179+
177180
class MessageContext : public UMemory {
178181
public:
179182
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
180183

181-
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
184+
const Formattable* getGlobal(const MessageFormatter&,
185+
const VariableName&,
186+
UErrorCode&) const;
182187

183188
// If any errors were set, update `status` accordingly
184189
void checkErrors(UErrorCode& status) const;
@@ -191,6 +196,7 @@ namespace message2 {
191196
const MessageArguments& arguments; // External message arguments
192197
// Errors accumulated during parsing/formatting
193198
DynamicErrors errors;
199+
194200
}; // class MessageContext
195201

196202
} // namespace message2
@@ -201,6 +207,8 @@ U_NAMESPACE_END
201207

202208
#endif /* #if !UCONFIG_NO_FORMATTING */
203209

210+
#endif /* #if !UCONFIG_NO_NORMALIZATION */
211+
204212
#endif /* U_SHOW_CPLUSPLUS_API */
205213

206214
#endif // MESSAGEFORMAT2_EVALUATION_H

0 commit comments

Comments
 (0)