diff --git a/README.md b/README.md index 5154bc3dd7..fdc0ead98f 100644 --- a/README.md +++ b/README.md @@ -25,36 +25,32 @@ Messages can interpolate arguments formatted using _formatting functions_: Messages can define variants which correspond to the grammatical (or other) requirements of the language: - {{ - match {$count :number} - when 1 {{You have one notification.}} - when * {{You have {$count} notifications.}} - }} + .match {$count :number} + 1 {{You have one notification.}} + * {{You have {$count} notifications.}} The message syntax is also capable of expressing more complex translation, for example: - {{ - local $hostName = {$host :person firstName=long} - local $guestName = {$guest :person firstName=long} - local $guestsOther = {$guestCount :number offset=1} + .local $hostName = {$host :person firstName=long} + .local $guestName = {$guest :person firstName=long} + .local $guestsOther = {$guestCount :number offset=1} - match {$host :gender} {$guestOther :number} + .match {$host :gender} {$guestsOther :number} - when female 0 {{{$hostName} does not give a party.}} - when female 1 {{{$hostName} invites {$guestName} to her party.}} - when female 2 {{{$hostName} invites {$guestName} and one other person to her party.}} - when female * {{{$hostName} invites {$guestName} and {$guestsOther} other people to her party.}} + female 0 {{{$hostName} does not give a party.}} + female 1 {{{$hostName} invites {$guestName} to her party.}} + female 2 {{{$hostName} invites {$guestName} and one other person to her party.}} + female * {{{$hostName} invites {$guestName} and {$guestsOther} other people to her party.}} - when male 0 {{{$hostName} does not give a party.}} - when male 1 {{{$hostName} invites {$guestName} to his party.}} - when male 2 {{{$hostName} invites {$guestName} and one other person to his party.}} - when male * {{{$hostName} invites {$guestName} and {$guestsOther} other people to his party.}} + male 0 {{{$hostName} does not give a party.}} + male 1 
{{{$hostName} invites {$guestName} to his party.}} + male 2 {{{$hostName} invites {$guestName} and one other person to his party.}} + male * {{{$hostName} invites {$guestName} and {$guestsOther} other people to his party.}} - when * 0 {{{$hostName} does not give a party.}} - when * 1 {{{$hostName} invites {$guestName} to their party.}} - when * 2 {{{$hostName} invites {$guestName} and one other person to their party.}} - when * * {{{$hostName} invites {$guestName} and {$guestsOther} other people to their party.}} - }} + * 0 {{{$hostName} does not give a party.}} + * 1 {{{$hostName} invites {$guestName} to their party.}} + * 2 {{{$hostName} invites {$guestName} and one other person to their party.}} + * * {{{$hostName} invites {$guestName} and {$guestsOther} other people to their party.}} See more examples and the formal definition of the grammar in [spec/syntax.md](./spec/syntax.md). diff --git a/spec/data-model/README.md b/spec/data-model/README.md index 7ba584d548..3577f72dbf 100644 --- a/spec/data-model/README.md +++ b/spec/data-model/README.md @@ -25,6 +25,10 @@ Two equivalent definitions of the data model are also provided: A `SelectMessage` corresponds to a syntax message that includes _selectors_. A message without _selectors_ and with a single _pattern_ is represented by a `PatternMessage`. +In the syntax, +a `PatternMessage` may be represented either as a _simple message_ or as a _complex message_, +depending on whether it has declarations and if its `pattern` is allowed in a _simple message_. + ```ts type Message = PatternMessage | SelectMessage; @@ -43,15 +47,47 @@ interface SelectMessage { ``` Each message _declaration_ is represented by a `Declaration`, -which connects the `name` of the _variable_ +which connects the `name` of a _variable_ with its _expression_ `value`. The `name` does not include the initial `$` of the _variable_. 
+The `name` of an `InputDeclaration` MUST be the same +as the `name` in the `VariableRef` of its `VariableExpression` `value`. + +An `UnsupportedStatement` represents a statement not supported by the implementation. +Its `keyword` is a non-empty string name (i.e. not including the initial `.`). +If not empty, the `body` is the "raw" value (i.e. escape sequences are not processed) +starting after the keyword and up to the first _expression_, +not including leading or trailing whitespace. +The non-empty `expressions` correspond to the trailing _expressions_ of the _reserved statement_. + +> **Note** +> Be aware that future versions of this specification +> might assign meaning to _reserved statement_ values. +> This would result in new interfaces being added to +> this data model. + ```ts -interface Declaration { +type Declaration = InputDeclaration | LocalDeclaration | UnsupportedStatement; + +interface InputDeclaration { + type: "input"; + name: string; + value: VariableExpression; +} + +interface LocalDeclaration { + type: "local"; name: string; value: Expression; } + +interface UnsupportedStatement { + type: "unsupported-statement"; + keyword: string; + body?: string; + expressions: Expression[]; +} ``` In a `SelectMessage`, @@ -74,28 +110,35 @@ interface CatchallKey { ## Patterns Each `Pattern` represents a linear sequence, without selectors. -Each element of the sequence MUST have either a `Text` or an `Expression` shape. -`Text` represents literal _text_, +Each element of the `body` array MUST either be a non-empty string or an `Expression` object. +String values represent literal _text_, while `Expression` wraps each of the potential _expression_ shapes. -The `value` of `Text` is the "cooked" value (i.e. escape sequences are processed). +The `body` strings are the "cooked" _text_ values, i.e. escape sequences are processed. 
-Implementations MUST NOT rely on the set of `Expression` `body` values being exhaustive, +Implementations MUST NOT rely on the set of `Expression` interfaces being exhaustive, as future versions of this specification MAY define additional expressions. -A `body` with an unrecognized value SHOULD be treated as an `Unsupported` value. +An `Expression` `func` with an unrecognized value SHOULD be treated as an `UnsupportedExpression` value. ```ts interface Pattern { - body: Array<Text | Expression>; + body: Array<string | Expression>; } -interface Text { - type: "text"; - value: string; +type Expression = LiteralExpression | VariableExpression | FunctionExpression; + +interface LiteralExpression { + arg: Literal; + func?: FunctionRef | UnsupportedExpression; +} + +interface VariableExpression { + arg: VariableRef; + func?: FunctionRef | UnsupportedExpression; } -interface Expression { - type: "expression"; - body: Literal | VariableRef | FunctionRef | Unsupported; +interface FunctionExpression { + arg?: never; + func: FunctionRef | UnsupportedExpression; } ``` @@ -148,8 +191,8 @@ interface Option { } ``` -An `Unsupported` represents an _expression_ with a -_reserved_ _annotation_ or a _private-use_ _annotation_ not supported +An `UnsupportedExpression` represents an _expression_ with a +_reserved annotation_ or a _private-use annotation_ not supported by the implementation. The `sigil` corresponds to the starting sigil of the _annotation_. The `source` is the "raw" value (i.e. escape sequences are not processed) @@ -157,22 +200,22 @@ and does not include the starting `sigil`. > **Note** > Be aware that future versions of this specification -> might assign meaning to _reserved_ `sigil` values. +> might assign meaning to _reserved annotation_ `sigil` values. > This would result in new interfaces being added to > this data model. If the _expression_ includes a _literal_ or _variable_ before the _annotation_, it is included as the `operand`. 
-When parsing the syntax of a _message_ that includes a _private-use_ _annotation_ +When parsing the syntax of a _message_ that includes a _private-use annotation_ supported by the implementation, the implementation SHOULD represent it in the data model using an interface appropriate for the semantics and meaning that the implementation attaches to that _annotation_. ```ts -interface Unsupported { - type: "unsupported"; +interface UnsupportedExpression { + type: "unsupported-expression"; sigil: "!" | "@" | "#" | "%" | "^" | "&" | "*" | "<" | ">" | "/" | "?" | "~"; source: string; operand?: Literal | VariableRef; diff --git a/spec/data-model/message.dtd b/spec/data-model/message.dtd index 5da214b44c..5f1810f39b 100644 --- a/spec/data-model/message.dtd +++ b/spec/data-model/message.dtd @@ -1,7 +1,20 @@ - + + - + + + + @@ -9,7 +22,11 @@ - + + @@ -17,15 +34,13 @@ - + - - - - + + diff --git a/spec/data-model/message.json b/spec/data-model/message.json index 688c0d4a22..4b6c154ffd 100644 --- a/spec/data-model/message.json +++ b/spec/data-model/message.json @@ -32,7 +32,6 @@ "type": { "const": "function" }, "kind": { "enum": ["open", "close", "value"] }, "name": { "type": "string" }, - "operand": { "$ref": "#/$defs/value" }, "options": { "type": "array", "items": { @@ -47,69 +46,110 @@ }, "required": ["type", "kind", "name"] }, - "unsupported": { + "unsupported-expression": { "type": "object", "properties": { - "type": { "const": "unsupported" }, + "type": { "const": "unsupported-expression" }, "sigil": { "enum": ["!", "@", "#", "%", "^", "&", "*", "<", ">", "?", "~"] }, - "source": { "type": "string" }, - "operand": { "$ref": "#/$defs/value" } + "source": { "type": "string" } }, "required": ["type", "sigil", "source"] }, + "annotation": { + "oneOf": [ + { "$ref": "#/$defs/function" }, + { "$ref": "#/$defs/unsupported-expression" } + ] + }, - "text": { + "literal-expression": { "type": "object", "properties": { - "type": { "const": "text" }, - "value": { "type": "string" 
} + "arg": { "$ref": "#/$defs/literal" }, + "func": { "$ref": "#/$defs/annotation" } }, - "required": ["type", "value"] + "required": ["arg"] }, - "expression": { + "variable-expression": { "type": "object", "properties": { - "type": { "const": "expression" }, - "body": { - "oneOf": [ - { "$ref": "#/$defs/literal" }, - { "$ref": "#/$defs/variable" }, - { "$ref": "#/$defs/function" }, - { "$ref": "#/$defs/unsupported" } - ] - } + "arg": { "$ref": "#/$defs/variable" }, + "func": { "$ref": "#/$defs/annotation" } + }, + "required": ["arg"] + }, + "function-expression": { + "type": "object", + "properties": { + "func": { "$ref": "#/$defs/annotation" } }, - "required": ["type", "body"] + "required": ["func"] }, + "expression": { + "oneOf": [ + { "$ref": "#/$defs/literal-expression" }, + { "$ref": "#/$defs/variable-expression" }, + { "$ref": "#/$defs/function-expression" } + ] + }, + "pattern": { "type": "object", "properties": { "body": { "type": "array", "items": { - "oneOf": [ - { "$ref": "#/$defs/text" }, - { "$ref": "#/$defs/expression" } - ] + "oneOf": [{ "type": "string" }, { "$ref": "#/$defs/expression" }] } } }, "required": ["body"] }, + "input-declaration": { + "type": "object", + "properties": { + "type": { "const": "input" }, + "name": { "type": "string" }, + "value": { "$ref": "#/$defs/variable-expression" } + }, + "required": ["type", "name", "value"] + }, + "local-declaration": { + "type": "object", + "properties": { + "type": { "const": "local" }, + "name": { "type": "string" }, + "value": { "$ref": "#/$defs/expression" } + }, + "required": ["type", "name", "value"] + }, + "unsupported-statement": { + "type": "object", + "properties": { + "type": { "const": "unsupported-statement" }, + "keyword": { "type": "string" }, + "body": { "type": "string" }, + "expressions": { + "type": "array", + "items": { "$ref": "#/$defs/expression" } + } + }, + "required": ["type", "keyword", "expressions"] + }, "declarations": { "type": "array", "items": { - "type": "object", 
- "properties": { - "name": { "type": "string" }, - "value": { "$ref": "#/$defs/expression" } - }, - "required": ["target", "value"] + "oneOf": [ + { "$ref": "#/$defs/input-declaration" }, + { "$ref": "#/$defs/local-declaration" }, + { "$ref": "#/$defs/unsupported-statement" } + ] } }, + "variant-key": { "oneOf": [ { "$ref": "#/$defs/literal" }, diff --git a/spec/formatting.md b/spec/formatting.md index c8179ef12d..dd9570bd7c 100644 --- a/spec/formatting.md +++ b/spec/formatting.md @@ -40,6 +40,10 @@ Formatting of a _message_ is defined by the following operations: - **_Pattern Selection_** determines which of a message's _patterns_ is formatted. For a message with no _selectors_, this is simple as there is only one _pattern_. With _selectors_, this will depend on their resolution. + + At the start of _pattern selection_, + if the _message_ contains any _reserved statements_, + emit an Unsupported Statement Error. - **_Formatting_** takes the resolved values of the selected _pattern_, and produces the formatted result for the _message_. @@ -111,13 +115,13 @@ and different implementations MAY choose to perform different levels of resoluti > or some other locally appropriate value. Depending on the presence or absence of a _variable_ or _literal_ operand -and a _function_, _private-use_, or _reserved_ _annotation_, +and a _function_, _private-use annotation_, or _reserved annotation_, the resolved value of the _expression_ is determined as follows: -If the _expression_ contains a _reserved_ _annotation_, +If the _expression_ contains a _reserved annotation_, an `Unsupported Expression` error is emitted and a fallback value is used as its value. -Else, if the _expression_ contains a _private-use_ _annotation_, +Else, if the _expression_ contains a _private-use annotation_, its resolved value is defined according to the implementation's specification. 
Else, if the _expression_ contains an _annotation_, @@ -153,12 +157,10 @@ its resolved value is defined by _literal resolution_. > an _annotation_ needs to be provided: > > ``` -> {{ -> local $aNumber = {1234 :number} -> local $aDate = {|2023-08-30| :datetime} -> local $aFoo = {|some foo| :foo} +> .local $aNumber = {1234 :number} +> .local $aDate = {|2023-08-30| :datetime} +> .local $aFoo = {|some foo| :foo} > {{You have {42 :number}}} -> }} > ``` ### Literal Resolution @@ -252,9 +254,9 @@ An _expression_ fails to resolve when: - A _variable_ _operand_ fails to resolve. - A _function_ _annotation_ fails to resolve. -- A _private-use_ _annotation_ is unsupported by the implementation or if - a _private-use_ _annotation_ fails to resolve. -- The _expression_ has a _reserved_ _annotation_. +- A _private-use annotation_ is unsupported by the implementation or if + a _private-use annotation_ fails to resolve. +- The _expression_ has a _reserved annotation_. The _fallback value_ depends on the contents of the _expression_: @@ -290,17 +292,13 @@ rather than the _expression_ in the _selector_ or _pattern_. > attempting to format either of the following messages: > > ``` -> {{ -> local $var = {|horse| :func} +> .local $var = {|horse| :func} > {{The value is {$var}.}} -> }} > ``` > > ``` -> {{ -> local $var = {|horse|} +> .local $var = {|horse|} > {{The value is {$var :func}.}} -> }} > ``` > > would in both cases result in the _pattern_ _expression_ @@ -310,38 +308,36 @@ _Pattern selection_ is not supported for _fallback values_. ## Pattern Selection -When a _message_ contains a _match_ construct with one or more _expressions_, +When a _message_ contains a _matcher_ with one or more _selectors_, the implementation needs to determine which _variant_ will be used to provide the _pattern_ for the formatting operation. This is done by ordering and filtering the available _variant_ statements according to their _key_ values and selecting the first one. 
-The number of _keys_ in each _variant_ MUST equal the number of _expressions_ in the _selectors_. +The number of _keys_ in each _variant_ MUST equal the number of _selectors_. -Each _key_ corresponds to an _expression_ in the _selectors_ by its position in the _variant_. +Each _key_ corresponds to a _selector_ by its position in the _variant_. > For example, in this message: > > ``` -> {{ -> match {:one} {:two} {:three} -> when 1 2 3 {{ ... }} -> }} +> .match {:one} {:two} {:three} +> 1 2 3 {{ ... }} > ``` > -> The first _key_ `1` corresponds to the first _expression_ in the _selectors_ (`{:one}`), -> the second _key_ `2` to the second _expression_ (`{:two}`), -> and the third _key_ `3` to the third _expression_ (`{:three}`). +> The first _key_ `1` corresponds to the first _selector_ (`{:one}`), +> the second _key_ `2` to the second _selector_ (`{:two}`), +> and the third _key_ `3` to the third _selector_ (`{:three}`). To determine which _variant_ best matches a given set of inputs, each _selector_ is used in turn to order and filter the list of _variants_. -Each _variant_ with a _key_ that does not match its corresponding _selector expression_ +Each _variant_ with a _key_ that does not match its corresponding _selector_ is omitted from the list of _variants_. -The remaining _variants_ are sorted according to the _expression_'s _key_-ordering preference. -Earlier _expressions_ in the _selector_'s list of _expressions_ have a higher priority than later ones. +The remaining _variants_ are sorted according to the _selector_'s _key_-ordering preference. +Earlier _selectors_ in the _matcher_'s list of _selectors_ have a higher priority than later ones. -When all of the _selector expressions_ have been processed, +When all of the _selectors_ have been processed, the earliest-sorted _variant_ in the remaining list of _variants_ is selected. This selection method is defined in more detail below. 
@@ -355,11 +351,11 @@ or if this is not available or empty, the U+FFFD REPLACEMENT CHARACTER `�`. ### Resolve Selectors -First, resolve the values of each _selector_ _expression_: +First, resolve the values of each _selector_: 1. Let `res` be a new empty list of resolved values that support selection. -1. For each _expression_ `exp` of the message's _selectors_, - 1. Let `rv` be the resolved value of `exp`. +1. For each _selector_ `sel`, in source order, + 1. Let `rv` be the resolved value of `sel`. 1. If selection is supported for `rv`: 1. Append `rv` as the last element of the list `res`. 1. Else: @@ -468,12 +464,10 @@ the variable reference `$bar` resolves to the string `'bar'`, pattern selection proceeds as follows for this message: ``` -{{ -match {$foo :string} {$bar :string} -when bar bar {{All bar}} -when foo foo {{All foo}} -when * * {{Otherwise}} -}} +.match {$foo :string} {$bar :string} +bar bar {{All bar}} +foo foo {{All foo}} +* * {{Otherwise}} ``` 1. For the first selector:
@@ -493,7 +487,7 @@ when * * {{Otherwise}} resulting in a list « `* *` » of variants. 4. As the list `vars` only has one entry, it does not need to be sorted.
- The pattern `{Otherwise}` of the third variant is selected. + The pattern `Otherwise` of the third variant is selected. #### Example 2 @@ -501,13 +495,11 @@ Alternatively, with the same implementation and formatting context as in Example pattern selection would proceed as follows for this message: ``` -{{ -match {$foo :string} {$bar :string} -when * bar {{Any and bar}} -when foo * {{Foo and any}} -when foo bar {{Foo and bar}} -when * * {{Otherwise}} -}} +.match {$foo :string} {$bar :string} +* bar {{Any and bar}} +foo * {{Foo and any}} +foo bar {{Foo and bar}} +* * {{Otherwise}} ``` 1. For the first selector:
@@ -535,7 +527,7 @@ when * * {{Otherwise}} This is then sorted as:
« ( 0, `foo bar` ), ( 0, `foo *` ), ( 1, `* bar` ), ( 1, `* *` ) ».
-5. The pattern `{Foo and bar}` of the most preferred `foo bar` variant is selected. +5. The pattern `Foo and bar` of the most preferred `foo bar` variant is selected. #### Example 3 @@ -551,12 +543,10 @@ and an `en` (English) locale, the pattern selection proceeds as follows for this message: ``` -{{ -match {$count :plural} -when one {{Category match}} -when 1 {{Exact match}} -when * {{Other match}} -}} +.match {$count :plural} +one {{Category match}} +1 {{Exact match}} +* {{Other match}} ``` 1. For the selector:
@@ -577,7 +567,7 @@ when * {{Other match}} This is then sorted as:
« ( 0, `1` ), ( 1, `one` ), ( 2, `*` ) »
-4. The pattern `{Exact match}` of the most preferred `1` variant is selected. +4. The pattern `Exact match` of the most preferred `1` variant is selected. ## Formatting @@ -615,7 +605,7 @@ _This section is non-normative._ 1. An implementation might choose to return an interstitial object so that the caller can "decorate" portions of the formatted value. In ICU4J, the `NumberFormatter` class returns a `FormattedNumber` object, - so a _pattern_ such as `{This is my number {42 :number}}` might return + so a _pattern_ such as `This is my number {42 :number}` might return the character sequence `This is my number ` followed by a `FormattedNumber` object representing the value `42` in the current locale. @@ -712,8 +702,11 @@ These are divided into the following categories: > Example invalid messages resulting in a Syntax error: > > ``` - > {{{{Missing end braces - > {{{{Missing one end brace}}} + > {{Missing end braces + > ``` + > + > ``` + > {{Missing one end brace} > ``` > > ``` @@ -721,7 +714,7 @@ These are divided into the following categories: > ``` > > ``` - > {{local $var = {|no message body|}}} + > .local $var = {|no message body|} > ``` - **Data Model errors** occur when a message is invalid due to @@ -733,20 +726,16 @@ These are divided into the following categories: > Example invalid messages resulting in a Variant Key Mismatch error: > > ``` - > {{ - > match {$one :func} - > when 1 2 {{Too many}} - > when * {{Otherwise}} - > }} + > .match {$one :func} + > 1 2 {{Too many}} + > * {{Otherwise}} > ``` > > ``` - > {{ - > match {$one :func} {$two :func} - > when 1 2 {{Two keys}} - > when * {{Missing a key}} - > when * * {{Otherwise}} - > }} + > .match {$one :func} {$two :func} + > 1 2 {{Two keys}} + > * {{Missing a key}} + > * * {{Otherwise}} > ``` - **Missing Fallback Variant errors** occur when the message @@ -755,19 +744,15 @@ These are divided into the following categories: > Example invalid messages resulting in a Missing Fallback Variant error: > > ``` - > 
{{ - > match {$one :func} - > when 1 {{Value is one}} - > when 2 {{Value is two}} - > }} + > .match {$one :func} + > 1 {{Value is one}} + > 2 {{Value is two}} > ``` > > ``` - > {{ - > match {$one :func} {$two :func} - > when 1 * {{First is one}} - > when * 1 {{Second is one}} - > }} + > .match {$one :func} {$two :func} + > 1 * {{First is one}} + > * 1 {{Second is one}} > ``` - A **_Missing Selector Annotation error_** is an error that occurs when the _message_ @@ -777,29 +762,23 @@ These are divided into the following categories: > Examples of invalid messages resulting in a _Missing Selector Annotation error_: > > ``` - > {{ - > match {$one} - > when 1 {{Value is one}} - > when * {{Value is not one}} - > }} + > .match {$one} + > 1 {{Value is one}} + > * {{Value is not one}} > ``` > > ``` - > {{ - > local $one = {|The one|} - > match {$one} - > when 1 {{Value is one}} - > when * {{Value is not one}} - > }} + > .local $one = {|The one|} + > .match {$one} + > 1 {{Value is one}} + > * {{Value is not one}} > ``` > > ``` - > {{ - > input {$one} - > match {$one} - > when 1 {{Value is one}} - > when * {{Value is not one}} - > }} + > .input {$one} + > .match {$one} + > 1 {{Value is one}} + > * {{Value is not one}} > ``` - A **Duplicate Declaration error** occurs when a _variable_ appears in two _declarations_. 
@@ -810,27 +789,22 @@ These are divided into the following categories: > Examples of invalid messages resulting in a Duplicate Declaration error: > > ``` - > {{ - > input {$var :number maxFractionDigits=0} - > input {$var :number minFractionDigits=0} - > {{Redeclaration of the same variable}} - > }} - > {{ - > local $var = {$ext :number maxFractionDigits=0} - > input {$var :number minFractionDigits=0} - > {{Redeclaration of a local variable}} - > }} - > {{ - > input {$var :number minFractionDigits=0} - > local $var = {$ext :number maxFractionDigits=0} - > {{Redeclaration of an input variable}} - > }} - > {{ - > local $var = {$ext :someFunction} - > local $var = {$error} - > local $var2 = {$var2 :error} - > {{{$var} cannot be redefined. {$var2} cannot refer to itself}} - > }} + > .input {$var :number maxFractionDigits=0} + > .input {$var :number minFractionDigits=0} + > {{Redeclaration of the same variable}} + > + > .local $var = {$ext :number maxFractionDigits=0} + > .input {$var :number minFractionDigits=0} + > {{Redeclaration of a local variable}} + > + > .input {$var :number minFractionDigits=0} + > .local $var = {$ext :number maxFractionDigits=0} + > {{Redeclaration of an input variable}} + > + > .local $var = {$ext :someFunction} + > .local $var = {$error} + > .local $var2 = {$var2 :error} + > {{{$var} cannot be redefined. 
{$var2} cannot refer to itself}} > ``` - A **Duplicate Option Name error** occurs when the same _identifier_ @@ -844,10 +818,8 @@ These are divided into the following categories: > ``` > > ``` - > {{ - > local $foo = {horse :func one=1 two=2 one=1} + > .local $foo = {horse :func one=1 two=2 one=1} > {{This is {$foo}}} - > }} > ``` - **Resolution errors** occur when the runtime value of a part of a message @@ -864,11 +836,9 @@ These are divided into the following categories: > ``` > > ``` - > {{ - > match {$var :func} - > when 1 {{The value is one.}} - > when * {{The value is not one.}} - > }} + > .match {$var :func} + > 1 {{The value is one.}} + > * {{The value is not one.}} > ``` - **Unknown Function errors** occur when an _expression_ includes @@ -883,11 +853,9 @@ These are divided into the following categories: > ``` > > ``` - > {{ - > match {|horse| :func} - > when 1 {{The value is one.}} - > when * {{The value is not one.}} - > }} + > .match {|horse| :func} + > 1 {{The value is one.}} + > * {{The value is not one.}} > ``` - **Unsupported Expression errors** occur when an expression uses @@ -905,11 +873,19 @@ These are divided into the following categories: > if done within a context that does not support the `^` private use sigil: > > ``` - > {{ - > match {|horse| ^private} - > when 1 {{The value is one.}} - > when * {{The value is not one.}} - > }} + > .match {|horse| ^private} + > 1 {{The value is one.}} + > * {{The value is not one.}} + > ``` + + - **Unsupported Statement errors** occur when a message includes a _reserved statement_. + + > For example, attempting to format this message + > would always result in an Unsupported Statement error: + > + > ``` + > .some {|horse|} + > {{The message body}} > ``` - **Selection errors** occur when message selection fails. 
@@ -921,20 +897,16 @@ These are divided into the following categories: > uses a `:plural` selector function which requires its input to be numeric: > > ``` - > {{ - > match {|horse| :plural} - > when 1 {{The value is one.}} - > when * {{The value is not one.}} - > }} + > .match {|horse| :plural} + > 1 {{The value is one.}} + > * {{The value is not one.}} > ``` > > ``` - > {{ - > local $sel = {|horse| :plural} - > match {$sel} - > when 1 {{The value is one.}} - > when * {{The value is not one.}} - > }} + > .local $sel = {|horse| :plural} + > .match {$sel} + > 1 {{The value is one.}} + > * {{The value is not one.}} > ``` - **Formatting errors** occur during the formatting of a resolved value, @@ -960,10 +932,8 @@ These are divided into the following categories: > ``` > > ``` - > {{ - > local $id = {$user :get field=id} + > .local $id = {$user :get field=id} > {{Hello, {$id :get field=name}!}} - > }} > ``` > > ``` diff --git a/spec/message.abnf b/spec/message.abnf index 5ca464c79e..bd7dbeb8eb 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -1,19 +1,21 @@ message = simple-message / complex-message simple-message = [simple-start pattern] -simple-start = simple-start-char / text-escape / expression -pattern = *(text-char / text-escape / expression) +simple-start = simple-start-char / text-escape / placeholder +pattern = *(text-char / text-escape / placeholder) +placeholder = expression complex-message = *(declaration [s]) complex-body declaration = input-declaration / local-declaration / reserved-statement input-declaration = input [s] variable-expression local-declaration = local s variable [s] "=" [s] expression -complex-body = quoted-pattern - / ((selectors / reserved-statement) 1*([s] variant)) +complex-body = quoted-pattern / matcher quoted-pattern = "{{" pattern "}}" -selectors = match 1*([s] expression) +matcher = match-statement 1*([s] variant) +match-statement = match 1*([s] selector) +selector = expression variant = key *(s key) [s] quoted-pattern key 
= literal / "*" diff --git a/spec/registry.md b/spec/registry.md index fb41a532dd..b0052f0a89 100644 --- a/spec/registry.md +++ b/spec/registry.md @@ -123,11 +123,9 @@ For the sake of brevity, only `locales="en"` is considered. Given the above description, the `:number` function is defined to work both in a selector and a placeholder: ``` -{{ -match {$count :number} -when 1 {{One new message}} -when * {{{$count :number} new messages}} -}} +.match {$count :number} +1 {{One new message}} +* {{{$count :number} new messages}} ``` Furthermore, @@ -189,8 +187,6 @@ The following message references the second signature of `:adjective`, which only expects the `accord` option: >``` -> {{ -> input {$object :noun case=nominative} -> {{You see {$color :adjective article=indefinite accord=$object} {$object}!}} -> }} +> .input {$object :noun case=nominative} +> {{You see {$color :adjective article=indefinite accord=$object} {$object}!}} >``` diff --git a/spec/syntax.md b/spec/syntax.md index 9d2a3f9b94..cf34de5570 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -111,30 +111,44 @@ A **_message_** is the complete template for a specific message forma > > **Example** This _message_: > > > > ``` -> > {{ -> > local $foo = { |horse| } -> > {{You have a {$foo}!}} -> > }} +> > .local $foo = { |horse| } +> > {{You have a {$foo}!}} > > ``` > > > > Can also be written as: > > > > ``` -> > {{local $foo={|horse|}{{You have a {$foo}!}}}} +> > .local $foo={|horse|}{{You have a {$foo}!}} > > ``` > > > > An exception to this is: whitespace inside a _pattern_ is **always** significant. -A _message_ can be a _pattern_ or it can be a _complex message_. +A _message_ can be a _simple message_ or it can be a _complex message_. + +```abnf +message = simple-message / complex-message +``` + +A **_simple message_** contains a single _pattern_, +with restrictions on its first character. +An empty string is a valid _simple message_. 
+ +```abnf +simple-message = [simple-start pattern] +simple-start = simple-start-char / text-escape / placeholder +``` A **_complex message_** is any _message_ that contains _declarations_, a _matcher_, or both. -A _complex message_ always begins with the the sequence `{{` -and is terminated by the sequence `}}` +A _complex message_ always begins with either a keyword that has a `.` prefix or a _quoted pattern_ and consists of: 1. an optional list of _declarations_, followed by -2. a _body_ +2. a _complex body_ + +```abnf +complex-message = *(declaration [s]) complex-body +``` ### Declarations @@ -148,8 +162,11 @@ MAY include an _annotation_ that is applied to the external value. A **_local-declaration_** binds a _variable_ to the resolved value of an _expression_. +For compatibility with later MessageFormat 2 specification versions, +_declarations_ MAY also include _reserved statements_. + ```abnf -declaration = input-declaration / local-declaration +declaration = input-declaration / local-declaration / reserved-statement input-declaration = input [s] variable-expression local-declaration = local s variable [s] "=" [s] expression ``` @@ -172,23 +189,49 @@ external input value does not appear in a _declaration_. > than one applied to the same _variable_ named in a _declaration_. 
> For example, this message is _valid_: > ``` -> {{ -> input {$var :number maxFractionDigits=0} -> match {$var :plural maxFractionDigits=2} -> when 0 {{The selector can apply a different annotation to {$var} for the purposes of selection}} -> when * {{A placeholder in a pattern can apply a different annotation to {$var :number maxFractionDigits=3}}} -> }} +> .input {$var :number maxFractionDigits=0} +> .match {$var :plural maxFractionDigits=2} +> 0 {{The selector can apply a different annotation to {$var} for the purposes of selection}} +> * {{A placeholder in a pattern can apply a different annotation to {$var :number maxFractionDigits=3}}} > ``` > (See [Error Handling](./formatting.md#error-handling) for examples of invalid messages) -### Body +#### Reserved Statements + +A **_reserved statement_** reserves additional `.keywords` +for use by future versions of this specification. +Any such future keyword must start with `.`, +followed by two or more lower-case ASCII characters. + +The rest of the statement supports +a similarly wide range of content as _reserved annotations_, +but it MUST end with one or more _expressions_. + +```abnf +reserved-statement = reserved-keyword [s reserved-body] 1*expression +reserved-keyword = "." 2*(%x61-7A) +``` + +> [!Note] +> The `reserved-keyword` ABNF rule is a simplification, +> as it MUST NOT be considered to match any of the existing keywords +> `.input`, `.local`, or `.match`. + +This allows flexibility in future standardization, +as future definitions MAY define additional semantics and constraints +on the contents of these _reserved statements_. + +Implementations MUST NOT assign meaning or semantics to a _reserved statement_: +these are reserved for future standardization. +Implementations MUST NOT remove or alter the contents of a _reserved statement_. -The **_body_** of a _complex message_ is the part that will be formatted. -The _body_ consists of either a _quoted pattern_ or a _matcher_. 
+### Complex Body + +The **_complex body_** of a _complex message_ is the part that will be formatted. +The _complex body_ consists of either a _quoted pattern_ or a _matcher_. ```abnf -body = quoted-pattern - / (selectors 1*([s] variant)) +complex-body = quoted-pattern / matcher ``` ## Pattern @@ -198,7 +241,7 @@ Unless there is an error, resolving a _message_ always results in the formatting of a single _pattern_. ```abnf -pattern = *(text / expression) +pattern = *(text-char / text-escape / placeholder) ``` A _pattern_ MAY be empty. @@ -229,12 +272,28 @@ A _quoted pattern_ MAY be empty. **_text_** is the translatable content of a _pattern_. Any Unicode code point is allowed, except for surrogate code points U+D800 through U+DFFF inclusive. -The characters `\`, `{`, and `}` MUST be escaped as `\\`, `\{`, and `\}` -respectively. +The characters U+005C REVERSE SOLIDUS `\`, +U+007B LEFT CURLY BRACKET `{`, and U+007D RIGHT CURLY BRACKET `}` +MUST be escaped as `\\`, `\{`, and `\}` respectively. + +In the ABNF, _text_ is represented by non-empty sequences of +`simple-start-char`, `text-char`, and `text-escape`. +The first of these is used at the start of a _simple message_, +and matches `text-char` except for not allowing U+002E FULL STOP `.`. Whitespace in _text_, including tabs, spaces, and newlines, is significant and MUST be preserved during formatting. +```abnf +simple-start-char = %x0-2D ; omit . + / %x2F-5B ; omit \ + / %x5D-7A ; omit { + / %x7C ; omit } + / %x7E-D7FF ; omit surrogates + / %xE000-10FFFF +text-char = simple-start-char / "." +``` + When a _pattern_ is quoted by embedding the _pattern_ in curly brackets, the resulting _message_ can be embedded into various formats regardless of the container's whitespace trimming rules. 
@@ -246,19 +305,10 @@ Otherwise, care must be taken to ensure that pattern-significant whitespace is p > This _pattern_ consists of _text_ with exactly three spaces before and after the word "Hello": > > ```properties -> hello = {{{{ Hello }}}} +> hello = {{ Hello }} > hello2=\ Hello \ > ``` -```abnf -text = 1*(text-char / text-escape) -text-char = %x0-5B ; omit \ - / %x5D-7A ; omit { - / %x7C ; omit } - / %x7E-D7FF ; omit surrogates - / %xE000-10FFFF -``` - ### Placeholder A **_placeholder_** is an _expression_ that appears inside of a _pattern_ @@ -270,7 +320,7 @@ placeholder = expression ## Matcher -A **_matcher_** is the _body_ of a _message_ that allows runtime selection +A **_matcher_** is the _complex body_ of a _message_ that allows runtime selection of the _pattern_ to use for formatting. This allows the form or content of a _message_ to vary based on values determined at runtime. @@ -288,29 +338,28 @@ satisfied: - At least one _variant_ MUST exist whose _keys_ are all equal to the "catch-all" key `*`. ```abnf -matcher = match 1*(selector) 1*(variant) +matcher = match-statement 1*([s] variant) +match-statement = match 1*([s] selector) ``` > A _message_ with a _matcher_: > > ``` -> {{ -> match {$count :number} -> when 1 {{You have one notification.}} -> when * {{You have {$count} notifications.}} -> }} +> .match {$count :number} +> 1 {{You have one notification.}} +> * {{You have {$count} notifications.}} > ``` > A _message_ containing a _matcher_ formatted on a single line: > > ``` -> {{match {:platform} when windows {{Settings}} when * {{Preferences}}}} +> .match {:platform} windows {{Settings}} * {{Preferences}} > ``` ### Selector A **_selector_** is an _expression_ that ranks or excludes the -_variants_ based on the value of its corresponding _key_ in each _variant_. +_variants_ based on the value of the corresponding _key_ in each _variant_. The combination of _selectors_ in a _matcher_ thus determines which _pattern_ will be used during formatting. 
@@ -325,41 +374,36 @@ There MAY be any number of additional _selectors_. > allowing the _message_ to choose a _pattern_ based on grammatical case: > > ``` -> {{ -> match {$userName :hasCase} -> when vocative {{Hello, {$userName :person case=vocative}!}} -> when accusative {{Please welcome {$userName :person case=accusative}!}} -> when * {{Hello!}} -> }} +> .match {$userName :hasCase} +> vocative {{Hello, {$userName :person case=vocative}!}} +> accusative {{Please welcome {$userName :person case=accusative}!}} +> * {{Hello!}} > ``` > A message with two _selectors_: > > ``` -> {{ -> match {$photoCount :number} {$userGender :equals} -> when 1 masculine {{{$userName} added a new photo to his album.}} -> when 1 feminine {{{$userName} added a new photo to her album.}} -> when 1 * {{{$userName} added a new photo to their album.}} -> when * masculine {{{$userName} added {$photoCount} photos to his album.}} -> when * feminine {{{$userName} added {$photoCount} photos to her album.}} -> when * * {{{$userName} added {$photoCount} photos to their album.}} -> }} +> .match {$photoCount :number} {$userGender :equals} +> 1 masculine {{{$userName} added a new photo to his album.}} +> 1 feminine {{{$userName} added a new photo to her album.}} +> 1 * {{{$userName} added a new photo to their album.}} +> * masculine {{{$userName} added {$photoCount} photos to his album.}} +> * feminine {{{$userName} added {$photoCount} photos to her album.}} +> * * {{{$userName} added {$photoCount} photos to their album.}} > ``` ### Variant -A **_variant_** is a _pattern_ associated with a set of _keys_ in a _matcher_. -Each _variant_ MUST begin with the keyword `when`, -be followed by a sequence of _keys_, -and terminate with a valid _pattern_. +A **_variant_** is a _quoted pattern_ associated with a set of _keys_ in a _matcher_. +Each _variant_ MUST begin with a sequence of _keys_, +and terminate with a valid _quoted pattern_. 
The number of _keys_ in each _variant_ MUST match the number of _selectors_ in the _matcher_. -Each _key_ is separated from the keyword `when` and from each other by whitespace. -Whitespace is permitted but not required between the last _key_ and the _pattern_. +_Keys_ are separated from each other by whitespace. +Whitespace is permitted but not required between the last _key_ and the _quoted pattern_. ```abnf -variant = when 1*(s key) [s] pattern +variant = key *(s key) [s] quoted-pattern key = literal / "*" ``` @@ -394,7 +438,6 @@ expression = literal-expression / variable-expression / function-expression literal-expression = "{" [s] literal [s annotation] [s] "}" variable-expression = "{" [s] variable [s annotation] [s] "}" function-expression = "{" [s] annotation [s] "}" -annotation = (function *(s option)) / private-use / reserved ``` There are several types of _expression_ that can appear in a _message_. @@ -411,14 +454,14 @@ Additionally, an _input-declaration_ can contain a _variable-expression_. > Declarations: > > ``` -> input {$x :function option=value} -> local $y = {|This is an expression|} +> .input {$x :function option=value} +> .local $y = {|This is an expression|} > ``` > > Selectors: > > ``` -> match {$selector :functionRequired} +> .match {$selector :functionRequired} > ``` > > Placeholders: @@ -433,10 +476,12 @@ Additionally, an _input-declaration_ can contain a _variable-expression_. An **_annotation_** is part of an _expression_ containing either a _function_ together with its associated _options_, or -a _private-use_ or _reserved_ sequence. +a _reserved annotation_ or a _private-use annotation_. ```abnf -annotation = (function *(s option)) / reserved / private-use +annotation = (function *(s option)) + / reserved-annotation + / private-use-annotation ``` An **_operand_** is the _literal_ of a _literal-expression_ or @@ -533,37 +578,38 @@ option = identifier [s] "=" [s] (literal / variable) > Hello, {$userObj :person firstName=long}!
> ``` -#### Private-Use +#### Private-Use Annotations -A **_private-use_** _annotation_ is an _annotation_ whose syntax is reserved +A **_private-use annotation_** is an _annotation_ whose syntax is reserved for use by a specific implementation or by private agreement between multiple implementations. -Implementations MAY define their own meaning and semantics for _private-use_ annotations. +Implementations MAY define their own meaning and semantics for _private-use annotations_. -A _private-use_ annotation starts with either U+0026 AMPERSAND `&` or U+005E CIRCUMFLEX ACCENT `^`. +A _private-use annotation_ starts with either U+0026 AMPERSAND `&` or U+005E CIRCUMFLEX ACCENT `^`. Characters, including whitespace, are assigned meaning by the implementation. The definition of escapes in the `reserved-body` production, used for the body of -a _private-use_ annotation is an affordance to implementations that +a _private-use annotation_ is an affordance to implementations that wish to use a syntax exactly like other functions. Specifically: - The characters `\`, `{`, and `}` MUST be escaped as `\\`, `\{`, and `\}` respectively - when they appear in the body of a _private-use_ annotation. -- The character `|` is special: it SHOULD be escaped as `\|` in a _private-use_ annotation, - but can appear unescaped as long as it is paired with another `|`. This is an affordance to - allow _literals_ to appear in the private use syntax. + when they appear in the body of a _private-use annotation_. +- The character `|` is special: it SHOULD be escaped as `\|` in a _private-use annotation_, + but can appear unescaped as long as it is paired with another `|`. + This is an affordance to allow _literals_ to appear in the private use syntax. -A _private-use_ _annotation_ MAY be empty after its introducing sigil. - -**NOTE:** Users are cautioned that _private-use_ sequences cannot be reliably exchanged -and can result in errors during formatting. 
-It is generally a better idea to use the function registry -to define additional formatting or annotation options. +A _private-use annotation_ MAY be empty after its introducing sigil. ```abnf -private-use = private-start reserved-body -private-start = "&" / "^" +private-use-annotation = private-start reserved-body +private-start = "^" / "&" ``` +> [!Note] +> Users are cautioned that _private-use annotations_ cannot be reliably exchanged +> and can result in errors during formatting. +> It is generally a better idea to use the function registry +> to define additional formatting or annotation options. + > Here are some examples of what _private-use_ sequences might look like: > > ``` @@ -575,39 +621,40 @@ private-start = "&" / "^" > Protect stuff in {^ph}{^/ph}private use{^ph}{^/ph} > ``` -#### Reserved +#### Reserved Annotations -A **_reserved_** _annotation_ is an _annotation_ whose syntax is reserved +A **_reserved annotation_** is an _annotation_ whose syntax is reserved for future standardization. -A _reserved_ _annotation_ starts with a reserved character. -A _reserved_ _annotation_ MAY be empty or contain arbitrary text after its first character. +A _reserved annotation_ starts with a reserved character. +A _reserved annotation_ MAY be empty or contain arbitrary text after its first character. This allows maximum flexibility in future standardization, as future definitions MAY define additional semantics and constraints on the contents of these _annotations_. -A _reserved_ _annotation_ does not include trailing whitespace. +A _reserved annotation_ does not include trailing whitespace. Implementations MUST NOT assign meaning or semantics to an _annotation_ starting with `reserved-start`: these are reserved for future standardization. -Implementations MUST NOT remove or alter the contents of a _reserved_ _annotation_. +Implementations MUST NOT remove or alter the contents of a _reserved annotation_. 
While a reserved sequence is technically "well-formed", -unrecognized reserved sequences have no meaning and MAY result in errors during formatting. +unrecognized _reserved annotations_ or _private-use annotations_ have no meaning. ```abnf -reserved = reserved-start reserved-body -reserved-start = "!" / "@" / "#" / "%" / "*" / "<" / ">" / "/" / "?" / "~" - -reserved-body = *( [s] 1*(reserved-char / reserved-escape / quoted)) -reserved-char = %x00-08 ; omit HTAB and LF - / %x0B-0C ; omit CR - / %x0E-19 ; omit SP - / %x21-5B ; omit \ - / %x5D-7A ; omit { | } - / %x7E-D7FF ; omit surrogates - / %xE000-10FFFF +reserved-annotation = reserved-annotation-start reserved-body +reserved-annotation-start = "!" / "@" / "#" / "%" / "*" + / "<" / ">" / "/" / "?" / "~" + +reserved-body = *([s] 1*(reserved-char / reserved-escape / quoted)) +reserved-char = %x00-08 ; omit HTAB and LF + / %x0B-0C ; omit CR + / %x0E-19 ; omit SP + / %x21-5B ; omit \ + / %x5D-7A ; omit { | } + / %x7E-D7FF ; omit surrogates + / %xE000-10FFFF ``` ## Other Syntax Elements @@ -618,14 +665,13 @@ This section defines common elements used to construct _messages_. A **_keyword_** is a reserved token that has a unique meaning in the _message_ syntax. -The following four keywords are reserved: `input`, `local`, `match`, and `when`. -Reserved keywords are always lowercase. +The following three keywords are defined: `.input`, `.local`, and `.match`. +Keywords are always lowercase and start with U+002E FULL STOP `.`.
```abnf -input = "input" -local = "local" -match = "match" -when = "when" +input = %s".input" +local = %s".local" +match = %s".match" ``` ### Literals @@ -781,9 +827,7 @@ To make `message.abnf` compatible with that version of ABNF, replace the rules of the same name with this block: ```abnf -; reserved keywords are always lowercase -input = %x69.6E.70.75.74 ; "input" -local = %x6C.6F.63.61.6C ; "local" -match = %x6D.61.74.63.68 ; "match" -when = %x77.68.65.6E ; "when" +input = %x2E.69.6E.70.75.74 ; ".input" +local = %x2E.6C.6F.63.61.6C ; ".local" +match = %x2E.6D.61.74.63.68 ; ".match" ```