Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 677d666

Browse files
petebacondarwin and matsko
authored and committed
fix(localize): improve matching and parsing of XLIFF 1.2 translation files (#35793)
Previously, the `Xliff1TranslationParser` only matched files that had a narrow choice of extensions (e.g. `xlf`) and also relied upon a regular expression match of an optional XML namespace directive. This commit relaxes the requirement on both of these and, instead, relies upon parsing the file into XML and identifying an element of the form `<xliff version="1.2">` which is the minimal requirement for such files. PR Close #35793
1 parent 4d7b176 commit 677d666

File tree

4 files changed

+901
-119
lines changed

4 files changed

+901
-119
lines changed

‎packages/localize/src/tools/src/translate/translation_files/translation_loader.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ export class TranslationLoader {
4242
}
4343

4444
const {locale: parsedLocale, translations, diagnostics} =
45-
translationParser.parse(filePath, fileContents);
45+
translationParser.parse(filePath, fileContents, result);
4646
if (diagnostics.hasErrors) {
4747
throw new Error(diagnostics.formatDiagnostics(
4848
`The translation file "${filePath}" could not be parsed.`));

‎packages/localize/src/tools/src/translate/translation_files/translation_parsers/translation_utils.ts

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
* Use of this source code is governed by an MIT-style license that can be
66
* found in the LICENSE file at https://angular.io/license
77
*/
8-
import {Element, LexerRange, Node, XmlParser} from '@angular/compiler';
8+
import {Element, LexerRange, Node, ParseError, ParseErrorLevel, ParseSourceSpan, XmlParser} from '@angular/compiler';
9+
import {Diagnostics} from '../../../diagnostics';
910
import {TranslationParseError} from './translation_parse_error';
1011

1112
export function getAttrOrThrow(element: Element, attrName: string): string {
@@ -22,6 +23,14 @@ export function getAttribute(element: Element, attrName: string): string|undefin
2223
return attr !== undefined ? attr.value : undefined;
2324
}
2425

26+
/**
27+
* Parse the "contents" of an XML element.
28+
*
29+
* This would be equivalent to parsing the `innerHTML` string of an HTML document.
30+
*
31+
* @param element The element whose inner range we want to parse.
32+
* @returns a collection of XML `Node` objects that were parsed from the element's contents.
33+
*/
2534
export function parseInnerRange(element: Element): Node[] {
2635
const xmlParser = new XmlParser();
2736
const xml = xmlParser.parse(
@@ -33,6 +42,10 @@ export function parseInnerRange(element: Element): Node[] {
3342
return xml.rootNodes;
3443
}
3544

45+
/**
46+
* Compute a `LexerRange` that contains all the children of the given `element`.
47+
* @param element The element whose inner range we want to compute.
48+
*/
3649
function getInnerRange(element: Element): LexerRange {
3750
const start = element.startSourceSpan !.end;
3851
const end = element.endSourceSpan !.start;
@@ -42,4 +55,94 @@ function getInnerRange(element: Element): LexerRange {
4255
startCol: start.col,
4356
endPos: end.offset,
4457
};
45-
}
58+
}
59+
60+
/**
61+
* This "hint" object is used to pass information from `canParse()` to `parse()` for
62+
* `TranslationParser`s that expect XML contents.
63+
*
64+
* This saves the `parse()` method from having to re-parse the XML.
65+
*/
66+
export interface XmlTranslationParserHint {
67+
element: Element;
68+
errors: ParseError[];
69+
}
70+
71+
/**
72+
* Can this XML be parsed for translations, given the expected `rootNodeName` and expected root node
73+
* `attributes` that should appear in the file.
74+
*
75+
* @param filePath The path to the file being checked.
76+
* @param contents The contents of the file being checked.
77+
* @param rootNodeName The expected name of an XML root node that should exist.
78+
* @param attributes The attributes (and their values) that should appear on the root node.
79+
* @returns The `XmlTranslationParserHint` object for use by `TranslationParser.parse()` if the XML
80+
* document has the expected format, or `false` if it does not.
81+
*/
82+
export function canParseXml(
83+
filePath: string, contents: string, rootNodeName: string,
84+
attributes: Record<string, string>): XmlTranslationParserHint|false {
85+
const xmlParser = new XmlParser();
86+
const xml = xmlParser.parse(contents, filePath);
87+
88+
if (xml.rootNodes.length === 0 ||
89+
xml.errors.some(error => error.level === ParseErrorLevel.ERROR)) {
90+
return false;
91+
}
92+
93+
const rootElements = xml.rootNodes.filter(isNamedElement(rootNodeName));
94+
const rootElement = rootElements[0];
95+
if (rootElement === undefined) {
96+
return false;
97+
}
98+
99+
for (const attrKey of Object.keys(attributes)) {
100+
const attr = rootElement.attrs.find(attr => attr.name === attrKey);
101+
if (attr === undefined || attr.value !== attributes[attrKey]) {
102+
return false;
103+
}
104+
}
105+
106+
if (rootElements.length > 1) {
107+
xml.errors.push(new ParseError(
108+
xml.rootNodes[1].sourceSpan,
109+
'Unexpected root node. XLIFF 1.2 files should only have a single <xliff> root node.',
110+
ParseErrorLevel.WARNING));
111+
}
112+
113+
return {element: rootElement, errors: xml.errors};
114+
}
115+
116+
/**
117+
* Create a predicate, which can be used by things like `Array.filter()`, that will match a named
118+
* XML Element from a collection of XML Nodes.
119+
*
120+
* @param name The expected name of the element to match.
121+
*/
122+
export function isNamedElement(name: string): (node: Node) => node is Element {
123+
function predicate(node: Node): node is Element {
124+
return node instanceof Element && node.name === name;
125+
}
126+
return predicate;
127+
}
128+
129+
/**
130+
* Add an XML parser related message to the given `diagnostics` object.
131+
*/
132+
export function addParseDiagnostic(
133+
diagnostics: Diagnostics, sourceSpan: ParseSourceSpan, message: string,
134+
level: ParseErrorLevel): void {
135+
addParseError(diagnostics, new ParseError(sourceSpan, message, level));
136+
}
137+
138+
/**
139+
* Copy the formatted error message from the given `parseError` object into the given `diagnostics`
140+
* object.
141+
*/
142+
export function addParseError(diagnostics: Diagnostics, parseError: ParseError): void {
143+
if (parseError.level === ParseErrorLevel.ERROR) {
144+
diagnostics.error(parseError.toString());
145+
} else {
146+
diagnostics.warn(parseError.toString());
147+
}
148+
}

‎packages/localize/src/tools/src/translate/translation_files/translation_parsers/xliff1_translation_parser.ts

Lines changed: 98 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,16 @@
55
* Use of this source code is governed by an MIT-style license that can be
66
* found in the LICENSE file at https://angular.io/license
77
*/
8-
import {Element, Node, XmlParser, visitAll} from '@angular/compiler';
9-
import {ɵMessageId, ɵParsedTranslation} from '@angular/localize';
10-
import {extname} from 'path';
8+
import {Element, ParseErrorLevel, visitAll} from '@angular/compiler';
9+
import {ɵParsedTranslation} from '@angular/localize';
1110

1211
import {Diagnostics} from '../../../diagnostics';
1312
import {BaseVisitor} from '../base_visitor';
1413
import {MessageSerializer} from '../message_serialization/message_serializer';
1514
import {TargetMessageRenderer} from '../message_serialization/target_message_renderer';
1615

17-
import {TranslationParseError} from './translation_parse_error';
1816
import {ParsedTranslationBundle, TranslationParser} from './translation_parser';
19-
import {getAttrOrThrow, getAttribute, parseInnerRange} from './translation_utils';
20-
21-
const XLIFF_1_2_NS_REGEX = /xmlns="urn:oasis:names:tc:xliff:document:1.2"/;
17+
import {XmlTranslationParserHint, addParseDiagnostic, addParseError, canParseXml, getAttribute, isNamedElement, parseInnerRange} from './translation_utils';
2218

2319
/**
2420
* A translation parser that can load XLIFF 1.2 files.
@@ -27,68 +23,120 @@ const XLIFF_1_2_NS_REGEX = /xmlns="urn:oasis:names:tc:xliff:document:1.2"/;
2723
* http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2.html
2824
*
2925
*/
30-
export class Xliff1TranslationParser implements TranslationParser {
31-
canParse(filePath: string, contents: string): boolean {
32-
return (extname(filePath) === '.xlf') && XLIFF_1_2_NS_REGEX.test(contents);
26+
export class Xliff1TranslationParser implements TranslationParser<XmlTranslationParserHint> {
27+
canParse(filePath: string, contents: string): XmlTranslationParserHint|false {
28+
return canParseXml(filePath, contents, 'xliff', {version: '1.2'});
29+
}
30+
31+
parse(filePath: string, contents: string, hint?: XmlTranslationParserHint):
32+
ParsedTranslationBundle {
33+
if (hint) {
34+
return this.extractBundle(hint);
35+
} else {
36+
return this.extractBundleDeprecated(filePath, contents);
37+
}
38+
}
39+
40+
private extractBundle({element, errors}: XmlTranslationParserHint): ParsedTranslationBundle {
41+
const diagnostics = new Diagnostics();
42+
errors.forEach(e => addParseError(diagnostics, e));
43+
44+
if (element.children.length === 0) {
45+
addParseDiagnostic(
46+
diagnostics, element.sourceSpan, 'Missing expected <file> element',
47+
ParseErrorLevel.WARNING);
48+
return {locale: undefined, translations: {}, diagnostics};
49+
}
50+
51+
const files = element.children.filter(isNamedElement('file'));
52+
if (files.length === 0) {
53+
addParseDiagnostic(
54+
diagnostics, element.sourceSpan, 'No <file> elements found in <xliff>',
55+
ParseErrorLevel.WARNING);
56+
} else if (files.length > 1) {
57+
addParseDiagnostic(
58+
diagnostics, files[1].sourceSpan, 'More than one <file> element found in <xliff>',
59+
ParseErrorLevel.WARNING);
60+
}
61+
62+
const bundle = {
63+
locale: getAttribute(files[0], 'target-language'),
64+
translations: {}, diagnostics,
65+
};
66+
const translationVisitor = new XliffTranslationVisitor();
67+
visitAll(translationVisitor, files[0].children, bundle);
68+
69+
return bundle;
3370
}
3471

35-
parse(filePath: string, contents: string): ParsedTranslationBundle {
36-
const xmlParser = new XmlParser();
37-
const xml = xmlParser.parse(contents, filePath);
38-
const bundle = XliffFileElementVisitor.extractBundle(xml.rootNodes);
39-
if (bundle === undefined) {
72+
private extractBundleDeprecated(filePath: string, contents: string) {
73+
const hint = this.canParse(filePath, contents);
74+
if (!hint) {
4075
throw new Error(`Unable to parse "${filePath}" as XLIFF 1.2 format.`);
4176
}
77+
const bundle = this.extractBundle(hint);
78+
if (bundle.diagnostics.hasErrors) {
79+
const message =
80+
bundle.diagnostics.formatDiagnostics(`Failed to parse "${filePath}" as XLIFF 1.2 format`);
81+
throw new Error(message);
82+
}
4283
return bundle;
4384
}
4485
}
4586

4687
class XliffFileElementVisitor extends BaseVisitor {
47-
private bundle: ParsedTranslationBundle|undefined;
48-
49-
static extractBundle(xliff: Node[]): ParsedTranslationBundle|undefined {
50-
const visitor = new this();
51-
visitAll(visitor, xliff);
52-
return visitor.bundle;
88+
visitElement(fileElement: Element): any {
89+
if (fileElement.name === 'file') {
90+
return {fileElement, locale: getAttribute(fileElement, 'target-language')};
91+
}
5392
}
93+
}
5494

55-
visitElement(element: Element): any {
56-
if (element.name === 'file') {
57-
this.bundle = {
58-
locale: getAttribute(element, 'target-language'),
59-
translations: XliffTranslationVisitor.extractTranslations(element),
60-
diagnostics: new Diagnostics(),
61-
};
95+
class XliffTranslationVisitor extends BaseVisitor {
96+
visitElement(element: Element, bundle: ParsedTranslationBundle): void {
97+
if (element.name === 'trans-unit') {
98+
this.visitTransUnitElement(element, bundle);
6299
} else {
63-
return visitAll(this, element.children);
100+
visitAll(this, element.children, bundle);
64101
}
65102
}
66-
}
67103

68-
class XliffTranslationVisitor extends BaseVisitor {
69-
private translations: Record<ɵMessageId, ɵParsedTranslation> = {};
104+
private visitTransUnitElement(element: Element, bundle: ParsedTranslationBundle): void {
105+
// Error if no `id` attribute
106+
const id = getAttribute(element, 'id');
107+
if (id === undefined) {
108+
addParseDiagnostic(
109+
bundle.diagnostics, element.sourceSpan,
110+
`Missing required "id" attribute on <trans-unit> element.`, ParseErrorLevel.ERROR);
111+
return;
112+
}
70113

71-
static extractTranslations(file: Element): Record<string, ɵParsedTranslation> {
72-
const visitor = new this();
73-
visitAll(visitor, file.children);
74-
return visitor.translations;
75-
}
114+
// Error if there is already a translation with the same id
115+
if (bundle.translations[id] !== undefined) {
116+
addParseDiagnostic(
117+
bundle.diagnostics, element.sourceSpan, `Duplicated translations for message "${id}"`,
118+
ParseErrorLevel.ERROR);
119+
return;
120+
}
76121

77-
visitElement(element: Element): any {
78-
if (element.name === 'trans-unit') {
79-
const id = getAttrOrThrow(element, 'id');
80-
if (this.translations[id] !== undefined) {
81-
throw new TranslationParseError(
82-
element.sourceSpan, `Duplicated translations for message "${id}"`);
83-
}
122+
// Error if there is no `<target>` child element
123+
const targetMessage = element.children.find(isNamedElement('target'));
124+
if (targetMessage === undefined) {
125+
addParseDiagnostic(
126+
bundle.diagnostics, element.sourceSpan, 'Missing required <target> element',
127+
ParseErrorLevel.ERROR);
128+
return;
129+
}
84130

85-
const targetMessage = element.children.find(isTargetElement);
86-
if (targetMessage === undefined) {
87-
throw new TranslationParseError(element.sourceSpan, 'Missing required <target> element');
131+
try {
132+
bundle.translations[id] = serializeTargetMessage(targetMessage);
133+
} catch (e) {
134+
// Capture any errors from serializing the target message
135+
if (e.span && e.msg && e.level) {
136+
addParseDiagnostic(bundle.diagnostics, e.span, e.msg, e.level);
137+
} else {
138+
throw e;
88139
}
89-
this.translations[id] = serializeTargetMessage(targetMessage);
90-
} else {
91-
return visitAll(this, element.children);
92140
}
93141
}
94142
}
@@ -100,7 +148,3 @@ function serializeTargetMessage(source: Element): ɵParsedTranslation {
100148
});
101149
return serializer.serialize(parseInnerRange(source));
102150
}
103-
104-
function isTargetElement(node: Node): node is Element {
105-
return node instanceof Element && node.name === 'target';
106-
}

0 commit comments

Comments
 (0)