From 4482a914f52311356f6f4b7a695d4075ca22c0c6 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Tue, 17 Nov 2020 16:53:38 +0800 Subject: [PATCH 01/40] internal/language: fix resizeRange index wrong way Fixes golang/go#42536 Change-Id: I572cdbb26d320c4d9a972d555ddc6427ce1f0348 Reviewed-on: https://go-review.googlesource.com/c/text/+/270697 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Marcel van Lohuizen Trust: Meng Zhuo --- internal/language/parse.go | 11 ++++++----- language/parse_test.go | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/language/parse.go b/internal/language/parse.go index 2be83e1da..a2fdad89d 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -133,14 +133,15 @@ func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { s.start = oldStart if end := oldStart + newSize; end != oldEnd { diff := end - oldEnd - if end < cap(s.b) { - b := make([]byte, len(s.b)+diff) + var b []byte + if n := len(s.b) + diff; n > cap(s.b) { + b = make([]byte, n) copy(b, s.b[:oldStart]) - copy(b[end:], s.b[oldEnd:]) - s.b = b } else { - s.b = append(s.b[end:], s.b[oldEnd:]...) + b = s.b[:n:n] } + copy(b[end:], s.b[oldEnd:]) + s.b = b s.next = end + (s.next - s.end) s.end = end } diff --git a/language/parse_test.go b/language/parse_test.go index 2ff28bfd3..041660c4e 100644 --- a/language/parse_test.go +++ b/language/parse_test.go @@ -318,6 +318,7 @@ func TestParseAcceptLanguage(t *testing.T) { {nil, "aa;q", false}, {nil, "aa;q=", false}, {nil, "aa;q=.", false}, + {nil, "00-t-0o", false}, // odd fallbacks { From 967b8f6126b019daebc17c221889cb59560fa8d1 Mon Sep 17 00:00:00 2001 From: Patrick Gundlach Date: Thu, 5 Nov 2020 13:15:10 +0100 Subject: [PATCH 02/40] text/unicod/bidi: implement API, remove panics The bidi API splits strings with mixed left-to-right (ltr) and right-to-left (rtl) parts into substrings (segments). 
Each segment contains a substring, a direction of text flow (either ltr or rtl) and the start and end positions in the input. The paragraph validators do not panic, instead the newParagraph function returns an error message in case the input is invalid. Fixes golang/go#42356 Change-Id: I90cafc8fadb0cf6936dfb1ab373586017147d709 Reviewed-on: https://go-review.googlesource.com/c/text/+/267857 Trust: Ian Lance Taylor Reviewed-by: Marcel van Lohuizen --- unicode/bidi/bidi.go | 221 ++++++++++++++++++++---- unicode/bidi/bidi_test.go | 347 ++++++++++++++++++++++++++++++++++++++ unicode/bidi/core.go | 63 ++++--- unicode/bidi/core_test.go | 10 +- 4 files changed, 584 insertions(+), 57 deletions(-) create mode 100644 unicode/bidi/bidi_test.go diff --git a/unicode/bidi/bidi.go b/unicode/bidi/bidi.go index e8edc54cc..fd057601b 100644 --- a/unicode/bidi/bidi.go +++ b/unicode/bidi/bidi.go @@ -12,15 +12,14 @@ // and without notice. package bidi // import "golang.org/x/text/unicode/bidi" -// TODO: -// The following functionality would not be hard to implement, but hinges on -// the definition of a Segmenter interface. For now this is up to the user. -// - Iterate over paragraphs -// - Segmenter to iterate over runs directly from a given text. -// Also: +// TODO // - Transformer for reordering? // - Transformer (validator, really) for Bidi Rule. +import ( + "bytes" +) + // This API tries to avoid dealing with embedding levels for now. Under the hood // these will be computed, but the question is to which extent the user should // know they exist. We should at some point allow the user to specify an @@ -49,7 +48,9 @@ const ( Neutral ) -type options struct{} +type options struct { + defaultDirection Direction +} // An Option is an option for Bidi processing. type Option func(*options) @@ -66,12 +67,62 @@ type Option func(*options) // DefaultDirection sets the default direction for a Paragraph. The direction is // overridden if the text contains directional characters. 
func DefaultDirection(d Direction) Option { - panic("unimplemented") + return func(opts *options) { + opts.defaultDirection = d + } } // A Paragraph holds a single Paragraph for Bidi processing. type Paragraph struct { - // buffers + p []byte + o Ordering + opts []Option + types []Class + pairTypes []bracketType + pairValues []rune + runes []rune + options options +} + +// Initialize the p.pairTypes, p.pairValues and p.types from the input previously +// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph +// separator (bidi class B). +// +// The function p.Order() needs these values to be set, so this preparation could be postponed. +// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph +// separator, the whole input needs to be processed anyway and should not be done twice. +// +// The function has the same return values as SetBytes() / SetString() +func (p *Paragraph) prepareInput() (n int, err error) { + p.runes = bytes.Runes(p.p) + bytecount := 0 + // clear slices from previous SetString or SetBytes + p.pairTypes = nil + p.pairValues = nil + p.types = nil + + for _, r := range p.runes { + props, i := LookupRune(r) + bytecount += i + cls := props.Class() + if cls == B { + return bytecount, nil + } + p.types = append(p.types, cls) + if props.IsOpeningBracket() { + p.pairTypes = append(p.pairTypes, bpOpen) + p.pairValues = append(p.pairValues, r) + } else if props.IsBracket() { + // this must be a closing bracket, + // since IsOpeningBracket is not true + p.pairTypes = append(p.pairTypes, bpClose) + p.pairValues = append(p.pairValues, r) + } else { + p.pairTypes = append(p.pairTypes, bpNone) + p.pairValues = append(p.pairValues, 0) + } + } + return bytecount, nil } // SetBytes configures p for the given paragraph text. It replaces text @@ -80,70 +131,150 @@ type Paragraph struct { // consumed from b including this separator. Error may be non-nil if options are // given. 
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { - panic("unimplemented") + p.p = b + p.opts = opts + return p.prepareInput() } -// SetString configures p for the given paragraph text. It replaces text -// previously set by SetBytes or SetString. If b contains a paragraph separator +// SetString configures p for the given paragraph text. It replaces text +// previously set by SetBytes or SetString. If s contains a paragraph separator // it will only process the first paragraph and report the number of bytes -// consumed from b including this separator. Error may be non-nil if options are +// consumed from s including this separator. Error may be non-nil if options are // given. func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { - panic("unimplemented") + p.p = []byte(s) + p.opts = opts + return p.prepareInput() } // IsLeftToRight reports whether the principle direction of rendering for this // paragraphs is left-to-right. If this returns false, the principle direction // of rendering is right-to-left. func (p *Paragraph) IsLeftToRight() bool { - panic("unimplemented") + return p.Direction() == LeftToRight } // Direction returns the direction of the text of this paragraph. // // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. func (p *Paragraph) Direction() Direction { - panic("unimplemented") + return p.o.Direction() } +// TODO: what happens if the position is > len(input)? This should return an error. + // RunAt reports the Run at the given position of the input text. // // This method can be used for computing line breaks on paragraphs. 
func (p *Paragraph) RunAt(pos int) Run { - panic("unimplemented") + c := 0 + runNumber := 0 + for i, r := range p.o.runes { + c += len(r) + if pos < c { + runNumber = i + } + } + return p.o.Run(runNumber) +} + +func calculateOrdering(levels []level, runes []rune) Ordering { + var curDir Direction + + prevDir := Neutral + prevI := 0 + + o := Ordering{} + // lvl = 0,2,4,...: left to right + // lvl = 1,3,5,...: right to left + for i, lvl := range levels { + if lvl%2 == 0 { + curDir = LeftToRight + } else { + curDir = RightToLeft + } + if curDir != prevDir { + if i > 0 { + o.runes = append(o.runes, runes[prevI:i]) + o.directions = append(o.directions, prevDir) + o.startpos = append(o.startpos, prevI) + } + prevI = i + prevDir = curDir + } + } + o.runes = append(o.runes, runes[prevI:]) + o.directions = append(o.directions, prevDir) + o.startpos = append(o.startpos, prevI) + return o } // Order computes the visual ordering of all the runs in a Paragraph. func (p *Paragraph) Order() (Ordering, error) { - panic("unimplemented") + if len(p.types) == 0 { + return Ordering{}, nil + } + + for _, fn := range p.opts { + fn(&p.options) + } + lvl := level(-1) + if p.options.defaultDirection == RightToLeft { + lvl = 1 + } + para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) + if err != nil { + return Ordering{}, err + } + + levels := para.getLevels([]int{len(p.types)}) + + p.o = calculateOrdering(levels, p.runes) + return p.o, nil } // Line computes the visual ordering of runs for a single line starting and // ending at the given positions in the original text. 
func (p *Paragraph) Line(start, end int) (Ordering, error) { - panic("unimplemented") + lineTypes := p.types[start:end] + para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) + if err != nil { + return Ordering{}, err + } + levels := para.getLevels([]int{len(lineTypes)}) + o := calculateOrdering(levels, p.runes[start:end]) + return o, nil } // An Ordering holds the computed visual order of runs of a Paragraph. Calling // SetBytes or SetString on the originating Paragraph invalidates an Ordering. // The methods of an Ordering should only be called by one goroutine at a time. -type Ordering struct{} +type Ordering struct { + runes [][]rune + directions []Direction + startpos []int +} // Direction reports the directionality of the runs. // // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. func (o *Ordering) Direction() Direction { - panic("unimplemented") + return o.directions[0] } // NumRuns returns the number of runs. func (o *Ordering) NumRuns() int { - panic("unimplemented") + return len(o.runes) } // Run returns the ith run within the ordering. func (o *Ordering) Run(i int) Run { - panic("unimplemented") + r := Run{ + runes: o.runes[i], + direction: o.directions[i], + startpos: o.startpos[i], + } + return r } // TODO: perhaps with options. @@ -155,16 +286,19 @@ func (o *Ordering) Run(i int) Run { // A Run is a continuous sequence of characters of a single direction. type Run struct { + runes []rune + direction Direction + startpos int } // String returns the text of the run in its original order. func (r *Run) String() string { - panic("unimplemented") + return string(r.runes) } // Bytes returns the text of the run in its original order. func (r *Run) Bytes() []byte { - panic("unimplemented") + return []byte(r.String()) } // TODO: methods for @@ -174,25 +308,52 @@ func (r *Run) Bytes() []byte { // Direction reports the direction of the run. 
func (r *Run) Direction() Direction { - panic("unimplemented") + return r.direction } -// Position of the Run within the text passed to SetBytes or SetString of the +// Pos returns the position of the Run within the text passed to SetBytes or SetString of the // originating Paragraph value. func (r *Run) Pos() (start, end int) { - panic("unimplemented") + return r.startpos, r.startpos + len(r.runes) - 1 } // AppendReverse reverses the order of characters of in, appends them to out, // and returns the result. Modifiers will still follow the runes they modify. // Brackets are replaced with their counterparts. func AppendReverse(out, in []byte) []byte { - panic("unimplemented") + ret := make([]byte, len(in)+len(out)) + copy(ret, out) + inRunes := bytes.Runes(in) + + for i, r := range inRunes { + prop, _ := LookupRune(r) + if prop.IsBracket() { + inRunes[i] = prop.reverseBracket(r) + } + } + + for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { + inRunes[i], inRunes[j] = inRunes[j], inRunes[i] + } + copy(ret[len(out):], string(inRunes)) + + return ret } // ReverseString reverses the order of characters in s and returns a new string. // Modifiers will still follow the runes they modify. Brackets are replaced with // their counterparts. 
func ReverseString(s string) string { - panic("unimplemented") + input := []rune(s) + li := len(input) + ret := make([]rune, li) + for i, r := range input { + prop, _ := LookupRune(r) + if prop.IsBracket() { + ret[li-i-1] = prop.reverseBracket(r) + } else { + ret[li-i-1] = r + } + } + return string(ret) } diff --git a/unicode/bidi/bidi_test.go b/unicode/bidi/bidi_test.go new file mode 100644 index 000000000..88572f565 --- /dev/null +++ b/unicode/bidi/bidi_test.go @@ -0,0 +1,347 @@ +package bidi + +import ( + "log" + "testing" +) + +type runInformation struct { + str string + dir Direction + start int + end int +} + +func TestSimple(t *testing.T) { + str := "Hellö" + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + expectedRuns := []runInformation{ + {"Hellö", LeftToRight, 0, 4}, + } + + if !p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == false; want true") + } + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start %d, end %d", i, s, e, want.start, want.end) + } + if d := r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestMixed(t *testing.T) { + str := `العاشر ليونيكود (Unicode Conference)، الذي سيعقد في 10-12 آذار 1997 مبدينة` + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + if p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == true; want false") + } + + expectedRuns := []runInformation{ + {"العاشر ليونيكود (", RightToLeft, 0, 16}, + {"Unicode Conference", LeftToRight, 17, 34}, + {")، الذي سيعقد في ", RightToLeft, 35, 51}, + {"10", LeftToRight, 52, 53}, + 
{"-", RightToLeft, 54, 54}, + {"12", LeftToRight, 55, 56}, + {" آذار ", RightToLeft, 57, 62}, + {"1997", LeftToRight, 63, 66}, + {" مبدينة", RightToLeft, 67, 73}, + } + + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start = %d, end = %d", i, s, e, want.start, want.end) + } + if d := r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestExplicitIsolate(t *testing.T) { + // https://www.w3.org/International/articles/inline-bidi-markup/uba-basics.en#beyond + str := "The names of these states in Arabic are \u2067مصر\u2069, \u2067البحرين\u2069 and \u2067الكويت\u2069 respectively." + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + if !p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == false; want true") + } + + expectedRuns := []runInformation{ + {"The names of these states in Arabic are \u2067", LeftToRight, 0, 40}, + {"مصر", RightToLeft, 41, 43}, + {"\u2069, \u2067", LeftToRight, 44, 47}, + {"البحرين", RightToLeft, 48, 54}, + {"\u2069 and \u2067", LeftToRight, 55, 61}, + {"الكويت", RightToLeft, 62, 67}, + {"\u2069 respectively.", LeftToRight, 68, 82}, + } + + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start = %d, end = %d", i, s, e, want.start, want.end) + } + if d := 
r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestWithoutExplicitIsolate(t *testing.T) { + str := "The names of these states in Arabic are مصر, البحرين and الكويت respectively." + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + if !p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == false; want true") + } + + expectedRuns := []runInformation{ + {"The names of these states in Arabic are ", LeftToRight, 0, 39}, + {"مصر, البحرين", RightToLeft, 40, 51}, + {" and ", LeftToRight, 52, 56}, + {"الكويت", RightToLeft, 57, 62}, + {" respectively.", LeftToRight, 63, 76}, + } + + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start = %d, end = %d", i, s, e, want.start, want.end) + } + if d := r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestLongUTF8(t *testing.T) { + str := `𠀀` + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + if !p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == false; want true") + } + + expectedRuns := []runInformation{ + {"𠀀", LeftToRight, 0, 0}, + } + + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start = %d, end = %d", i, s, e, want.start, 
want.end) + } + if d := r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestLLongUTF8(t *testing.T) { + strTester := []struct { + str string + l int + }{ + {"ö", 2}, + {"ॡ", 3}, + {`𠀀`, 4}, + } + for _, st := range strTester { + str := st.str + want := st.l + if _, l := LookupString(str); l != want { + t.Errorf("LookupString(%q) length = %d; want %d", str, l, want) + } + + } + +} + +func TestMixedSimple(t *testing.T) { + str := `Uا` + p := Paragraph{} + p.SetString(str) + order, err := p.Order() + if err != nil { + log.Fatal(err) + } + if !p.IsLeftToRight() { + t.Error("p.IsLeftToRight() == false; want true") + } + + expectedRuns := []runInformation{ + {"U", LeftToRight, 0, 0}, + {"ا", RightToLeft, 1, 1}, + } + + if nr, want := order.NumRuns(), len(expectedRuns); nr != want { + t.Errorf("order.NumRuns() = %d; want %d", nr, want) + } + + for i, want := range expectedRuns { + r := order.Run(i) + if got := r.String(); got != want.str { + t.Errorf("Run(%d) = %q; want %q", i, got, want.str) + } + if s, e := r.Pos(); s != want.start || e != want.end { + t.Errorf("Run(%d).start = %d, .end = %d; want start = %d, end = %d", i, s, e, want.start, want.end) + } + if d := r.Direction(); d != want.dir { + t.Errorf("Run(%d).Direction = %d; want %d", i, d, want.dir) + } + } +} + +func TestDefaultDirection(t *testing.T) { + str := "+" + p := Paragraph{} + p.SetString(str, DefaultDirection(RightToLeft)) + _, err := p.Order() + if err != nil { + t.Error(err) + t.Fail() + } + if want, dir := false, p.IsLeftToRight(); want != dir { + t.Errorf("p.IsLeftToRight() = %t; want %t", dir, want) + } + p.SetString(str, DefaultDirection(LeftToRight)) + _, err = p.Order() + if err != nil { + t.Error(err) + t.Fail() + } + if want, dir := true, p.IsLeftToRight(); want != dir { + t.Errorf("p.IsLeftToRight() = %t; want %t", dir, want) + } + +} + +func TestEmpty(t *testing.T) { + p := Paragraph{} + p.SetBytes([]byte{}) + o, err := 
p.Order() + if err != nil { + t.Error("p.Order() return err != nil; want err == nil") + } + if nr := o.NumRuns(); nr != 0 { + t.Errorf("o.NumRuns() = %d; want 0", nr) + } +} + +func TestNewline(t *testing.T) { + str := "Hello\nworld" + p := Paragraph{} + n, err := p.SetString(str) + if err != nil { + t.Error(err) + } + // 6 is the length up to and including the \n + if want := 6; n != want { + t.Errorf("SetString(%q) = nil, %d; want nil, %d", str, n, want) + } +} + +func TestDoubleSetString(t *testing.T) { + str := "العاشر ليونيكود (Unicode Conference)،" + p := Paragraph{} + _, err := p.SetString(str) + if err != nil { + t.Error(err) + } + _, err = p.SetString(str) + if err != nil { + t.Error(err) + } + _, err = p.Order() + if err != nil { + t.Error(err) + } +} + +func TestReverseString(t *testing.T) { + input := "(Hello)" + want := "(olleH)" + if str := ReverseString(input); str != want { + t.Errorf("ReverseString(%s) = %q; want %q", input, str, want) + } +} + +func TestAppendReverse(t *testing.T) { + testcase := []struct { + inString string + outString string + want string + }{ + {"", "Hëllo", "Hëllo"}, + {"nice (wörld)", "", "(dlröw) ecin"}, + {"nice (wörld)", "Hëllo", "Hëllo(dlröw) ecin"}, + } + for _, tc := range testcase { + if r := AppendReverse([]byte(tc.outString), []byte(tc.inString)); string(r) != tc.want { + t.Errorf("AppendReverse([]byte(%q), []byte(%q) = %q; want %q", tc.outString, tc.inString, string(r), tc.want) + } + } + +} diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index 50deb6600..e4c081101 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -4,7 +4,10 @@ package bidi -import "log" +import ( + "fmt" + "log" +) // This implementation is a port based on the reference implementation found at: // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/ @@ -97,13 +100,20 @@ type paragraph struct { // rune (suggested is the rune of the open bracket for opening and matching // close brackets, after normalization). 
The embedding levels are optional, but // may be supplied to encode embedding levels of styled text. -// -// TODO: return an error. -func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { - validateTypes(types) - validatePbTypes(pairTypes) - validatePbValues(pairValues, pairTypes) - validateParagraphEmbeddingLevel(levels) +func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) { + var err error + if err = validateTypes(types); err != nil { + return nil, err + } + if err = validatePbTypes(pairTypes); err != nil { + return nil, err + } + if err = validatePbValues(pairValues, pairTypes); err != nil { + return nil, err + } + if err = validateParagraphEmbeddingLevel(levels); err != nil { + return nil, err + } p := &paragraph{ initialTypes: append([]Class(nil), types...), @@ -115,7 +125,7 @@ func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, lev resultTypes: append([]Class(nil), types...), } p.run() - return p + return p, nil } func (p *paragraph) Len() int { return len(p.initialTypes) } @@ -1001,58 +1011,61 @@ func typeForLevel(level level) Class { return R } -// TODO: change validation to not panic - -func validateTypes(types []Class) { +func validateTypes(types []Class) error { if len(types) == 0 { - log.Panic("types is null") + return fmt.Errorf("types is null") } for i, t := range types[:len(types)-1] { if t == B { - log.Panicf("B type before end of paragraph at index: %d", i) + return fmt.Errorf("B type before end of paragraph at index: %d", i) } } + return nil } -func validateParagraphEmbeddingLevel(embeddingLevel level) { +func validateParagraphEmbeddingLevel(embeddingLevel level) error { if embeddingLevel != implicitLevel && embeddingLevel != 0 && embeddingLevel != 1 { - log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) + return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel) } + return nil } -func 
validateLineBreaks(linebreaks []int, textLength int) { +func validateLineBreaks(linebreaks []int, textLength int) error { prev := 0 for i, next := range linebreaks { if next <= prev { - log.Panicf("bad linebreak: %d at index: %d", next, i) + return fmt.Errorf("bad linebreak: %d at index: %d", next, i) } prev = next } if prev != textLength { - log.Panicf("last linebreak was %d, want %d", prev, textLength) + return fmt.Errorf("last linebreak was %d, want %d", prev, textLength) } + return nil } -func validatePbTypes(pairTypes []bracketType) { +func validatePbTypes(pairTypes []bracketType) error { if len(pairTypes) == 0 { - log.Panic("pairTypes is null") + return fmt.Errorf("pairTypes is null") } for i, pt := range pairTypes { switch pt { case bpNone, bpOpen, bpClose: default: - log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) + return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i]) } } + return nil } -func validatePbValues(pairValues []rune, pairTypes []bracketType) { +func validatePbValues(pairValues []rune, pairTypes []bracketType) error { if pairValues == nil { - log.Panic("pairValues is null") + return fmt.Errorf("pairValues is null") } if len(pairTypes) != len(pairValues) { - log.Panic("pairTypes is different length from pairValues") + return fmt.Errorf("pairTypes is different length from pairValues") } + return nil } diff --git a/unicode/bidi/core_test.go b/unicode/bidi/core_test.go index b653399c4..1c928af96 100644 --- a/unicode/bidi/core_test.go +++ b/unicode/bidi/core_test.go @@ -55,7 +55,10 @@ func TestBidiCore(t *testing.T) { continue } lev := level(int(i) - 1) - par := newParagraph(types, pairTypes, pairValues, lev) + par, err := newParagraph(types, pairTypes, pairValues, lev) + if err != nil { + t.Error(err) + } if *testLevels { levels := par.getLevels([]int{len(types)}) @@ -142,7 +145,10 @@ func TestBidiCharacters(t *testing.T) { pairValues = append(pairValues, p.reverseBracket(r)) } } - par := newParagraph(types, 
pairTypes, pairValues, parLevel) + par, err := newParagraph(types, pairTypes, pairValues, parLevel) + if err != nil { + t.Error(err) + } // Test results: if got := par.embeddingLevel; got != wantLevel { From 72ead5faa1c2426bebe794973c1cbbcf0cb89e5c Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 1 Oct 2020 13:29:14 -0400 Subject: [PATCH 03/40] internal/export/idna: Allow specifying CheckHyphens and CheckJoiners This aligns with the options in the latest version of UTS 46, and in particular allows implementing the WHATWG URL Standard. Fixes golang/go#41732. Change-Id: Iab577eff4303f3eea64512d07d968c891acf126f Reviewed-on: https://go-review.googlesource.com/c/text/+/258837 Reviewed-by: Marcel van Lohuizen Reviewed-by: Nigel Tao Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Trust: Nigel Tao --- internal/export/idna/idna10.0.0.go | 113 ++++++++++++++++-------- internal/export/idna/idna10.0.0_test.go | 8 ++ internal/export/idna/idna9.0.0.go | 93 +++++++++++++------ internal/export/idna/idna9.0.0_test.go | 8 ++ 4 files changed, 154 insertions(+), 68 deletions(-) diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 1244f9ce9..2ceb32768 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -65,15 +65,14 @@ func Transitional(transitional bool) Option { // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts // are longer than allowed by the RFC. +// +// This option corresponds to the VerifyDnsLength flag in UTS #46. func VerifyDNSLength(verify bool) Option { return func(o *options) { o.verifyDNSLength = verify } } // RemoveLeadingDots removes leading label separators. Leading runes that map to // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. -// -// This is the behavior suggested by the UTS #46 and is adopted by some -// browsers. 
func RemoveLeadingDots(remove bool) Option { return func(o *options) { o.removeLeadingDots = remove } } @@ -81,6 +80,8 @@ func RemoveLeadingDots(remove bool) Option { // ValidateLabels sets whether to check the mandatory label validation criteria // as defined in Section 5.4 of RFC 5891. This includes testing for correct use // of hyphens ('-'), normalization, validity of runes, and the context rules. +// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags +// in UTS #46. func ValidateLabels(enable bool) Option { return func(o *options) { // Don't override existing mappings, but set one that at least checks @@ -89,25 +90,48 @@ func ValidateLabels(enable bool) Option { o.mapping = normalize } o.trie = trie - o.validateLabels = enable - o.fromPuny = validateFromPunycode + o.checkJoiners = enable + o.checkHyphens = enable + if enable { + o.fromPuny = validateFromPunycode + } else { + o.fromPuny = nil + } + } +} + +// CheckHyphens sets whether to check for correct use of hyphens ('-') in +// labels. Most web browsers do not have this option set, since labels such as +// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. +// +// This option corresponds to the CheckHyphens flag in UTS #46. +func CheckHyphens(enable bool) Option { + return func(o *options) { o.checkHyphens = enable } +} + +// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix +// A of RFC 5892, concerning the use of joiner runes. +// +// This option corresponds to the CheckJoiners flag in UTS #46. +func CheckJoiners(enable bool) Option { + return func(o *options) { + o.trie = trie + o.checkJoiners = enable } } // StrictDomainName limits the set of permissible ASCII characters to those // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the -// hyphen). This is set by default for MapForLookup and ValidateForRegistration. +// hyphen). 
This is set by default for MapForLookup and ValidateForRegistration, +// but is only useful if ValidateLabels is set. // // This option is useful, for instance, for browsers that allow characters // outside this range, for example a '_' (U+005F LOW LINE). See -// http://www.rfc-editor.org/std/std3.txt for more details This option -// corresponds to the UseSTD3ASCIIRules option in UTS #46. +// http://www.rfc-editor.org/std/std3.txt for more details. +// +// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. func StrictDomainName(use bool) Option { - return func(o *options) { - o.trie = trie - o.useSTD3Rules = use - o.fromPuny = validateFromPunycode - } + return func(o *options) { o.useSTD3Rules = use } } // NOTE: the following options pull in tables. The tables should not be linked @@ -115,6 +139,8 @@ func StrictDomainName(use bool) Option { // BidiRule enables the Bidi rule as defined in RFC 5893. Any application // that relies on proper validation of labels should include this rule. +// +// This option corresponds to the CheckBidi flag in UTS #46. 
func BidiRule() Option { return func(o *options) { o.bidirule = bidirule.ValidString } } @@ -150,7 +176,8 @@ func MapForLookup() Option { type options struct { transitional bool useSTD3Rules bool - validateLabels bool + checkHyphens bool + checkJoiners bool verifyDNSLength bool removeLeadingDots bool @@ -223,8 +250,11 @@ func (p *Profile) String() string { if p.useSTD3Rules { s += ":UseSTD3Rules" } - if p.validateLabels { - s += ":ValidateLabels" + if p.checkHyphens { + s += ":CheckHyphens" + } + if p.checkJoiners { + s += ":CheckJoiners" } if p.verifyDNSLength { s += ":VerifyDNSLength" @@ -252,26 +282,29 @@ var ( punycode = &Profile{} lookup = &Profile{options{ - transitional: true, - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, + transitional: true, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, + trie: trie, + fromPuny: validateFromPunycode, + mapping: validateAndMap, + bidirule: bidirule.ValidString, }} display = &Profile{options{ - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, + trie: trie, + fromPuny: validateFromPunycode, + mapping: validateAndMap, + bidirule: bidirule.ValidString, }} registration = &Profile{options{ useSTD3Rules: true, - validateLabels: true, verifyDNSLength: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateRegistration, @@ -338,7 +371,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { } isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight labels.set(u) - if err == nil && p.validateLabels { + if err == nil && p.fromPuny != nil { err = p.fromPuny(p, u) } if err == nil { @@ -679,16 +712,18 @@ func (p *Profile) validateLabel(s string) (err error) { } 
return nil } - if !p.validateLabels { - return nil - } - trie := p.trie // p.validateLabels is only set if trie is set. - if len(s) > 4 && s[2] == '-' && s[3] == '-' { - return &labelError{s, "V2"} + if p.checkHyphens { + if len(s) > 4 && s[2] == '-' && s[3] == '-' { + return &labelError{s, "V2"} + } + if s[0] == '-' || s[len(s)-1] == '-' { + return &labelError{s, "V3"} + } } - if s[0] == '-' || s[len(s)-1] == '-' { - return &labelError{s, "V3"} + if !p.checkJoiners { + return nil } + trie := p.trie // p.checkJoiners is only set if trie is set. // TODO: merge the use of this in the trie. v, sz := trie.lookupString(s) x := info(v) diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index ed01f9343..66ea636db 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -31,6 +31,8 @@ func TestLabelErrors(t *testing.T) { lengthA := kind{"CheckLengthA", p.ToASCII} p = New(MapForLookup(), StrictDomainName(false)) std3 := kind{"STD3", p.ToASCII} + p = New(MapForLookup(), CheckHyphens(false)) + hyphens := kind{"CheckHyphens", p.ToASCII} testCases := []struct { kind @@ -85,6 +87,12 @@ func TestLabelErrors(t *testing.T) { {display, "*.foo.com", "*.foo.com", "P1"}, {std3, "*.foo.com", "*.foo.com", ""}, + // Hyphens + {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"}, + {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""}, + {display, "-label-.com", "-label-.com", "V3"}, + {hyphens, "-label-.com", "-label-.com", ""}, + // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return // lab9.be. 
diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 25f2ac3e8..1ea943136 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -64,15 +64,14 @@ func Transitional(transitional bool) Option { // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts // are longer than allowed by the RFC. +// +// This option corresponds to the VerifyDnsLength flag in UTS #46. func VerifyDNSLength(verify bool) Option { return func(o *options) { o.verifyDNSLength = verify } } // RemoveLeadingDots removes leading label separators. Leading runes that map to // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. -// -// This is the behavior suggested by the UTS #46 and is adopted by some -// browsers. func RemoveLeadingDots(remove bool) Option { return func(o *options) { o.removeLeadingDots = remove } } @@ -80,6 +79,8 @@ func RemoveLeadingDots(remove bool) Option { // ValidateLabels sets whether to check the mandatory label validation criteria // as defined in Section 5.4 of RFC 5891. This includes testing for correct use // of hyphens ('-'), normalization, validity of runes, and the context rules. +// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags +// in UTS #46. func ValidateLabels(enable bool) Option { return func(o *options) { // Don't override existing mappings, but set one that at least checks @@ -88,25 +89,48 @@ func ValidateLabels(enable bool) Option { o.mapping = normalize } o.trie = trie - o.validateLabels = enable - o.fromPuny = validateFromPunycode + o.checkJoiners = enable + o.checkHyphens = enable + if enable { + o.fromPuny = validateFromPunycode + } else { + o.fromPuny = nil + } + } +} + +// CheckHyphens sets whether to check for correct use of hyphens ('-') in +// labels. Most web browsers do not have this option set, since labels such as +// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. 
+// +// This option corresponds to the CheckHyphens flag in UTS #46. +func CheckHyphens(enable bool) Option { + return func(o *options) { o.checkHyphens = enable } +} + +// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix +// A of RFC 5892, concerning the use of joiner runes. +// +// This option corresponds to the CheckJoiners flag in UTS #46. +func CheckJoiners(enable bool) Option { + return func(o *options) { + o.trie = trie + o.checkJoiners = enable } } // StrictDomainName limits the set of permissable ASCII characters to those // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the -// hyphen). This is set by default for MapForLookup and ValidateForRegistration. +// hyphen). This is set by default for MapForLookup and ValidateForRegistration, +// but is only useful if ValidateLabels is set. // // This option is useful, for instance, for browsers that allow characters // outside this range, for example a '_' (U+005F LOW LINE). See -// http://www.rfc-editor.org/std/std3.txt for more details This option -// corresponds to the UseSTD3ASCIIRules option in UTS #46. +// http://www.rfc-editor.org/std/std3.txt for more details. +// +// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. func StrictDomainName(use bool) Option { - return func(o *options) { - o.trie = trie - o.useSTD3Rules = use - o.fromPuny = validateFromPunycode - } + return func(o *options) { o.useSTD3Rules = use } } // NOTE: the following options pull in tables. The tables should not be linked @@ -114,6 +138,8 @@ func StrictDomainName(use bool) Option { // BidiRule enables the Bidi rule as defined in RFC 5893. Any application // that relies on proper validation of labels should include this rule. +// +// This option corresponds to the CheckBidi flag in UTS #46. 
func BidiRule() Option { return func(o *options) { o.bidirule = bidirule.ValidString } } @@ -150,7 +176,8 @@ func MapForLookup() Option { type options struct { transitional bool useSTD3Rules bool - validateLabels bool + checkHyphens bool + checkJoiners bool verifyDNSLength bool removeLeadingDots bool @@ -223,8 +250,11 @@ func (p *Profile) String() string { if p.useSTD3Rules { s += ":UseSTD3Rules" } - if p.validateLabels { - s += ":ValidateLabels" + if p.checkHyphens { + s += ":CheckHyphens" + } + if p.checkJoiners { + s += ":CheckJoiners" } if p.verifyDNSLength { s += ":VerifyDNSLength" @@ -253,9 +283,10 @@ var ( punycode = &Profile{} lookup = &Profile{options{ transitional: true, - useSTD3Rules: true, - validateLabels: true, removeLeadingDots: true, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, @@ -263,8 +294,9 @@ var ( }} display = &Profile{options{ useSTD3Rules: true, - validateLabels: true, removeLeadingDots: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, @@ -272,8 +304,9 @@ var ( }} registration = &Profile{options{ useSTD3Rules: true, - validateLabels: true, verifyDNSLength: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateRegistration, @@ -337,7 +370,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { continue } labels.set(u) - if err == nil && p.validateLabels { + if err == nil && p.fromPuny != nil { err = p.fromPuny(p, u) } if err == nil { @@ -627,16 +660,18 @@ func (p *Profile) validateLabel(s string) error { if p.bidirule != nil && !p.bidirule(s) { return &labelError{s, "B"} } - if !p.validateLabels { - return nil - } - trie := p.trie // p.validateLabels is only set if trie is set. 
- if len(s) > 4 && s[2] == '-' && s[3] == '-' { - return &labelError{s, "V2"} + if p.checkHyphens { + if len(s) > 4 && s[2] == '-' && s[3] == '-' { + return &labelError{s, "V2"} + } + if s[0] == '-' || s[len(s)-1] == '-' { + return &labelError{s, "V3"} + } } - if s[0] == '-' || s[len(s)-1] == '-' { - return &labelError{s, "V3"} + if !p.checkJoiners { + return nil } + trie := p.trie // p.checkJoiners is only set if trie is set. // TODO: merge the use of this in the trie. v, sz := trie.lookupString(s) x := info(v) diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index 7047d744a..03b1267c3 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -31,6 +31,8 @@ func TestLabelErrors(t *testing.T) { lengthA := kind{"CheckLengthA", p.ToASCII} p = New(MapForLookup(), StrictDomainName(false)) std3 := kind{"STD3", p.ToASCII} + p = New(MapForLookup(), CheckHyphens(false)) + hyphens := kind{"CheckHyphens", p.ToASCII} testCases := []struct { kind @@ -81,6 +83,12 @@ func TestLabelErrors(t *testing.T) { {display, "*.foo.com", "*.foo.com", "P1"}, {std3, "*.foo.com", "*.foo.com", ""}, + // Hyphens + {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"}, + {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""}, + {display, "-label-.com", "-label-.com", "V3"}, + {hyphens, "-label-.com", "-label-.com", ""}, + // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return // lab9.be. From 305da72387dee5313eca46df90087abcbeb3f520 Mon Sep 17 00:00:00 2001 From: "igor.bolotnikov" Date: Mon, 26 Oct 2020 17:55:30 +0300 Subject: [PATCH 04/40] internal/number: possible out of range error avoiding Case when r.Increment > 0 and r.IncrementScale == len(scales) in *Decimal.ConvertFloat method is taken into account. 
It led to unexpected out of range panic. fixes golang/go#42147 Change-Id: Ic26e67010b766bdbd322a3853489f6d1ecb0dcfc Reviewed-on: https://go-review.googlesource.com/c/text/+/265021 Reviewed-by: Marcel van Lohuizen Trust: Russ Cox --- internal/number/decimal.go | 2 +- internal/number/decimal_test.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/number/decimal.go b/internal/number/decimal.go index 9b4035ec4..cb656db6c 100644 --- a/internal/number/decimal.go +++ b/internal/number/decimal.go @@ -379,7 +379,7 @@ func (d *Decimal) ConvertFloat(r RoundingContext, x float64, size int) { if r.Increment > 0 { scale := int(r.IncrementScale) mult := 1.0 - if scale > len(scales) { + if scale >= len(scales) { mult = math.Pow(10, float64(scale)) } else { mult = scales[scale] diff --git a/internal/number/decimal_test.go b/internal/number/decimal_test.go index 97c7e25b6..670e806e5 100644 --- a/internal/number/decimal_test.go +++ b/internal/number/decimal_test.go @@ -248,6 +248,10 @@ func TestConvert(t *testing.T) { inc0_05 := RoundingContext{Increment: 5, IncrementScale: 2} inc0_05.SetScale(2) inc50 := RoundingContext{Increment: 50} + incScaleEqualToScalesLen := RoundingContext{Increment: 1, IncrementScale: 0} + if len(scales) <= math.MaxUint8 { + incScaleEqualToScalesLen.IncrementScale = uint8(len(scales)) + } prec3 := RoundingContext{} prec3.SetPrecision(3) roundShift := RoundingContext{DigitShift: 2, MaxFractionDigits: 2} @@ -309,6 +313,7 @@ func TestConvert(t *testing.T) { {math.Inf(-1), inc50, "-Inf"}, {math.NaN(), inc50, "NaN"}, {"clearly not a number", scale2, "NaN"}, + {0.0, incScaleEqualToScalesLen, "0"}, } for _, tc := range testCases { var d Decimal From c27b9fd57aec08b1104313fb190f0ecc6d23095f Mon Sep 17 00:00:00 2001 From: Garry McNulty Date: Mon, 24 Feb 2020 21:48:34 +0000 Subject: [PATCH 05/40] encoding/simplifiedchinese: fix incorrect transform count to avoid infinite loop If the final character in the source buffer is a single '~' 
escape character, size is not updated. The loop either doesn't make progress if size is zero, or size retains the value from a previous iteration which may return an incorrect source bytes consumed count. Count the single '~' as 1 byte consumed. Fixes golang/go#35118 Change-Id: I3eadf1b4cb632a7c4dc4255325b467a6907c10c0 Reviewed-on: https://go-review.googlesource.com/c/text/+/220460 Reviewed-by: Nigel Tao Trust: Nigel Tao Trust: Marcel van Lohuizen --- encoding/simplifiedchinese/all_test.go | 2 ++ encoding/simplifiedchinese/hzgb2312.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go index b369da21a..a556c94dd 100644 --- a/encoding/simplifiedchinese/all_test.go +++ b/encoding/simplifiedchinese/all_test.go @@ -53,6 +53,8 @@ func TestNonRepertoire(t *testing.T) { {dec, GB18030, strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd0", n)}, {dec, HZGB2312, "~/", "\ufffd"}, + {dec, HZGB2312, "~", "\ufffd"}, + {dec, HZGB2312, "~~~", "~\ufffd"}, {dec, HZGB2312, "~{a\x80", "\ufffd"}, {dec, HZGB2312, "~{a\x80", "\ufffd"}, {dec, HZGB2312, "~{" + strings.Repeat("z~", n), strings.Repeat("\ufffd", n)}, diff --git a/encoding/simplifiedchinese/hzgb2312.go b/encoding/simplifiedchinese/hzgb2312.go index eb3157f0b..e15b7bf6a 100644 --- a/encoding/simplifiedchinese/hzgb2312.go +++ b/encoding/simplifiedchinese/hzgb2312.go @@ -57,7 +57,7 @@ loop: err = transform.ErrShortSrc break loop } - r = utf8.RuneError + r, size = utf8.RuneError, 1 goto write } size = 2 From 75a595aef632b07c6eeaaa805adb6f0f66e4130e Mon Sep 17 00:00:00 2001 From: Julie Qiu Date: Mon, 7 Dec 2020 16:52:09 -0500 Subject: [PATCH 06/40] README.md: add badge to pkg.go.dev Change-Id: Iae82dbce741d2eaa03bcae046842589f0764c933 Reviewed-on: https://go-review.googlesource.com/c/text/+/275890 Reviewed-by: Dmitri Shuralyov Run-TryBot: Julie Qiu Trust: Julie Qiu --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/README.md b/README.md index 61d5ee4d1..85cd3d001 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Go Text +[![Go Reference](https://pkg.go.dev/badge/golang.org/x/text.svg)](https://pkg.go.dev/golang.org/x/text) + This repository holds supplementary Go libraries for text processing, many involving Unicode. ## Semantic Versioning From 8f690f22cf1c026c950adddf3d45258bfd0912f0 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 19 Feb 2021 18:54:44 -0500 Subject: [PATCH 07/40] all: go fmt ./... Make all our package sources use Go 1.17 gofmt format (adding //go:build lines). Not strictly necessary but will avoid spurious changes as files are edited. Part of //go:build change (#41184). See https://golang.org/design/draft-gobuild Change-Id: I8f1f65728dad29d63bc5a1ea5fdb4cbac255c7e5 Reviewed-on: https://go-review.googlesource.com/c/text/+/294372 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Jason A. Donenfeld Reviewed-by: Ian Lance Taylor --- cases/gen.go | 1 + cases/gen_trieval.go | 1 + cases/icu.go | 1 + cases/icu_test.go | 1 + cases/tables10.0.0.go | 1 + cases/tables10.0.0_test.go | 1 + cases/tables11.0.0.go | 1 + cases/tables11.0.0_test.go | 1 + cases/tables12.0.0.go | 1 + cases/tables12.0.0_test.go | 1 + cases/tables13.0.0.go | 1 + cases/tables13.0.0_test.go | 1 + cases/tables9.0.0.go | 1 + cases/tables9.0.0_test.go | 1 + cmd/gotext/examples/rewrite/printer.go | 1 + collate/maketables.go | 1 + collate/tools/colcmp/darwin.go | 1 + collate/tools/colcmp/icu.go | 1 + currency/gen.go | 1 + currency/gen_common.go | 1 + date/gen.go | 1 + encoding/charmap/maketables.go | 1 + encoding/htmlindex/gen.go | 1 + encoding/ianaindex/ascii.go | 4 ++-- encoding/ianaindex/ascii_test.go | 2 +- encoding/ianaindex/gen.go | 1 + encoding/internal/identifier/gen.go | 1 + encoding/japanese/maketables.go | 1 + encoding/korean/maketables.go | 1 + encoding/simplifiedchinese/maketables.go | 1 + encoding/traditionalchinese/maketables.go | 1 + feature/plural/gen.go 
| 1 + feature/plural/gen_common.go | 1 + gen.go | 1 + internal/export/idna/gen.go | 1 + internal/export/idna/gen10.0.0_test.go | 1 + internal/export/idna/gen9.0.0_test.go | 1 + internal/export/idna/gen_common.go | 1 + internal/export/idna/gen_trieval.go | 1 + internal/export/idna/idna10.0.0.go | 2 ++ internal/export/idna/idna10.0.0_test.go | 1 + internal/export/idna/idna9.0.0.go | 2 ++ internal/export/idna/idna9.0.0_test.go | 1 + internal/export/idna/tables10.0.0.go | 1 + internal/export/idna/tables11.0.0.go | 1 + internal/export/idna/tables12.0.0.go | 1 + internal/export/idna/tables13.0.0.go | 1 + internal/export/idna/tables9.0.0.go | 1 + internal/export/unicode/gen.go | 1 + internal/language/compact/gen.go | 1 + internal/language/compact/gen_index.go | 1 + internal/language/compact/gen_parents.go | 1 + internal/language/gen.go | 1 + internal/language/gen_common.go | 1 + internal/number/gen.go | 1 + internal/number/gen_common.go | 1 + internal/testtext/gc.go | 1 + internal/testtext/gccgo.go | 1 + internal/testtext/go1_6.go | 1 + internal/testtext/go1_7.go | 1 + internal/triegen/data_test.go | 1 + internal/triegen/gen_test.go | 1 + language/display/maketables.go | 1 + language/gen.go | 1 + language/go1_1.go | 1 + language/go1_2.go | 1 + message/catalog/go19.go | 1 + message/catalog/gopre19.go | 1 + message/pipeline/go19_test.go | 1 + secure/bidirule/bidirule10.0.0.go | 1 + secure/bidirule/bidirule10.0.0_test.go | 1 + secure/bidirule/bidirule9.0.0.go | 1 + secure/bidirule/bidirule9.0.0_test.go | 1 + secure/precis/benchmark_test.go | 1 + secure/precis/enforce10.0.0_test.go | 1 + secure/precis/enforce9.0.0_test.go | 1 + secure/precis/gen.go | 1 + secure/precis/gen_trieval.go | 1 + secure/precis/tables10.0.0.go | 1 + secure/precis/tables11.0.0.go | 1 + secure/precis/tables12.0.0.go | 1 + secure/precis/tables13.0.0.go | 1 + secure/precis/tables9.0.0.go | 1 + unicode/bidi/gen.go | 1 + unicode/bidi/gen_ranges.go | 1 + unicode/bidi/gen_trieval.go | 1 + 
unicode/bidi/tables10.0.0.go | 1 + unicode/bidi/tables11.0.0.go | 1 + unicode/bidi/tables12.0.0.go | 1 + unicode/bidi/tables13.0.0.go | 1 + unicode/bidi/tables9.0.0.go | 1 + unicode/cldr/makexml.go | 1 + unicode/norm/data10.0.0_test.go | 1 + unicode/norm/data11.0.0_test.go | 1 + unicode/norm/data12.0.0_test.go | 1 + unicode/norm/data13.0.0_test.go | 1 + unicode/norm/data9.0.0_test.go | 1 + unicode/norm/forminfo_test.go | 1 + unicode/norm/maketables.go | 1 + unicode/norm/tables10.0.0.go | 1 + unicode/norm/tables11.0.0.go | 1 + unicode/norm/tables12.0.0.go | 1 + unicode/norm/tables13.0.0.go | 1 + unicode/norm/tables9.0.0.go | 1 + unicode/norm/triegen.go | 1 + unicode/rangetable/gen.go | 1 + unicode/rangetable/tables10.0.0.go | 1 + unicode/rangetable/tables11.0.0.go | 1 + unicode/rangetable/tables12.0.0.go | 1 + unicode/rangetable/tables13.0.0.go | 1 + unicode/rangetable/tables9.0.0.go | 1 + unicode/runenames/gen.go | 1 + unicode/runenames/tables10.0.0.go | 1 + unicode/runenames/tables11.0.0.go | 1 + unicode/runenames/tables12.0.0.go | 1 + unicode/runenames/tables13.0.0.go | 1 + unicode/runenames/tables9.0.0.go | 1 + width/gen.go | 1 + width/gen_common.go | 1 + width/gen_trieval.go | 1 + width/tables10.0.0.go | 1 + width/tables11.0.0.go | 1 + width/tables12.0.0.go | 1 + width/tables13.0.0.go | 1 + width/tables9.0.0.go | 1 + 125 files changed, 128 insertions(+), 3 deletions(-) diff --git a/cases/gen.go b/cases/gen.go index e7bccc8be..a041fede1 100644 --- a/cases/gen.go +++ b/cases/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program generates the trie for casing operations. 
The Unicode casing diff --git a/cases/gen_trieval.go b/cases/gen_trieval.go index 26fadd6c5..6c7222a73 100644 --- a/cases/gen_trieval.go +++ b/cases/gen_trieval.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/cases/icu.go b/cases/icu.go index 46530d1e4..2dc84b39e 100644 --- a/cases/icu.go +++ b/cases/icu.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build icu // +build icu package cases diff --git a/cases/icu_test.go b/cases/icu_test.go index e2b0adafd..f3ed83834 100644 --- a/cases/icu_test.go +++ b/cases/icu_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build icu // +build icu package cases diff --git a/cases/tables10.0.0.go b/cases/tables10.0.0.go index 5c87950cc..ca9923105 100644 --- a/cases/tables10.0.0.go +++ b/cases/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package cases diff --git a/cases/tables10.0.0_test.go b/cases/tables10.0.0_test.go index 186065c9f..779eaf56d 100644 --- a/cases/tables10.0.0_test.go +++ b/cases/tables10.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package cases diff --git a/cases/tables11.0.0.go b/cases/tables11.0.0.go index 84d270183..b1106b417 100644 --- a/cases/tables11.0.0.go +++ b/cases/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package cases diff --git a/cases/tables11.0.0_test.go b/cases/tables11.0.0_test.go index 2f441af6b..bc8fc54ba 100644 --- a/cases/tables11.0.0_test.go +++ b/cases/tables11.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package cases diff --git a/cases/tables12.0.0.go b/cases/tables12.0.0.go index edee17d09..ae7dc2407 100644 --- a/cases/tables12.0.0.go +++ b/cases/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package cases diff --git a/cases/tables12.0.0_test.go b/cases/tables12.0.0_test.go index b017b4385..04f3b6b80 100644 --- a/cases/tables12.0.0_test.go +++ b/cases/tables12.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package cases diff --git a/cases/tables13.0.0.go b/cases/tables13.0.0.go index 90a2453fc..cd874775b 100644 --- a/cases/tables13.0.0.go +++ b/cases/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package cases diff --git a/cases/tables13.0.0_test.go b/cases/tables13.0.0_test.go index 82f3e19c5..57c78a81f 100644 --- a/cases/tables13.0.0_test.go +++ b/cases/tables13.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package cases diff --git a/cases/tables9.0.0.go b/cases/tables9.0.0.go index 4949fa724..636d5d14d 100644 --- a/cases/tables9.0.0.go +++ b/cases/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build !go1.10 // +build !go1.10 package cases diff --git a/cases/tables9.0.0_test.go b/cases/tables9.0.0_test.go index 398d25331..ac804f48b 100644 --- a/cases/tables9.0.0_test.go +++ b/cases/tables9.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package cases diff --git a/cmd/gotext/examples/rewrite/printer.go b/cmd/gotext/examples/rewrite/printer.go index 9ed055620..309d19573 100644 --- a/cmd/gotext/examples/rewrite/printer.go +++ b/cmd/gotext/examples/rewrite/printer.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/collate/maketables.go b/collate/maketables.go index 3b25d7bfa..59afeb9c2 100644 --- a/collate/maketables.go +++ b/collate/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Collation table generator. diff --git a/collate/tools/colcmp/darwin.go b/collate/tools/colcmp/darwin.go index d2300e3e2..1be326751 100644 --- a/collate/tools/colcmp/darwin.go +++ b/collate/tools/colcmp/darwin.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build darwin // +build darwin package main diff --git a/collate/tools/colcmp/icu.go b/collate/tools/colcmp/icu.go index 76de40427..b816acf7c 100644 --- a/collate/tools/colcmp/icu.go +++ b/collate/tools/colcmp/icu.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build icu // +build icu package main diff --git a/currency/gen.go b/currency/gen.go index da7712fc5..4a2c85549 100644 --- a/currency/gen.go +++ b/currency/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Generator for currency-related data. diff --git a/currency/gen_common.go b/currency/gen_common.go index e1cea2494..e483bed26 100644 --- a/currency/gen_common.go +++ b/currency/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/date/gen.go b/date/gen.go index 1fff91905..b58b9f75d 100644 --- a/date/gen.go +++ b/date/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/charmap/maketables.go b/encoding/charmap/maketables.go index f7941701e..70bf360c4 100644 --- a/encoding/charmap/maketables.go +++ b/encoding/charmap/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/htmlindex/gen.go b/encoding/htmlindex/gen.go index ac6b4a77f..d47f8dbd8 100644 --- a/encoding/htmlindex/gen.go +++ b/encoding/htmlindex/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/encoding/ianaindex/ascii.go b/encoding/ianaindex/ascii.go index 9792f8137..2b9824f82 100644 --- a/encoding/ianaindex/ascii.go +++ b/encoding/ianaindex/ascii.go @@ -10,8 +10,8 @@ import ( "golang.org/x/text/encoding" "golang.org/x/text/encoding/internal" - "golang.org/x/text/transform" "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/transform" ) type asciiDecoder struct { @@ -22,7 +22,7 @@ func (d asciiDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, er for _, c := range src { if c > unicode.MaxASCII { r := unicode.ReplacementChar - if nDst + utf8.RuneLen(r) > len(dst) { + if nDst+utf8.RuneLen(r) > len(dst) { err = transform.ErrShortDst break } diff --git a/encoding/ianaindex/ascii_test.go b/encoding/ianaindex/ascii_test.go index a184ab970..873ec7e6f 100644 --- a/encoding/ianaindex/ascii_test.go +++ b/encoding/ianaindex/ascii_test.go @@ -5,8 +5,8 @@ package ianaindex import ( - "unicode" "testing" + "unicode" "golang.org/x/text/encoding" ) diff --git a/encoding/ianaindex/gen.go b/encoding/ianaindex/gen.go index 1b61b820d..61d236621 100644 --- a/encoding/ianaindex/gen.go +++ b/encoding/ianaindex/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/internal/identifier/gen.go b/encoding/internal/identifier/gen.go index 26cfef9c6..039cb270c 100644 --- a/encoding/internal/identifier/gen.go +++ b/encoding/internal/identifier/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/encoding/japanese/maketables.go b/encoding/japanese/maketables.go index 023957a67..3066a39c0 100644 --- a/encoding/japanese/maketables.go +++ b/encoding/japanese/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/korean/maketables.go b/encoding/korean/maketables.go index c84034fb6..9552286f6 100644 --- a/encoding/korean/maketables.go +++ b/encoding/korean/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/simplifiedchinese/maketables.go b/encoding/simplifiedchinese/maketables.go index 55016c786..44988bfd1 100644 --- a/encoding/simplifiedchinese/maketables.go +++ b/encoding/simplifiedchinese/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/encoding/traditionalchinese/maketables.go b/encoding/traditionalchinese/maketables.go index cf7fdb31a..33f790598 100644 --- a/encoding/traditionalchinese/maketables.go +++ b/encoding/traditionalchinese/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/feature/plural/gen.go b/feature/plural/gen.go index 42f2f8676..b9c5f2493 100644 --- a/feature/plural/gen.go +++ b/feature/plural/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/feature/plural/gen_common.go b/feature/plural/gen_common.go index 24aa41505..bd92a4d45 100644 --- a/feature/plural/gen_common.go +++ b/feature/plural/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/gen.go b/gen.go index 177bc0bda..04ff6035c 100644 --- a/gen.go +++ b/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // gen runs go generate on Unicode- and CLDR-related package in the text diff --git a/internal/export/idna/gen.go b/internal/export/idna/gen.go index 4ad98046f..1109c1895 100644 --- a/internal/export/idna/gen.go +++ b/internal/export/idna/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program generates the trie for idna operations. The Unicode casing diff --git a/internal/export/idna/gen10.0.0_test.go b/internal/export/idna/gen10.0.0_test.go index c5dfdde61..0ac9497d3 100644 --- a/internal/export/idna/gen10.0.0_test.go +++ b/internal/export/idna/gen10.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 package idna diff --git a/internal/export/idna/gen9.0.0_test.go b/internal/export/idna/gen9.0.0_test.go index 0e66f0b16..47b9ef90f 100644 --- a/internal/export/idna/gen9.0.0_test.go +++ b/internal/export/idna/gen9.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build !go1.10 // +build !go1.10 package idna diff --git a/internal/export/idna/gen_common.go b/internal/export/idna/gen_common.go index 360a02b8e..8d87ba28d 100644 --- a/internal/export/idna/gen_common.go +++ b/internal/export/idna/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/export/idna/gen_trieval.go b/internal/export/idna/gen_trieval.go index 0de99b08a..9d92407f2 100644 --- a/internal/export/idna/gen_trieval.go +++ b/internal/export/idna/gen_trieval.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 2ceb32768..3e7bac3cb 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 + //go:generate go run gen.go gen_trieval.go gen_common.go // Package idna implements IDNA2008 using the compatibility processing diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 66ea636db..4142bfa84 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 package idna diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 1ea943136..7acecb800 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build !go1.10 // +build !go1.10 + //go:generate go run gen.go gen_trieval.go gen_common.go // Package idna implements IDNA2008 using the compatibility processing diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index 03b1267c3..b76b79628 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !go1.10 // +build !go1.10 package idna diff --git a/internal/export/idna/tables10.0.0.go b/internal/export/idna/tables10.0.0.go index 54fddb4b1..d1d62ef45 100644 --- a/internal/export/idna/tables10.0.0.go +++ b/internal/export/idna/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package idna diff --git a/internal/export/idna/tables11.0.0.go b/internal/export/idna/tables11.0.0.go index 8ce0811fd..167efba71 100644 --- a/internal/export/idna/tables11.0.0.go +++ b/internal/export/idna/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package idna diff --git a/internal/export/idna/tables12.0.0.go b/internal/export/idna/tables12.0.0.go index f39f0cb4c..ab40f7bcc 100644 --- a/internal/export/idna/tables12.0.0.go +++ b/internal/export/idna/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package idna diff --git a/internal/export/idna/tables13.0.0.go b/internal/export/idna/tables13.0.0.go index e8c7a36d7..390c5e56d 100644 --- a/internal/export/idna/tables13.0.0.go +++ b/internal/export/idna/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.16 // +build go1.16 package idna diff --git a/internal/export/idna/tables9.0.0.go b/internal/export/idna/tables9.0.0.go index 8b65fa167..4074b5332 100644 --- a/internal/export/idna/tables9.0.0.go +++ b/internal/export/idna/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package idna diff --git a/internal/export/unicode/gen.go b/internal/export/unicode/gen.go index afdc94de6..9e4114fc3 100644 --- a/internal/export/unicode/gen.go +++ b/internal/export/unicode/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Unicode table generator. diff --git a/internal/language/compact/gen.go b/internal/language/compact/gen.go index 0c36a052f..400c1f08f 100644 --- a/internal/language/compact/gen.go +++ b/internal/language/compact/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Language tag table generator. diff --git a/internal/language/compact/gen_index.go b/internal/language/compact/gen_index.go index 136cefaf0..51c0d2d36 100644 --- a/internal/language/compact/gen_index.go +++ b/internal/language/compact/gen_index.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/language/compact/gen_parents.go b/internal/language/compact/gen_parents.go index 9543d5832..1f4a6e855 100644 --- a/internal/language/compact/gen_parents.go +++ b/internal/language/compact/gen_parents.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/internal/language/gen.go b/internal/language/gen.go index cdcc7febc..27c43dc96 100644 --- a/internal/language/gen.go +++ b/internal/language/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Language tag table generator. diff --git a/internal/language/gen_common.go b/internal/language/gen_common.go index c419ceeb1..84dd4de1d 100644 --- a/internal/language/gen_common.go +++ b/internal/language/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/number/gen.go b/internal/number/gen.go index c836221bf..5341747fe 100644 --- a/internal/number/gen.go +++ b/internal/number/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/number/gen_common.go b/internal/number/gen_common.go index b1b41a73a..f20ad7fef 100644 --- a/internal/number/gen_common.go +++ b/internal/number/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/internal/testtext/gc.go b/internal/testtext/gc.go index a54e1bcbd..a6365f9b4 100644 --- a/internal/testtext/gc.go +++ b/internal/testtext/gc.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build !gccgo // +build !gccgo package testtext diff --git a/internal/testtext/gccgo.go b/internal/testtext/gccgo.go index 30e98efff..e880af3da 100644 --- a/internal/testtext/gccgo.go +++ b/internal/testtext/gccgo.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build gccgo // +build gccgo package testtext diff --git a/internal/testtext/go1_6.go b/internal/testtext/go1_6.go index 7b2384738..f633e8956 100644 --- a/internal/testtext/go1_6.go +++ b/internal/testtext/go1_6.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !go1.7 // +build !go1.7 package testtext diff --git a/internal/testtext/go1_7.go b/internal/testtext/go1_7.go index 66f9cf787..228bf4249 100644 --- a/internal/testtext/go1_7.go +++ b/internal/testtext/go1_7.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.7 // +build go1.7 package testtext diff --git a/internal/triegen/data_test.go b/internal/triegen/data_test.go index 91de547a5..a47efacd0 100644 --- a/internal/triegen/data_test.go +++ b/internal/triegen/data_test.go @@ -1,4 +1,5 @@ // This file is generated with "go test -tags generate". DO NOT EDIT! +//go:build !generate // +build !generate package triegen_test diff --git a/internal/triegen/gen_test.go b/internal/triegen/gen_test.go index 831627d7a..6bd137dd6 100644 --- a/internal/triegen/gen_test.go +++ b/internal/triegen/gen_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build generate // +build generate package triegen_test diff --git a/language/display/maketables.go b/language/display/maketables.go index b520e8ae6..a6cbcc6d6 100644 --- a/language/display/maketables.go +++ b/language/display/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Generator for display name tables. diff --git a/language/gen.go b/language/gen.go index 3004eb42c..445882e56 100644 --- a/language/gen.go +++ b/language/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Language tag table generator. diff --git a/language/go1_1.go b/language/go1_1.go index 380f4c09f..c7435583b 100644 --- a/language/go1_1.go +++ b/language/go1_1.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !go1.2 // +build !go1.2 package language diff --git a/language/go1_2.go b/language/go1_2.go index 38268c57a..77aaaa299 100644 --- a/language/go1_2.go +++ b/language/go1_2.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.2 // +build go1.2 package language diff --git a/message/catalog/go19.go b/message/catalog/go19.go index 147fc7cf5..4e5e87f8f 100644 --- a/message/catalog/go19.go +++ b/message/catalog/go19.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.9 // +build go1.9 package catalog diff --git a/message/catalog/gopre19.go b/message/catalog/gopre19.go index a9753b905..9e14685a5 100644 --- a/message/catalog/gopre19.go +++ b/message/catalog/gopre19.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build !go1.9 // +build !go1.9 package catalog diff --git a/message/pipeline/go19_test.go b/message/pipeline/go19_test.go index c9517c130..10d9eda29 100644 --- a/message/pipeline/go19_test.go +++ b/message/pipeline/go19_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.9 // +build go1.9 package pipeline diff --git a/secure/bidirule/bidirule10.0.0.go b/secure/bidirule/bidirule10.0.0.go index e4c62289f..8a7392c4a 100644 --- a/secure/bidirule/bidirule10.0.0.go +++ b/secure/bidirule/bidirule10.0.0.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 package bidirule diff --git a/secure/bidirule/bidirule10.0.0_test.go b/secure/bidirule/bidirule10.0.0_test.go index 06ec5f5df..dd5271c0e 100644 --- a/secure/bidirule/bidirule10.0.0_test.go +++ b/secure/bidirule/bidirule10.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 package bidirule diff --git a/secure/bidirule/bidirule9.0.0.go b/secure/bidirule/bidirule9.0.0.go index 02b9e1e9d..bb0a92001 100644 --- a/secure/bidirule/bidirule9.0.0.go +++ b/secure/bidirule/bidirule9.0.0.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !go1.10 // +build !go1.10 package bidirule diff --git a/secure/bidirule/bidirule9.0.0_test.go b/secure/bidirule/bidirule9.0.0_test.go index 008874ed3..e0bd1b486 100644 --- a/secure/bidirule/bidirule9.0.0_test.go +++ b/secure/bidirule/bidirule9.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build !go1.10 // +build !go1.10 package bidirule diff --git a/secure/precis/benchmark_test.go b/secure/precis/benchmark_test.go index 6337d0063..faed65437 100644 --- a/secure/precis/benchmark_test.go +++ b/secure/precis/benchmark_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.7 // +build go1.7 package precis diff --git a/secure/precis/enforce10.0.0_test.go b/secure/precis/enforce10.0.0_test.go index 34da1503b..7dd7bd550 100644 --- a/secure/precis/enforce10.0.0_test.go +++ b/secure/precis/enforce10.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 // +build go1.10 package precis diff --git a/secure/precis/enforce9.0.0_test.go b/secure/precis/enforce9.0.0_test.go index 209745551..98765cd7c 100644 --- a/secure/precis/enforce9.0.0_test.go +++ b/secure/precis/enforce9.0.0_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !go1.10 // +build !go1.10 package precis diff --git a/secure/precis/gen.go b/secure/precis/gen.go index 946acbaa1..99ead4298 100644 --- a/secure/precis/gen.go +++ b/secure/precis/gen.go @@ -5,6 +5,7 @@ // Unicode table generator. // Data read from the web. +//go:build ignore // +build ignore package main diff --git a/secure/precis/gen_trieval.go b/secure/precis/gen_trieval.go index 308510c9a..6fc5d1e3d 100644 --- a/secure/precis/gen_trieval.go +++ b/secure/precis/gen_trieval.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/secure/precis/tables10.0.0.go b/secure/precis/tables10.0.0.go index 362d2729b..816474950 100644 --- a/secure/precis/tables10.0.0.go +++ b/secure/precis/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package precis diff --git a/secure/precis/tables11.0.0.go b/secure/precis/tables11.0.0.go index 0db5a9e7b..a40e55d6c 100644 --- a/secure/precis/tables11.0.0.go +++ b/secure/precis/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package precis diff --git a/secure/precis/tables12.0.0.go b/secure/precis/tables12.0.0.go index a2392892a..254bbc793 100644 --- a/secure/precis/tables12.0.0.go +++ b/secure/precis/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package precis diff --git a/secure/precis/tables13.0.0.go b/secure/precis/tables13.0.0.go index e3ddd7302..aad68b35e 100644 --- a/secure/precis/tables13.0.0.go +++ b/secure/precis/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package precis diff --git a/secure/precis/tables9.0.0.go b/secure/precis/tables9.0.0.go index dacaf6a4e..2292b7cb0 100644 --- a/secure/precis/tables9.0.0.go +++ b/secure/precis/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package precis diff --git a/unicode/bidi/gen.go b/unicode/bidi/gen.go index 987fc169c..ab2b8bf6f 100644 --- a/unicode/bidi/gen.go +++ b/unicode/bidi/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build ignore // +build ignore package main diff --git a/unicode/bidi/gen_ranges.go b/unicode/bidi/gen_ranges.go index 02c3b505d..10f09895f 100644 --- a/unicode/bidi/gen_ranges.go +++ b/unicode/bidi/gen_ranges.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/unicode/bidi/gen_trieval.go b/unicode/bidi/gen_trieval.go index 9cb994289..eac0f7645 100644 --- a/unicode/bidi/gen_trieval.go +++ b/unicode/bidi/gen_trieval.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/unicode/bidi/tables10.0.0.go b/unicode/bidi/tables10.0.0.go index d8c94e1bd..42fa8d72c 100644 --- a/unicode/bidi/tables10.0.0.go +++ b/unicode/bidi/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package bidi diff --git a/unicode/bidi/tables11.0.0.go b/unicode/bidi/tables11.0.0.go index 16b11db53..56a0e1ea2 100644 --- a/unicode/bidi/tables11.0.0.go +++ b/unicode/bidi/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package bidi diff --git a/unicode/bidi/tables12.0.0.go b/unicode/bidi/tables12.0.0.go index 647f2d427..baacf32b4 100644 --- a/unicode/bidi/tables12.0.0.go +++ b/unicode/bidi/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package bidi diff --git a/unicode/bidi/tables13.0.0.go b/unicode/bidi/tables13.0.0.go index c937d0976..f248effae 100644 --- a/unicode/bidi/tables13.0.0.go +++ b/unicode/bidi/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package bidi diff --git a/unicode/bidi/tables9.0.0.go b/unicode/bidi/tables9.0.0.go index 0ca0193eb..f517fdb20 100644 --- a/unicode/bidi/tables9.0.0.go +++ b/unicode/bidi/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package bidi diff --git a/unicode/cldr/makexml.go b/unicode/cldr/makexml.go index eb26306df..3c1fd26ea 100644 --- a/unicode/cldr/makexml.go +++ b/unicode/cldr/makexml.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This tool generates types for the various XML formats of CLDR. diff --git a/unicode/norm/data10.0.0_test.go b/unicode/norm/data10.0.0_test.go index 56441e72f..54ee4d24f 100644 --- a/unicode/norm/data10.0.0_test.go +++ b/unicode/norm/data10.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package norm diff --git a/unicode/norm/data11.0.0_test.go b/unicode/norm/data11.0.0_test.go index ee8ec8b53..5b0e03743 100644 --- a/unicode/norm/data11.0.0_test.go +++ b/unicode/norm/data11.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package norm diff --git a/unicode/norm/data12.0.0_test.go b/unicode/norm/data12.0.0_test.go index 1555292a2..48cdbc5ff 100644 --- a/unicode/norm/data12.0.0_test.go +++ b/unicode/norm/data12.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package norm diff --git a/unicode/norm/data13.0.0_test.go b/unicode/norm/data13.0.0_test.go index 9455866e5..3c390d02a 100644 --- a/unicode/norm/data13.0.0_test.go +++ b/unicode/norm/data13.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package norm diff --git a/unicode/norm/data9.0.0_test.go b/unicode/norm/data9.0.0_test.go index b1be64d55..77175bc8f 100644 --- a/unicode/norm/data9.0.0_test.go +++ b/unicode/norm/data9.0.0_test.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package norm diff --git a/unicode/norm/forminfo_test.go b/unicode/norm/forminfo_test.go index e15ba9bee..96f097088 100644 --- a/unicode/norm/forminfo_test.go +++ b/unicode/norm/forminfo_test.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build test // +build test package norm diff --git a/unicode/norm/maketables.go b/unicode/norm/maketables.go index 30a3aa933..0d24d2117 100644 --- a/unicode/norm/maketables.go +++ b/unicode/norm/maketables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Normalization table generator. 
diff --git a/unicode/norm/tables10.0.0.go b/unicode/norm/tables10.0.0.go index 26fbd55a1..f5a078827 100644 --- a/unicode/norm/tables10.0.0.go +++ b/unicode/norm/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package norm diff --git a/unicode/norm/tables11.0.0.go b/unicode/norm/tables11.0.0.go index 2c58f09ba..cb7239c43 100644 --- a/unicode/norm/tables11.0.0.go +++ b/unicode/norm/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package norm diff --git a/unicode/norm/tables12.0.0.go b/unicode/norm/tables12.0.0.go index 7e1ae096e..11b273300 100644 --- a/unicode/norm/tables12.0.0.go +++ b/unicode/norm/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package norm diff --git a/unicode/norm/tables13.0.0.go b/unicode/norm/tables13.0.0.go index 9ea1b4214..96a130d30 100644 --- a/unicode/norm/tables13.0.0.go +++ b/unicode/norm/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package norm diff --git a/unicode/norm/tables9.0.0.go b/unicode/norm/tables9.0.0.go index 942906929..0175eae50 100644 --- a/unicode/norm/tables9.0.0.go +++ b/unicode/norm/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package norm diff --git a/unicode/norm/triegen.go b/unicode/norm/triegen.go index 45d711900..cce64352f 100644 --- a/unicode/norm/triegen.go +++ b/unicode/norm/triegen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // Trie table generator. 
diff --git a/unicode/rangetable/gen.go b/unicode/rangetable/gen.go index c2d36741c..fc4eee0a9 100644 --- a/unicode/rangetable/gen.go +++ b/unicode/rangetable/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/unicode/rangetable/tables10.0.0.go b/unicode/rangetable/tables10.0.0.go index 3dfcd8293..a34fbe294 100644 --- a/unicode/rangetable/tables10.0.0.go +++ b/unicode/rangetable/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package rangetable diff --git a/unicode/rangetable/tables11.0.0.go b/unicode/rangetable/tables11.0.0.go index 1df829f06..31949f0ff 100644 --- a/unicode/rangetable/tables11.0.0.go +++ b/unicode/rangetable/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package rangetable diff --git a/unicode/rangetable/tables12.0.0.go b/unicode/rangetable/tables12.0.0.go index e9d25c51e..9d4d724a6 100644 --- a/unicode/rangetable/tables12.0.0.go +++ b/unicode/rangetable/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package rangetable diff --git a/unicode/rangetable/tables13.0.0.go b/unicode/rangetable/tables13.0.0.go index 921e4f7b1..3dcaad44e 100644 --- a/unicode/rangetable/tables13.0.0.go +++ b/unicode/rangetable/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.16 // +build go1.16 package rangetable diff --git a/unicode/rangetable/tables9.0.0.go b/unicode/rangetable/tables9.0.0.go index aef876d96..32a7ac1ce 100644 --- a/unicode/rangetable/tables9.0.0.go +++ b/unicode/rangetable/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package rangetable diff --git a/unicode/runenames/gen.go b/unicode/runenames/gen.go index 5633ba62d..4f4a0201a 100644 --- a/unicode/runenames/gen.go +++ b/unicode/runenames/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/unicode/runenames/tables10.0.0.go b/unicode/runenames/tables10.0.0.go index e74a21c2a..9ff9ad9cc 100644 --- a/unicode/runenames/tables10.0.0.go +++ b/unicode/runenames/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package runenames diff --git a/unicode/runenames/tables11.0.0.go b/unicode/runenames/tables11.0.0.go index 8b41b53b3..ba179885b 100644 --- a/unicode/runenames/tables11.0.0.go +++ b/unicode/runenames/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package runenames diff --git a/unicode/runenames/tables12.0.0.go b/unicode/runenames/tables12.0.0.go index 4f71b9af2..7355a569b 100644 --- a/unicode/runenames/tables12.0.0.go +++ b/unicode/runenames/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package runenames diff --git a/unicode/runenames/tables13.0.0.go b/unicode/runenames/tables13.0.0.go index b08f64835..4b74848a3 100644 --- a/unicode/runenames/tables13.0.0.go +++ b/unicode/runenames/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package runenames diff --git a/unicode/runenames/tables9.0.0.go b/unicode/runenames/tables9.0.0.go index 912c39613..0e8d95565 100644 --- a/unicode/runenames/tables9.0.0.go +++ b/unicode/runenames/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package runenames diff --git a/width/gen.go b/width/gen.go index 092277e1f..36e4156b3 100644 --- a/width/gen.go +++ b/width/gen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program generates the trie for width operations. The generated table diff --git a/width/gen_common.go b/width/gen_common.go index 601e75268..24c49ff55 100644 --- a/width/gen_common.go +++ b/width/gen_common.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/width/gen_trieval.go b/width/gen_trieval.go index c17334aa6..cf1a3b29f 100644 --- a/width/gen_trieval.go +++ b/width/gen_trieval.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package main diff --git a/width/tables10.0.0.go b/width/tables10.0.0.go index decb8e480..186b1d4ef 100644 --- a/width/tables10.0.0.go +++ b/width/tables10.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 
+//go:build go1.10 && !go1.13 // +build go1.10,!go1.13 package width diff --git a/width/tables11.0.0.go b/width/tables11.0.0.go index 3c75e428f..990f7622f 100644 --- a/width/tables11.0.0.go +++ b/width/tables11.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.13 && !go1.14 // +build go1.13,!go1.14 package width diff --git a/width/tables12.0.0.go b/width/tables12.0.0.go index 543942b9e..85296297e 100644 --- a/width/tables12.0.0.go +++ b/width/tables12.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.14 && !go1.16 // +build go1.14,!go1.16 package width diff --git a/width/tables13.0.0.go b/width/tables13.0.0.go index 804264ca6..bac3f1aee 100644 --- a/width/tables13.0.0.go +++ b/width/tables13.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build go1.16 // +build go1.16 package width diff --git a/width/tables9.0.0.go b/width/tables9.0.0.go index 7069e2634..b3db84f6f 100644 --- a/width/tables9.0.0.go +++ b/width/tables9.0.0.go @@ -1,5 +1,6 @@ // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. +//go:build !go1.10 // +build !go1.10 package width From e3aa4adf54f644ca0cb35f1f1fb19b239c40ef04 Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Thu, 18 Feb 2021 08:31:59 +0100 Subject: [PATCH 08/40] language: allow variable number of types per key in -u- extension This also fixes CVE-2020-28851. This was an off-by one error, but is fixed by handling all cases according to the spec. These valid case seem to be not used in practice much, if at all, but the main benefit is that it makes all valid BCP 47 language tags also valid -u extensions. Fixing the code to handle BCP 47 results in cleaner and seemingly more robust code. The main difference is as follows. 
The old implementation assumed a -u- extension of the form: "-u" { "-" <attr> } { "-" <key> "-" <type> } [ <other extensions> ] where <attr> and <type> are of length 3-8 and a <key> is of length 2. According to the spec, though, the format is "-u" { "-" <attr> } { "-" <key> { "-" <type> } } [ <other extensions> ] So every key may be associated with zero or more types, instead of exactly one. The new code now handles this. The language.Tag.TypeForKey method is now defined to only return the first entry or nothing at all. This is for backwards compatibility reasons. Fixes golang/go#42535 Change-Id: I23aec4e1c4d8807fc2ffc0eb3a08de2d8150219f Reviewed-on: https://go-review.googlesource.com/c/text/+/293549 Trust: Marcel van Lohuizen Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Russ Cox --- internal/language/language.go | 90 ++++++++++++++---------------- internal/language/language_test.go | 14 ++++- internal/language/parse.go | 28 +++++----- internal/language/parse_test.go | 23 ++++---- language/language.go | 4 ++ language/language_test.go | 13 +++++ language/parse_test.go | 22 ++++---- 7 files changed, 108 insertions(+), 86 deletions(-) diff --git a/internal/language/language.go b/internal/language/language.go index 1e74d1aff..f41aedcfc 100644 --- a/internal/language/language.go +++ b/internal/language/language.go @@ -303,9 +303,17 @@ func (t Tag) Extensions() []string { // are of the allowed values defined for the Unicode locale extension ('u') in // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // TypeForKey will traverse the inheritance chain to get the correct value. +// +// If there are multiple types associated with a key, only the first will be +// returned. If there is no type associated with a key, it returns the empty +// string.
func (t Tag) TypeForKey(key string) string { - if start, end, _ := t.findTypeForKey(key); end != start { - return t.str[start:end] + if _, start, end, _ := t.findTypeForKey(key); end != start { + s := t.str[start:end] + if p := strings.IndexByte(s, '-'); p >= 0 { + s = s[:p] + } + return s } return "" } @@ -329,13 +337,13 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { // Remove the setting if value is "". if value == "" { - start, end, _ := t.findTypeForKey(key) - if start != end { - // Remove key tag and leading '-'. - start -= 4 - + start, sep, end, _ := t.findTypeForKey(key) + if start != sep { // Remove a possible empty extension. - if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' { + switch { + case t.str[start-2] != '-': // has previous elements. + case end == len(t.str), // end of string + end+2 < len(t.str) && t.str[end+2] == '-': // end of extension start -= 2 } if start == int(t.pVariant) && end == len(t.str) { @@ -381,14 +389,14 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { t.str = string(buf[:uStart+len(b)]) } else { s := t.str - start, end, hasExt := t.findTypeForKey(key) - if start == end { + start, sep, end, hasExt := t.findTypeForKey(key) + if start == sep { if hasExt { b = b[2:] } - t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:]) + t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:]) } else { - t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:]) + t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:]) } } return t, nil @@ -399,10 +407,10 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { // wasn't found. The hasExt return value reports whether an -u extension was present. // Note: the extensions are typically very small and are likely to contain // only one key-type pair. 
-func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { +func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) { p := int(t.pExt) if len(key) != 2 || p == len(t.str) || p == 0 { - return p, p, false + return p, p, p, false } s := t.str @@ -410,10 +418,10 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { for p++; s[p] != 'u'; p++ { if s[p] > 'u' { p-- - return p, p, false + return p, p, p, false } if p = nextExtension(s, p); p == len(s) { - return len(s), len(s), false + return len(s), len(s), len(s), false } } // Proceed to the hyphen following the extension name. @@ -424,40 +432,28 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { // Iterate over keys until we get the end of a section. for { - // p points to the hyphen preceding the current token. - if p3 := p + 3; s[p3] == '-' { - // Found a key. - // Check whether we just processed the key that was requested. - if curKey == key { - return start, p, true - } - // Set to the next key and continue scanning type tokens. - curKey = s[p+1 : p3] - if curKey > key { - return p, p, true - } - // Start of the type token sequence. - start = p + 4 - // A type is at least 3 characters long. - p += 7 // 4 + 3 - } else { - // Attribute or type, which is at least 3 characters long. - p += 4 - } - // p points past the third character of a type or attribute. - max := p + 5 // maximum length of token plus hyphen. - if len(s) < max { - max = len(s) + end = p + for p++; p < len(s) && s[p] != '-'; p++ { } - for ; p < max && s[p] != '-'; p++ { + n := p - end - 1 + if n <= 2 && curKey == key { + if sep < end { + sep++ + } + return start, sep, end, true } - // Bail if we have exhausted all tokens or if the next token starts - // a new extension. 
- if p == len(s) || s[p+2] == '-' { - if curKey == key { - return start, p, true + switch n { + case 0, // invalid string + 1: // next extension + return end, end, end, true + case 2: + // next key + curKey = s[end+1 : p] + if curKey > key { + return end, end, end, true } - return p, p, true + start = end + sep = p } } } diff --git a/internal/language/language_test.go b/internal/language/language_test.go index 6c7c10836..8244c1c8a 100644 --- a/internal/language/language_test.go +++ b/internal/language/language_test.go @@ -432,7 +432,9 @@ func TestSetTypeForKey(t *testing.T) { {"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false}, {"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false}, {"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false}, + {"co", "pinyin", "en-u-co-x-x", "en-u-co-pinyin-x-x", false}, {"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false}, + {"nu", "arabic", "en-u-co-phonebk-nu", "en-u-co-phonebk-nu-arabic", false}, // add to existing -u extension {"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false}, {"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false}, @@ -441,8 +443,12 @@ func TestSetTypeForKey(t *testing.T) { {"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false}, // remove pair {"co", "", "en-u-co-phonebk", "en", false}, + {"co", "", "en-u-co", "en", false}, + {"co", "", "en-u-co-v", "en", false}, + {"co", "", "en-u-co-v-", "en", false}, {"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false}, {"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false}, + {"co", "", "en-u-co-nu-arabic", "en-u-nu-arabic", false}, {"co", "", "en", "en", false}, // add -u extension {"co", "pinyin", "en", "en-u-co-pinyin", false}, @@ -504,6 +510,8 @@ func TestFindKeyAndType(t *testing.T) { {"cu", false, "en-a-va-v-va", "en-a-va"}, {"cu", false, "en-x-a", "en"}, // Tags with the -u extension. 
+ {"nu", true, "en-u-cu-nu", "en-u-cu"}, + {"cu", true, "en-u-cu-nu", "en-u"}, {"co", true, "en-u-co-standard", "standard"}, {"co", true, "yue-u-co-pinyin", "pinyin"}, {"co", true, "en-u-co-abc", "abc"}, @@ -519,9 +527,9 @@ func TestFindKeyAndType(t *testing.T) { {"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"}, } for i, tt := range tests { - start, end, hasExt := Make(tt.in).findTypeForKey(tt.key) - if start != end { - res := tt.in[start:end] + start, sep, end, hasExt := Make(tt.in).findTypeForKey(tt.key) + if sep != end { + res := tt.in[sep:end] if res != tt.out { t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out) } diff --git a/internal/language/parse.go b/internal/language/parse.go index a2fdad89d..c696fd0bd 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -138,7 +138,7 @@ func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { b = make([]byte, n) copy(b, s.b[:oldStart]) } else { - b = s.b[:n:n] + b = s.b[:n] } copy(b[end:], s.b[oldEnd:]) s.b = b @@ -483,7 +483,7 @@ func parseExtensions(scan *scanner) int { func parseExtension(scan *scanner) int { start, end := scan.start, scan.end switch scan.token[0] { - case 'u': + case 'u': // https://www.ietf.org/rfc/rfc6067.txt attrStart := end scan.scan() for last := []byte{}; len(scan.token) > 2; scan.scan() { @@ -503,27 +503,29 @@ func parseExtension(scan *scanner) int { last = scan.token end = scan.end } + // Scan key-type sequences. A key is of length 2 and may be followed + // by 0 or more "type" subtags from 3 to the maximum of 8 letters. 
var last, key []byte for attrEnd := end; len(scan.token) == 2; last = key { key = scan.token - keyEnd := scan.end - end = scan.acceptMinSize(3) + end = scan.end + for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { + end = scan.end + } // TODO: check key value validity - if keyEnd == end || bytes.Compare(key, last) != 1 { + if bytes.Compare(key, last) != 1 || scan.err != nil { // We have an invalid key or the keys are not sorted. // Start scanning keys from scratch and reorder. p := attrEnd + 1 scan.next = p keys := [][]byte{} for scan.scan(); len(scan.token) == 2; { - keyStart, keyEnd := scan.start, scan.end - end = scan.acceptMinSize(3) - if keyEnd != end { - keys = append(keys, scan.b[keyStart:end]) - } else { - scan.setError(ErrSyntax) - end = keyStart + keyStart := scan.start + end = scan.end + for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { + end = scan.end } + keys = append(keys, scan.b[keyStart:end]) } sort.Stable(bytesSort{keys, 2}) if n := len(keys); n > 0 { @@ -547,7 +549,7 @@ func parseExtension(scan *scanner) int { break } } - case 't': + case 't': // https://www.ietf.org/rfc/rfc6497.txt scan.scan() if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { _, end = parseTag(scan) diff --git a/internal/language/parse_test.go b/internal/language/parse_test.go index 0cc97d7a5..e1d428aa6 100644 --- a/internal/language/parse_test.go +++ b/internal/language/parse_test.go @@ -164,13 +164,13 @@ func parseTests() []parseTest { {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true}, {in: "en-u-c", lang: "en", ext: "", invalid: true}, {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"}, - {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true}, - {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", 
invalid: true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true}, - {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true}, + {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true}, + {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true}, {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"}, {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"}, {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"}, @@ -179,9 +179,8 @@ func parseTests() []parseTest { {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true}, {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true}, {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true}, - // Invalid "u" extension. Drop invalid parts. 
- {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true}, - {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true}, + {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, changed: true}, + {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, changed: true}, // LDML spec is not specific about it, but remove duplicates and return an error if the values differ. {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true}, // No change as the result is a substring of the original! @@ -351,8 +350,8 @@ func TestErrors(t *testing.T) { {"aa-AB", mkInvalid("AB")}, // ill-formed wins over invalid. {"ac-u", ErrSyntax}, - {"ac-u-ca", ErrSyntax}, - {"ac-u-ca-co-pinyin", ErrSyntax}, + {"ac-u-ca", mkInvalid("ac")}, + {"ac-u-ca-co-pinyin", mkInvalid("ac")}, {"noob", ErrSyntax}, } for _, tt := range tests { diff --git a/language/language.go b/language/language.go index abfa17f66..289b3a36d 100644 --- a/language/language.go +++ b/language/language.go @@ -412,6 +412,10 @@ func (t Tag) Extensions() []Extension { // are of the allowed values defined for the Unicode locale extension ('u') in // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // TypeForKey will traverse the inheritance chain to get the correct value. +// +// If there are multiple types associated with a key, only the first will be +// returned. If there is no type associated with a key, it returns the empty +// string. 
func (t Tag) TypeForKey(key string) string { if !compact.Tag(t).MayHaveExtensions() { if key != "rg" && key != "va" { diff --git a/language/language_test.go b/language/language_test.go index f7711ba34..b2e3ce3c5 100644 --- a/language/language_test.go +++ b/language/language_test.go @@ -523,6 +523,13 @@ func TestCanonicalize(t *testing.T) { {"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw}, {"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw}, {"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw}, + + // CVE-2020-28851 + // invalid key-value pair of -u- extension. + {"ES-u-000-00", "es-u-000-00", Raw}, + {"ES-u-000-00-v-00", "es-u-000-00-v-00", Raw}, + // reordered and unknown extension. + {"ES-v-00-u-000-00", "es-u-000-00-v-00", Raw}, } for i, tt := range tests { in, _ := Raw.Parse(tt.in) @@ -553,6 +560,12 @@ func TestTypeForKey(t *testing.T) { {"rg", "en-u-rg-gbzzzz", "gbzzzz"}, {"nu", "en-u-co-phonebk-nu-arabic", "arabic"}, {"kc", "cmn-u-co-stroke", ""}, + {"rg", "cmn-u-rg", ""}, + {"rg", "cmn-u-rg-co-stroke", ""}, + {"co", "cmn-u-rg-co-stroke", "stroke"}, + {"co", "cmn-u-co-rg-gbzzzz", ""}, + {"rg", "cmn-u-co-rg-gbzzzz", "gbzzzz"}, + {"rg", "cmn-u-rg-gbzzzz-nlzzzz", "gbzzzz"}, } for _, tt := range tests { if v := Make(tt.in).TypeForKey(tt.key); v != tt.out { diff --git a/language/parse_test.go b/language/parse_test.go index 041660c4e..4b7e64db3 100644 --- a/language/parse_test.go +++ b/language/parse_test.go @@ -101,13 +101,13 @@ func parseTests() []parseTest { {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true}, {in: "en-u-c", lang: "en", ext: "", invalid: true}, {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"}, - {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true}, - {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: 
true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, - {in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true}, - {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true}, + {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true}, + {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, + {in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true}, + {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true}, {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"}, {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"}, {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"}, @@ -117,8 +117,8 @@ func parseTests() []parseTest { {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true}, {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true}, // Invalid "u" extension. Drop invalid parts. 
- {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true}, - {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true}, + {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true}, + {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true}, // We allow duplicate keys as the LDML spec does not explicitly prohibit it. // TODO: Consider eliminating duplicates and returning an error. {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true}, @@ -219,8 +219,8 @@ func TestErrors(t *testing.T) { {"aa-AB", mkInvalid("AB")}, // ill-formed wins over invalid. {"ac-u", errSyntax}, - {"ac-u-ca", errSyntax}, - {"ac-u-ca-co-pinyin", errSyntax}, + {"ac-u-ca", mkInvalid("ac")}, + {"ac-u-ca-co-pinyin", mkInvalid("ac")}, {"noob", errSyntax}, } for _, tt := range tests { From e328d63cff14134669501e0e154e4f141c784322 Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Mon, 29 Mar 2021 16:31:20 +0200 Subject: [PATCH 09/40] language: fix off-by-one error Regions are encoded starting from 1. However, one of the region-related tables assumed 0-based indices. This caused a crash when used with ZZ, the largest region. 
Fixes golang/go#43834 Change-Id: Iaed6b9d2683cd50504e6d33c8a6df8b21dd1687d Reviewed-on: https://go-review.googlesource.com/c/text/+/305469 Trust: Marcel van Lohuizen Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Seth Vargo Reviewed-by: Ian Lance Taylor --- language/gen.go | 4 +++- language/match_test.go | 14 ++++++++++++++ language/tables.go | 8 ++++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/language/gen.go b/language/gen.go index 445882e56..60bdf64d0 100644 --- a/language/gen.go +++ b/language/gen.go @@ -150,7 +150,9 @@ func (b *builder) writeMatchData() { regions := strings.Split(g.Contains, " ") regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...) } - regionToGroups := make([]uint8, language.NumRegions) + // Regions start at 1, so the slice must be one larger than the number of + // regions. + regionToGroups := make([]uint8, language.NumRegions+1) idToIndex := map[string]uint8{} for i, mv := range lm[0].MatchVariable { diff --git a/language/match_test.go b/language/match_test.go index c21b86372..a6df3e1dc 100644 --- a/language/match_test.go +++ b/language/match_test.go @@ -224,6 +224,20 @@ func (t haveTag) String() string { return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript) } +func TestIssue43834(t *testing.T) { + matcher := NewMatcher([]Tag{English}) + + // ZZ is the largest region code and should not cause overflow. + desired, _, err := ParseAcceptLanguage("en-ZZ") + if err != nil { + t.Error(err) + } + _, i, _ := matcher.Match(desired...) + if i != 0 { + t.Errorf("got %v; want 0", i) + } +} + func TestBestMatchAlloc(t *testing.T) { m := NewMatcher(makeTagList("en sr nl")) // Go allocates when creating a list of tags from a single tag! 
diff --git a/language/tables.go b/language/tables.go index 87e58a02a..96b57f610 100644 --- a/language/tables.go +++ b/language/tables.go @@ -47,7 +47,7 @@ const ( _Zzzz = 251 ) -var regionToGroups = []uint8{ // 357 elements +var regionToGroups = []uint8{ // 358 elements // Entry 0 - 3F 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00, @@ -98,8 +98,8 @@ var regionToGroups = []uint8{ // 357 elements 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, -} // Size: 381 bytes + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +} // Size: 382 bytes var paradigmLocales = [][3]uint16{ // 3 elements 0: [3]uint16{0x139, 0x0, 0x7b}, @@ -295,4 +295,4 @@ var matchRegion = []regionIntelligibility{ // 15 elements 14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5}, } // Size: 114 bytes -// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46 +// Total table size 1472 bytes (1KiB); checksum: F86C669 From c2d28a6ddf6cb833e996ccb00cbb4206394958d2 Mon Sep 17 00:00:00 2001 From: Carl Menezes Date: Sun, 11 Apr 2021 00:32:09 +0000 Subject: [PATCH 10/40] number: match input example to be Dutch as in the output Change-Id: I5d1eaf79aa267121ce08dc3990b86ff30c8e0505 GitHub-Last-Rev: 0421867699811a7e6569e1d1d38943118d5c282d GitHub-Pull-Request: golang/text#21 Reviewed-on: https://go-review.googlesource.com/c/text/+/297589 Reviewed-by: Emmanuel Odeke Reviewed-by: Tobias Klauser Run-TryBot: Emmanuel Odeke TryBot-Result: Go Bot --- number/doc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number/doc.go b/number/doc.go index 2ad8d431a..925383acc 100644 --- a/number/doc.go +++ b/number/doc.go @@ -19,7 +19,7 @@ // // p := message.NewPrinter(language.Dutch) // -// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) +// p.Printf("Er zijn %v fietsen per huishouden.", number.Decimal(1.2)) // // Prints: 
Er zijn 1,2 fietsen per huishouden. // // From 5c7c50ebbd4f5b0d53b9b2fcdbeb92ffb732a06e Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Fri, 30 Apr 2021 09:29:37 -0400 Subject: [PATCH 11/40] go.mod: upgrade to go 1.17 This change was produced using 'go mod tidy -go=1.17' with a go command built at CL 315210. This activates lazy loading, and updates the go.mod file to maintain the lazy-loading invariants (namely, including an explicit requirement for every package transitively imported by the main module). Note that this does *not* prevent users with earlier go versions from successfully building packages from this module. For golang/go#36460 Change-Id: Iabb65fc3ed9727abecc3926abcecd445c967d0a9 Reviewed-on: https://go-review.googlesource.com/c/text/+/315571 Trust: Bryan C. Mills Run-TryBot: Bryan C. Mills TryBot-Result: Go Bot Reviewed-by: Alexander Rakoczy Reviewed-by: Marcel van Lohuizen --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 8ff7ecb6f..63bc05f20 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,4 @@ module golang.org/x/text require golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e -go 1.11 +go 1.17 From 3115f89c4b99a620c7f1a4395a2b4405e95b82b6 Mon Sep 17 00:00:00 2001 From: Deepak S Date: Sat, 22 May 2021 08:20:00 +0000 Subject: [PATCH 12/40] language: use multiple runs in TestBestMatchAlloc AllocsPerRun sets runtime.GOMAXPROCS to 1, but it doesn't prevent the runtime from descheduling a goroutine and performing an allocation somewhere in the background. This commit changes the test to use a number of runs large enough to average away the occasional noisy allocation. Fixes golang/go#45809 Change-Id: Ibf904016d0c067740469c8e861079611440222a1 GitHub-Last-Rev: 1606b6969a4d462804dda8d98b9c4432241e87ea GitHub-Pull-Request: golang/text#23 Reviewed-on: https://go-review.googlesource.com/c/text/+/321737 Reviewed-by: Bryan C. 
Mills Trust: Michael Knyszek --- language/match_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/match_test.go b/language/match_test.go index a6df3e1dc..313580b60 100644 --- a/language/match_test.go +++ b/language/match_test.go @@ -242,7 +242,7 @@ func TestBestMatchAlloc(t *testing.T) { m := NewMatcher(makeTagList("en sr nl")) // Go allocates when creating a list of tags from a single tag! list := []Tag{English} - avg := testtext.AllocsPerRun(1, func() { + avg := testtext.AllocsPerRun(100, func() { m.Match(list...) }) if avg > 0 { From 383b2e75a7a4198c42f8f87833eefb772868a56f Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 9 Aug 2021 15:09:12 -0400 Subject: [PATCH 13/40] language: turn parsing panics into ErrSyntax We keep finding new panics in the language parser. Limit the damage by reporting those inputs as syntax errors. Change-Id: I786fe127c3df7e4c8e042d15095d3acf3c4e4a50 Reviewed-on: https://go-review.googlesource.com/c/text/+/340830 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Roland Shoemaker --- internal/language/language.go | 43 +++++++++++++++++++++++++++++++---- internal/language/parse.go | 7 ++++++ language/parse.go | 22 ++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/internal/language/language.go b/internal/language/language.go index f41aedcfc..6105bc7fa 100644 --- a/internal/language/language.go +++ b/internal/language/language.go @@ -251,6 +251,13 @@ func (t Tag) Parent() Tag { // ParseExtension parses s as an extension and returns it on success. func ParseExtension(s string) (ext string, err error) { + defer func() { + if recover() != nil { + ext = "" + err = ErrSyntax + } + }() + scan := makeScannerString(s) var end int if n := len(scan.token); n != 1 { @@ -461,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) { // ParseBase parses a 2- or 3-letter ISO 639 code. 
// It returns a ValueError if s is a well-formed but unknown language identifier // or another error if another error occurred. -func ParseBase(s string) (Language, error) { +func ParseBase(s string) (l Language, err error) { + defer func() { + if recover() != nil { + l = 0 + err = ErrSyntax + } + }() + if n := len(s); n < 2 || 3 < n { return 0, ErrSyntax } @@ -472,7 +486,14 @@ func ParseBase(s string) (Language, error) { // ParseScript parses a 4-letter ISO 15924 code. // It returns a ValueError if s is a well-formed but unknown script identifier // or another error if another error occurred. -func ParseScript(s string) (Script, error) { +func ParseScript(s string) (scr Script, err error) { + defer func() { + if recover() != nil { + scr = 0 + err = ErrSyntax + } + }() + if len(s) != 4 { return 0, ErrSyntax } @@ -489,7 +510,14 @@ func EncodeM49(r int) (Region, error) { // ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code. // It returns a ValueError if s is a well-formed but unknown region identifier // or another error if another error occurred. -func ParseRegion(s string) (Region, error) { +func ParseRegion(s string) (r Region, err error) { + defer func() { + if recover() != nil { + r = 0 + err = ErrSyntax + } + }() + if n := len(s); n < 2 || 3 < n { return 0, ErrSyntax } @@ -578,7 +606,14 @@ type Variant struct { // ParseVariant parses and returns a Variant. An error is returned if s is not // a valid variant. 
-func ParseVariant(s string) (Variant, error) { +func ParseVariant(s string) (v Variant, err error) { + defer func() { + if recover() != nil { + v = Variant{} + err = ErrSyntax + } + }() + s = strings.ToLower(s) if id, ok := variantIndex[s]; ok { return Variant{id, s}, nil diff --git a/internal/language/parse.go b/internal/language/parse.go index c696fd0bd..47ee0fed1 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -232,6 +232,13 @@ func Parse(s string) (t Tag, err error) { if s == "" { return Und, ErrSyntax } + defer func() { + if recover() != nil { + t = Und + err = ErrSyntax + return + } + }() if len(s) <= maxAltTaglen { b := [maxAltTaglen]byte{} for i, c := range s { diff --git a/language/parse.go b/language/parse.go index 11acfd885..59b041008 100644 --- a/language/parse.go +++ b/language/parse.go @@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) { // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // The resulting tag is canonicalized using the canonicalization type c. func (c CanonType) Parse(s string) (t Tag, err error) { + defer func() { + if recover() != nil { + t = Tag{} + err = language.ErrSyntax + } + }() + tt, err := language.Parse(s) if err != nil { return makeTag(tt), err @@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) { // tag is returned after canonicalizing using CanonType c. If one or more errors // are encountered, one of the errors is returned. func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { + defer func() { + if recover() != nil { + t = Tag{} + err = language.ErrSyntax + } + }() + var b language.Builder if err = update(&b, part...); err != nil { return und, err @@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") // Tags with a weight of zero will be dropped. An error will be returned if the // input could not be parsed. 
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { + defer func() { + if recover() != nil { + tag = nil + q = nil + err = language.ErrSyntax + } + }() + var entry string for s != "" { if entry, s = split(s, ','); entry == "" { From 2ca5a52dcb2f184d71fbbf2a28372c5882d990a8 Mon Sep 17 00:00:00 2001 From: Rich Hong Date: Mon, 28 Sep 2020 16:32:38 -0400 Subject: [PATCH 14/40] internal/language: fix canonicalization of extlang parseTag tries to replace <lang>-<extlang> with <extlang>, but <extlang> itself can also be replaced with its canonical form which can be a different length than the original <extlang>. The existing implementation assumes that the length of <extlang> is 3 and would leave scanner positions in an incorrect state if the length of <extlang> is not 3. Fixes golang/go#41617 Change-Id: Ie0da320530e2545f9b521e7b8cf503d854c50b45 Reviewed-on: https://go-review.googlesource.com/c/text/+/260177 Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Trust: Alberto Donizetti Trust: Cherry Mui Trust: Marcel van Lohuizen Reviewed-by: Marcel van Lohuizen --- internal/language/compact/parse_test.go | 5 +++++ internal/language/language_test.go | 2 ++ internal/language/parse.go | 24 ++++++++++++++---------- internal/language/parse_test.go | 10 +++++++++- language/language_test.go | 2 ++ language/parse_test.go | 5 +++++ 6 files changed, 37 insertions(+), 11 deletions(-) diff --git a/internal/language/compact/parse_test.go b/internal/language/compact/parse_test.go index abe3a58c0..2db200b88 100644 --- a/internal/language/compact/parse_test.go +++ b/internal/language/compact/parse_test.go @@ -122,6 +122,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in:
"fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, diff --git a/internal/language/language_test.go b/internal/language/language_test.go index 8244c1c8a..668034d03 100644 --- a/internal/language/language_test.go +++ b/internal/language/language_test.go @@ -681,6 +681,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/internal/language/parse.go b/internal/language/parse.go index 47ee0fed1..aad1e0acf 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) { } else if n >= 4 { return Und, ErrSyntax } else { // the usual case - t, end = parseTag(scan) + t, end = parseTag(scan, true) if n := len(scan.token); n == 1 { t.pExt = uint16(end) end = parseExtensions(scan) @@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) { // parseTag parses language, script, region and variants. // It returns a Tag and the end position in the input that was parsed. -func parseTag(scan *scanner) (t Tag, end int) { +// If doNorm is true, then - will be normalized to . +func parseTag(scan *scanner, doNorm bool) (t Tag, end int) { var e error // TODO: set an error if an unknown lang, script or region is encountered. t.LangID, e = getLangID(scan.token) @@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) { for len(scan.token) == 3 && isAlpha(scan.token[0]) { // From http://tools.ietf.org/html/bcp47, - tags are equivalent // to a tag of the form . 
- lang, e := getLangID(scan.token) - if lang != 0 { - t.LangID = lang - copy(scan.b[langStart:], lang.String()) - scan.b[langStart+3] = '-' - scan.start = langStart + 4 + if doNorm { + lang, e := getLangID(scan.token) + if lang != 0 { + t.LangID = lang + langStr := lang.String() + copy(scan.b[langStart:], langStr) + scan.b[langStart+len(langStr)] = '-' + scan.start = langStart + len(langStr) + 1 + } + scan.gobble(e) } - scan.gobble(e) end = scan.scan() } if len(scan.token) == 4 && isAlpha(scan.token[0]) { @@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int { case 't': // https://www.ietf.org/rfc/rfc6497.txt scan.scan() if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { - _, end = parseTag(scan) + _, end = parseTag(scan, false) scan.toLower(start, end) } for len(scan.token) == 2 && !isAlpha(scan.token[1]) { diff --git a/internal/language/parse_test.go b/internal/language/parse_test.go index e1d428aa6..0af9e8a25 100644 --- a/internal/language/parse_test.go +++ b/internal/language/parse_test.go @@ -192,6 +192,14 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: false}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false}, + // The same input here is used in both TestParse and TestParseExtensions. + // changed should be true for this input in TestParse but changed should be false for this input in TestParseExtensions + // because the entire input has been reformatted but the extension part hasn't. 
+ // {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, @@ -299,7 +307,7 @@ func TestParseTag(t *testing.T) { return Tag{}, true } scan := makeScannerString(tt.in) - id, end := parseTag(&scan) + id, end := parseTag(&scan, true) id.str = string(scan.b[:end]) tt.ext = "" tt.extList = []string{} diff --git a/language/language_test.go b/language/language_test.go index b2e3ce3c5..d45706c98 100644 --- a/language/language_test.go +++ b/language/language_test.go @@ -723,6 +723,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/language/parse_test.go b/language/parse_test.go index 4b7e64db3..e1e5653ad 100644 --- a/language/parse_test.go +++ b/language/parse_test.go @@ -129,6 +129,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, From 29bf0102df0c3c8844ae296826d1a9e7302f26df Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Mon, 3 May 2021 20:14:09 +0000 Subject: [PATCH 15/40] Revert "number: match input example to be Dutch as in the output" This reverts commit c2d28a6ddf6cb833e996ccb00cbb4206394958d2. Reason for revert: This change was incorrect. The examples were supposed to demonstrate that the API can provide translations. 
Change-Id: I247d5509136d34ce4c82a8ac2de50dad3f652a78 Reviewed-on: https://go-review.googlesource.com/c/text/+/316529 Reviewed-by: Emmanuel Odeke Reviewed-by: Alberto Donizetti Run-TryBot: Emmanuel Odeke Run-TryBot: Alberto Donizetti Trust: Cherry Mui --- number/doc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number/doc.go b/number/doc.go index 925383acc..2ad8d431a 100644 --- a/number/doc.go +++ b/number/doc.go @@ -19,7 +19,7 @@ // // p := message.NewPrinter(language.Dutch) // -// p.Printf("Er zijn %v fietsen per huishouden.", number.Decimal(1.2)) +// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) // // Prints: Er zijn 1,2 fietsen per huishouden. // // From 22bfdb67013842b9fed978a6f362b3edee086066 Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Mon, 13 Sep 2021 16:17:04 -0400 Subject: [PATCH 16/40] language: fix ExampleMatcher output This removes the "TODO" after the output section, which was preventing this test from running properly and from running in pkg.go.dev. Fixing this revealed that many of the outputs were out of date and needed to be updated as well. Fixes: golang/go#48361 Change-Id: I495e4eb83f071d4d9810bf44bfabe3258a6220c8 Reviewed-on: https://go-review.googlesource.com/c/text/+/349552 Trust: Alberto Donizetti Trust: Marcel van Lohuizen Run-TryBot: Alberto Donizetti Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Alberto Donizetti Reviewed-by: Amelia Downs Reviewed-by: Marcel van Lohuizen --- language/examples_test.go | 58 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/language/examples_test.go b/language/examples_test.go index ad089a3f4..61401f252 100644 --- a/language/examples_test.go +++ b/language/examples_test.go @@ -247,24 +247,28 @@ func ExampleMatcher() { // This is the set of tags from which we want to pick the best match. These // can be, for example, the supported languages for some package. 
tags := []language.Tag{ - language.English, - language.BritishEnglish, - language.French, - language.Afrikaans, - language.BrazilianPortuguese, - language.EuropeanPortuguese, - language.Croatian, - language.SimplifiedChinese, - language.Raw.Make("iw-IL"), - language.Raw.Make("iw"), - language.Raw.Make("he"), + language.English, // en + language.BritishEnglish, // en-GB + language.French, // fr + language.Afrikaans, // af + language.BrazilianPortuguese, // pt-BR + language.EuropeanPortuguese, // pt-PT + language.SimplifiedChinese, // zh-Hans + language.Raw.Make("iw-IL"), // Hebrew from Israel + language.Raw.Make("iw"), // Hebrew + language.Raw.Make("he"), // Hebrew } m := language.NewMatcher(tags) // A simple match. fmt.Println(m.Match(language.Make("fr"))) - // Australian English is closer to British than American English. + // Australian English is closer to British English than American English. + // The resulting match is "en-GB-u-rg-auzzzz". The first language listed, + // "en-GB", is the matched language. Next is the region override prefix + // "-u-rg-", the region override "au", and the region override suffix "zzzz". + // The region override is for things like currency, dates, and measurement + // systems. fmt.Println(m.Match(language.Make("en-AU"))) // Default to the first tag passed to the Matcher if there is no match. @@ -275,15 +279,12 @@ func ExampleMatcher() { fmt.Println("----") - // Someone specifying sr-Latn is probably fine with getting Croatian. - fmt.Println(m.Match(language.Make("sr-Latn"))) - // We match SimplifiedChinese, but with Low confidence. fmt.Println(m.Match(language.TraditionalChinese)) - // Serbian in Latin script is a closer match to Croatian than Traditional - // Chinese to Simplified Chinese. - fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn"))) + // British English is closer to Australian English than Traditional Chinese + // to Simplified Chinese. 
+ fmt.Println(m.Match(language.TraditionalChinese, language.Make("en-AU"))) fmt.Println("----") @@ -297,7 +298,7 @@ func ExampleMatcher() { fmt.Println("----") - // If a Matcher is initialized with a language and it's deprecated version, + // If a Matcher is initialized with a language and its deprecated version, // it will distinguish between them. fmt.Println(m.Match(language.Raw.Make("iw"))) @@ -319,26 +320,23 @@ func ExampleMatcher() { // Output: // fr 2 Exact - // en-GB 1 High + // en-GB-u-rg-auzzzz 1 High // en 0 No // en 0 No // ---- - // hr 6 High - // zh-Hans 7 Low - // hr 6 High + // zh-Hans 6 Low + // en-GB-u-rg-auzzzz 1 High // ---- - // pt-BR 4 High - // fr 2 High - // af 3 High + // pt-BR 4 Exact + // fr-u-rg-bezzzz 2 High + // af-u-rg-nazzzz 3 High // ---- - // iw 9 Exact - // he 10 Exact + // iw-IL 7 Exact + // he-u-rg-ilzzzz 9 Exact // ---- // fr-u-cu-frf 2 Exact // fr-u-cu-frf 2 High // en-u-co-phonebk 0 No - - // TODO: "he" should be "he-u-rg-IL High" } func ExampleMatchStrings() { From 1b993004bb667a0b60e9bff6aa6dd173d4103d42 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Thu, 30 Sep 2021 11:59:03 -0400 Subject: [PATCH 17/40] all: upgrade x/tools to latest Apparently the x/text/message/pipeline test uses x/tools/go/ssa to introspect something, and as of CL 352953 the 'runtime' package needs a newer version of x/tools/go/ssa (probably to pick up CL 333110). Fixes golang/go#48701 Updates golang/go#47091 Change-Id: I78474f5ad037744906f5c771a7af7e0195f65173 Reviewed-on: https://go-review.googlesource.com/c/text/+/353350 Trust: Bryan C. Mills Trust: Josh Bleecher Snyder Run-TryBot: Bryan C. 
Mills TryBot-Result: Go Bot Reviewed-by: Josh Bleecher Snyder --- go.mod | 8 +++++++- go.sum | 27 +++++++++++++++++++++++++- message/pipeline/pipeline_test.go | 32 ++++++++++++++++++++++++++++--- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 63bc05f20..32d3aa025 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,11 @@ module golang.org/x/text -require golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e +require golang.org/x/tools v0.1.7 + +require ( + golang.org/x/mod v0.4.2 // indirect + golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect +) go 1.17 diff --git a/go.sum b/go.sum index 6a308d730..13ba81505 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,27 @@ -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc= +github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e h1:WUoyKPm6nCo1BnNUvPGnFG3T5DUVem42yDJZZ4CNxMA= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ= +golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/message/pipeline/pipeline_test.go b/message/pipeline/pipeline_test.go index fe3b5daff..51c14a50b 100644 --- a/message/pipeline/pipeline_test.go +++ b/message/pipeline/pipeline_test.go @@ -139,18 +139,44 @@ func initTestdataModule(t *testing.T, dst string) { } goMod := fmt.Sprintf(`module testdata -go 1.11 -require golang.org/x/text 
v0.0.0-00010101000000-000000000000 -replace golang.org/x/text v0.0.0-00010101000000-000000000000 => %s + +replace golang.org/x/text => %s `, xTextDir) if err := ioutil.WriteFile(filepath.Join(dst, "go.mod"), []byte(goMod), 0644); err != nil { t.Fatal(err) } + // Copy in the checksums from the parent module so that we won't + // need to re-fetch them from the checksum database. data, err := ioutil.ReadFile(filepath.Join(xTextDir, "go.sum")) + if err != nil { + t.Fatal(err) + } if err := ioutil.WriteFile(filepath.Join(dst, "go.sum"), data, 0644); err != nil { t.Fatal(err) } + + // We've added a replacement for the parent version of x/text, + // but now we need to populate the correct version. + // (We can't just replace the zero-version because x/text + // may indirectly depend on some nonzero version of itself.) + // + // We use 'go get' instead of 'go mod tidy' to avoid the old-release + // compatibility check when graph pruning is enabled, and to avoid doing + // more work than necessary for test dependencies of imported packages + // (we're not going to run those tests here anyway). + // + // We 'go get' the packages in the testdata module — not specific dependencies + // of those packages — so that they will resolve to whatever version is + // already required in the (replaced) x/text go.mod file. 
+ + getCmd := exec.Command("go", "get", "-d", "./...") + getCmd.Dir = dst + getCmd.Env = append(os.Environ(), "PWD="+dst, "GOPROXY=off", "GOCACHE=off") + if out, err := getCmd.CombinedOutput(); err != nil { + t.Logf("%s", out) + t.Fatal(err) + } } func checkOutput(t *testing.T, gen string, testdataDir string) { From 5bd84dd9b33bd2bdebd8a6a6477920a8e492d47f Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Sun, 3 Oct 2021 03:13:22 +0000 Subject: [PATCH 18/40] =?UTF-8?q?encoding/simplifiedchinese:=20Fixes=20?= =?UTF-8?q?=E2=82=AC=20encoding=20in=20GB18030?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The euro sign is an exception which is given a single byte code of 0x80 in Microsoft's later versions of CP936/GBK and a two byte code of A2 E3 in GB18030. https://en.wikipedia.org/wiki/GB_18030#cite_note-4 Fixes golang/go#48691 Change-Id: I6a4460274d4313ad1d03bcd8070373af674691eb GitHub-Last-Rev: acbbc50f20d663452f8da77cf2a66d8d893bec1d GitHub-Pull-Request: golang/text#26 Reviewed-on: https://go-review.googlesource.com/c/text/+/353712 Reviewed-by: Nigel Tao Trust: Nigel Tao Trust: Alberto Donizetti Run-TryBot: Nigel Tao TryBot-Result: Go Bot --- encoding/simplifiedchinese/all_test.go | 10 ++++++++++ encoding/simplifiedchinese/gbk.go | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go index a556c94dd..fbb623cd2 100644 --- a/encoding/simplifiedchinese/all_test.go +++ b/encoding/simplifiedchinese/all_test.go @@ -40,7 +40,9 @@ func TestNonRepertoire(t *testing.T) { {enc, HZGB2312, "a갂", "a"}, {enc, HZGB2312, "\u6cf5갂", "~{1C~}"}, + {dec, GBK, "\xa2\xe3", "€"}, {dec, GB18030, "\x80", "€"}, + {dec, GB18030, "\x81", "\ufffd"}, {dec, GB18030, "\x81\x20", "\ufffd "}, {dec, GB18030, "\xfe\xfe", "\ufffd"}, @@ -125,6 +127,14 @@ func TestBasics(t *testing.T) { encPrefix: "~{", encoded: ";(F#,6@WCN^O`GW!#", utf8: "花间一壶酒,独酌无相亲。", + 
}, { + e: GBK, + encoded: "\x80", + utf8: "€", + }, { + e: GB18030, + encoded: "\xa2\xe3", + utf8: "€", }} for _, tc := range testCases { diff --git a/encoding/simplifiedchinese/gbk.go b/encoding/simplifiedchinese/gbk.go index b89c45b03..0e0fabfd6 100644 --- a/encoding/simplifiedchinese/gbk.go +++ b/encoding/simplifiedchinese/gbk.go @@ -55,6 +55,8 @@ loop: // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. + // GBK’s decoder is gb18030’s decoder. https://encoding.spec.whatwg.org/#gbk-decoder + // If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder case c0 == 0x80: r, size = '€', 1 @@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. - if r == '€' { + // GBK’s encoder is gb18030’s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder + // If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder + if !e.gb18030 && r == '€' { r = 0x80 goto write1 } From bb1c79828956f0f6753be0920efcecf32ba55f93 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 6 May 2021 12:39:25 -0700 Subject: [PATCH 19/40] internal/export/idna: make Transitional an actual toggle Previously, it always enabled transitional processing instead of toggling, despite the fact that it took a boolean argument. For golang/go#30940. 
Change-Id: I00ad51ec55abfb2de28deb8c98f949989ece1099 Reviewed-on: https://go-review.googlesource.com/c/text/+/317729 Reviewed-by: Ian Lance Taylor Reviewed-by: Damien Neil Trust: Damien Neil Run-TryBot: Damien Neil TryBot-Result: Go Bot --- internal/export/idna/conformance_test.go | 3 +++ internal/export/idna/idna10.0.0.go | 4 ++-- internal/export/idna/idna10.0.0_test.go | 12 +++++++++--- internal/export/idna/idna9.0.0.go | 4 ++-- internal/export/idna/idna9.0.0_test.go | 12 +++++++++--- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/internal/export/idna/conformance_test.go b/internal/export/idna/conformance_test.go index 1cdf43ca9..3e0e87518 100644 --- a/internal/export/idna/conformance_test.go +++ b/internal/export/idna/conformance_test.go @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 +// +build go1.10 + package idna import ( diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 3e7bac3cb..e6b62a287 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -59,10 +59,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. 
func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 4142bfa84..0b9f7a862 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -95,14 +99,16 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. 
// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 7acecb800..4979fdc17 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -58,10 +58,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index b76b79628..524727b1b 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -91,14 +95,16 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. 
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the From 86e65b86426d37ada82226be2e8c1afae6bdc178 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 6 May 2021 14:25:55 -0700 Subject: [PATCH 20/40] internal/export/idna: fix int32 overflows Prefer multiplication (int64(b)*int64(c) > MaxInt32) over division (b > MaxInt32/c) for overflow checking as it is a little faster on 386, and a LOT faster on amd64. For golang/go#28233. Change-Id: Ibf42529b93b699417781adc7eca6e66474f00bbf Reviewed-on: https://go-review.googlesource.com/c/text/+/317731 Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Trust: Damien Neil --- internal/export/idna/punycode.go | 36 +++++++++++++++++++-------- internal/export/idna/punycode_test.go | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/internal/export/idna/punycode.go b/internal/export/idna/punycode.go index f0cbd487b..7e96febf1 100644 --- a/internal/export/idna/punycode.go +++ b/internal/export/idna/punycode.go @@ -47,6 +47,7 @@ func decode(encoded string) (string, error) { } } i, n, bias := int32(0), initialN, initialBias + overflow := false for pos < len(encoded) { oldI, w := i, int32(1) for k := base; ; k += base { @@ -58,29 +59,32 @@ func decode(encoded string) (string, error) { return "", punyError(encoded) } pos++ - i += digit * w - if i < 0 { + i, overflow = madd(i, digit, w) + if overflow { return "", punyError(encoded) } t := k - bias - if t < tmin { + if k <= bias { t = tmin - } 
else if t > tmax { + } else if k >= bias+tmax { t = tmax } if digit < t { break } - w *= base - t - if w >= math.MaxInt32/base { + w, overflow = madd(0, w, base-t) + if overflow { return "", punyError(encoded) } } + if len(output) >= 1024 { + return "", punyError(encoded) + } x := int32(len(output) + 1) bias = adapt(i-oldI, x, oldI == 0) n += i / x i %= x - if n > utf8.MaxRune || len(output) >= 1024 { + if n < 0 || n > utf8.MaxRune { return "", punyError(encoded) } output = append(output, 0) @@ -113,6 +117,7 @@ func encode(prefix, s string) (string, error) { if b > 0 { output = append(output, '-') } + overflow := false for remaining != 0 { m := int32(0x7fffffff) for _, r := range s { @@ -120,8 +125,8 @@ func encode(prefix, s string) (string, error) { m = r } } - delta += (m - n) * (h + 1) - if delta < 0 { + delta, overflow = madd(delta, m-n, h+1) + if overflow { return "", punyError(s) } n = m @@ -139,9 +144,9 @@ func encode(prefix, s string) (string, error) { q := delta for k := base; ; k += base { t := k - bias - if t < tmin { + if k <= bias { t = tmin - } else if t > tmax { + } else if k >= bias+tmax { t = tmax } if q < t { @@ -162,6 +167,15 @@ func encode(prefix, s string) (string, error) { return string(output), nil } +// madd computes a + (b * c), detecting overflow. +func madd(a, b, c int32) (next int32, overflow bool) { + p := int64(b) * int64(c) + if p > math.MaxInt32-int64(a) { + return 0, true + } + return a + int32(p), false +} + func decodeDigit(x byte) (digit int32, ok bool) { switch { case '0' <= x && x <= '9': diff --git a/internal/export/idna/punycode_test.go b/internal/export/idna/punycode_test.go index 2d99239ec..5cf0c968a 100644 --- a/internal/export/idna/punycode_test.go +++ b/internal/export/idna/punycode_test.go @@ -177,6 +177,7 @@ var punycodeErrorTestCases = [...]string{ "decode 9999999999a", // "9999999999a" overflows the int32 calculation. "encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow. 
+ "encode " + strings.Repeat("x", 65666) + "\uffff", // int32 overflow. issue #28233 } func TestPunycodeErrors(t *testing.T) { From 593da8d90fd448917a9ef0ca582e8d2bbe50ab2b Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Thu, 28 Oct 2021 17:50:34 -0700 Subject: [PATCH 21/40] internal/export/idna: avoid strconv.Unquote errors on surrogate halves The IDNA test data includes surrogate halves, which strconv.Unquote reports an error for as of Go 1.18. Change-Id: I9eb954aa3ab3a177ab0984d0da7caee7a47920a5 Reviewed-on: https://go-review.googlesource.com/c/text/+/359554 Trust: Damien Neil Run-TryBot: Damien Neil TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- internal/export/idna/idna_test.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/export/idna/idna_test.go b/internal/export/idna/idna_test.go index 7235452c2..e568d3b90 100644 --- a/internal/export/idna/idna_test.go +++ b/internal/export/idna/idna_test.go @@ -5,7 +5,9 @@ package idna import ( + "encoding/hex" "fmt" + "regexp" "strconv" "strings" "testing" @@ -99,12 +101,14 @@ func doTest(t *testing.T, f func(string) (string, error), name, input, want, err }) } +var unescapeRE = regexp.MustCompile(`\\u([0-9a-zA-Z]{4})`) + func unescape(s string) string { - s, err := strconv.Unquote(`"` + s + `"`) - if err != nil { - panic(err) - } - return s + return unescapeRE.ReplaceAllStringFunc(s, func(v string) string { + var d [2]byte + hex.Decode(d[:], []byte(v[2:])) + return string(rune(d[0])<<8 | rune(d[1])) + }) } func BenchmarkProfile(b *testing.B) { From 835dae61a3f3b1dd4f513e19a270937411e179f5 Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Thu, 28 Oct 2021 17:47:46 -0700 Subject: [PATCH 22/40] internal/export/idna: use nontransitional processing in Go 1.18 Updates golang/go#46001 Updates golang/go#47510 Change-Id: I1e978a3c6230abfd0b1aaab0c7343b33dda1ba64 Reviewed-on: https://go-review.googlesource.com/c/text/+/359634 Trust: Damien Neil Run-TryBot: Damien Neil 
TryBot-Result: Go Bot Reviewed-by: Timothy Gu Reviewed-by: Ian Lance Taylor --- internal/export/idna/example_test.go | 27 ++++++++++++------------- internal/export/idna/go118.go | 12 +++++++++++ internal/export/idna/idna10.0.0.go | 2 +- internal/export/idna/idna10.0.0_test.go | 14 ++++++++++--- internal/export/idna/idna_test.go | 2 +- internal/export/idna/pre_go118.go | 10 +++++++++ 6 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 internal/export/idna/go118.go create mode 100644 internal/export/idna/pre_go118.go diff --git a/internal/export/idna/example_test.go b/internal/export/idna/example_test.go index 6e6b8727c..4c7352bdc 100644 --- a/internal/export/idna/example_test.go +++ b/internal/export/idna/example_test.go @@ -13,27 +13,26 @@ import ( func ExampleProfile() { // Raw Punycode has no restrictions and does no mappings. fmt.Println(idna.ToASCII("")) - fmt.Println(idna.ToASCII("*.faß.com")) - fmt.Println(idna.Punycode.ToASCII("*.faß.com")) + fmt.Println(idna.ToASCII("*.GÖPHER.com")) + fmt.Println(idna.Punycode.ToASCII("*.GÖPHER.com")) - // Rewrite IDN for lookup. This (currently) uses transitional mappings to - // find a balance between IDNA2003 and IDNA2008 compatibility. + // Rewrite IDN for lookup. fmt.Println(idna.Lookup.ToASCII("")) - fmt.Println(idna.Lookup.ToASCII("www.faß.com")) + fmt.Println(idna.Lookup.ToASCII("www.GÖPHER.com")) - // Convert an IDN to ASCII for registration purposes. This changes the - // encoding, but reports an error if the input was illformed. - fmt.Println(idna.Registration.ToASCII("")) - fmt.Println(idna.Registration.ToASCII("www.faß.com")) + // Convert an IDN to ASCII for registration purposes. + // This reports an error if the input was illformed. 
+ fmt.Println(idna.Registration.ToASCII("www.GÖPHER.com")) + fmt.Println(idna.Registration.ToASCII("www.göpher.com")) // Output: // - // *.xn--fa-hia.com - // *.xn--fa-hia.com + // *.xn--GPHER-1oa.com + // *.xn--GPHER-1oa.com // - // www.fass.com - // idna: invalid label "" - // www.xn--fa-hia.com + // www.xn--gpher-jua.com + // www.xn--GPHER-1oa.com idna: disallowed rune U+0047 + // www.xn--gpher-jua.com } func ExampleNew() { diff --git a/internal/export/idna/go118.go b/internal/export/idna/go118.go new file mode 100644 index 000000000..941a7aaff --- /dev/null +++ b/internal/export/idna/go118.go @@ -0,0 +1,12 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.18 +// +build go1.18 + +package idna + +// Transitional processing is disabled by default in Go 1.18. +// https://golang.org/issue/47510 +const transitionalLookup = false diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index e6b62a287..0e7571d16 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -284,7 +284,7 @@ var ( punycode = &Profile{} lookup = &Profile{options{ - transitional: true, + transitional: transitionalLookup, useSTD3Rules: true, checkHyphens: true, checkJoiners: true, diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 0b9f7a862..c3365bc6a 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -102,7 +102,7 @@ func TestLabelErrors(t *testing.T) { // Chrome, modern Firefox, Safari, and IE. 
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" + {transitional, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, // Transitional vs Nontransitional processing @@ -115,10 +115,10 @@ func TestLabelErrors(t *testing.T) { // punycode on the result using transitional mapping. // Firefox 49.0.1 goes haywire on this string and prints a bunch of what // seems to be nested punycode encodings. - {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, + {transitional, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"}, - {resolve, "a\u200Cb", "ab", ""}, + {transitional, "a\u200Cb", "ab", ""}, {display, "a\u200Cb", "a\u200Cb", "C"}, {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"}, @@ -153,3 +153,11 @@ func TestLabelErrors(t *testing.T) { doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) } } + +func TestTransitionalDefault(t *testing.T) { + want := "xn--strae-oqa.de" + if transitionalLookup { + want = "strasse.de" + } + doTest(t, Lookup.ToASCII, "Lookup", "straße.de", want, "") +} diff --git a/internal/export/idna/idna_test.go b/internal/export/idna/idna_test.go index e568d3b90..a13b67348 100644 --- a/internal/export/idna/idna_test.go +++ b/internal/export/idna/idna_test.go @@ -45,7 +45,7 @@ func TestProfiles(t *testing.T) { VerifyDNSLength(true), BidiRule(), )}, - {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))}, + {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(transitionalLookup))}, {"Display", display, New(MapForLookup(), BidiRule())}, } for _, tc := range testCases { diff --git a/internal/export/idna/pre_go118.go b/internal/export/idna/pre_go118.go new file mode 100644 index 000000000..ab3fa2e8c --- /dev/null +++ 
b/internal/export/idna/pre_go118.go @@ -0,0 +1,10 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.18 +// +build !go1.18 + +package idna + +const transitionalLookup = true From 8da7c0fd2b032cc0b7be90fcb2d361c5ebc40fef Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Mon, 1 Nov 2021 13:27:11 -0700 Subject: [PATCH 23/40] gen.go: copy all tablesXX.X.X.go versions to golang.org/x/net Remove logic that copies only the most current tablesXX.X.X.go, renaming it to tables.go. The golang.org/x/net/idna package currently contains all the versioned files and no tables.go, so this change makes gen.go consistent with the last export. Change-Id: Ic2797b45ec998873651eda04ed56ada29788f0f0 Reviewed-on: https://go-review.googlesource.com/c/text/+/360380 Trust: Damien Neil Trust: Marcel van Lohuizen Run-TryBot: Damien Neil Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Marcel van Lohuizen --- gen.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gen.go b/gen.go index 04ff6035c..fb2fb94a7 100644 --- a/gen.go +++ b/gen.go @@ -256,12 +256,6 @@ func copyPackage(dirSrc, dirDst, search, replace string) { filepath.Dir(file) != dirSrc { return nil } - if strings.HasPrefix(base, "tables") { - if !strings.HasSuffix(base, gen.UnicodeVersion()+".go") { - return nil - } - base = "tables.go" - } b, err := ioutil.ReadFile(file) if err != nil || bytes.Contains(b, []byte("\n// +build ignore")) { return err From 85a1c56496a61b2c4e607faaf3369d473cf2589d Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Mon, 1 Nov 2021 16:41:32 -0400 Subject: [PATCH 24/40] text/collate: add testable examples Change-Id: Id3915137c4d365ec82ce74d8212e7b6cfb6fb200 Reviewed-on: https://go-review.googlesource.com/c/text/+/360494 Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Reviewed-by: Marcel van Lohuizen Trust: Marcel van 
Lohuizen --- collate/example_sort_test.go | 56 +++++++++++++++++++++++++ collate/examples_test.go | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 collate/example_sort_test.go create mode 100644 collate/examples_test.go diff --git a/collate/example_sort_test.go b/collate/example_sort_test.go new file mode 100644 index 000000000..e86c02a7a --- /dev/null +++ b/collate/example_sort_test.go @@ -0,0 +1,56 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +type book struct { + title string +} + +type bookcase struct { + books []book +} + +func (bc bookcase) Len() int { + return len(bc.books) +} + +func (bc bookcase) Swap(i, j int) { + temp := bc.books[i] + bc.books[i] = bc.books[j] + bc.books[j] = temp +} + +func (bc bookcase) Bytes(i int) []byte { + // returns the bytes of text at index i + return []byte(bc.books[i].title) +} + +func ExampleCollator_Sort() { + bc := bookcase{ + books: []book{ + {title: "If Cats Disappeared from the World"}, + {title: "The Guest Cat"}, + {title: "Catwings"}, + }, + } + + cc := collate.New(language.English) + cc.Sort(bc) + + for _, b := range bc.books { + fmt.Println(b.title) + } + // Output: + // Catwings + // If Cats Disappeared from the World + // The Guest Cat +} diff --git a/collate/examples_test.go b/collate/examples_test.go new file mode 100644 index 000000000..0a42a6d21 --- /dev/null +++ b/collate/examples_test.go @@ -0,0 +1,79 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +func ExampleNew() { + letters := []string{"ä", "å", "ö", "o", "a"} + + ec := collate.New(language.English) + ec.SortStrings(letters) + fmt.Printf("English Sorting: %v\n", letters) + + sc := collate.New(language.Swedish) + sc.SortStrings(letters) + fmt.Printf("Swedish Sorting: %v\n", letters) + + numbers := []string{"0", "11", "01", "2", "3", "23"} + + ec.SortStrings(numbers) + fmt.Printf("Alphabetic Sorting: %v\n", numbers) + + nc := collate.New(language.English, collate.Numeric) + nc.SortStrings(numbers) + fmt.Printf("Numeric Sorting: %v\n", numbers) + // Output: + // English Sorting: [a å ä o ö] + // Swedish Sorting: [a o å ä ö] + // Alphabetic Sorting: [0 01 11 2 23 3] + // Numeric Sorting: [0 01 2 3 11 23] +} + +func ExampleCollator_SortStrings() { + c := collate.New(language.English) + words := []string{"meow", "woof", "bark", "moo"} + c.SortStrings(words) + fmt.Println(words) + // Output: + // [bark meow moo woof] +} + +func ExampleCollator_CompareString() { + c := collate.New(language.English) + r := c.CompareString("meow", "woof") + fmt.Println(r) + + r = c.CompareString("woof", "meow") + fmt.Println(r) + + r = c.CompareString("meow", "meow") + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} + +func ExampleCollator_Compare() { + c := collate.New(language.English) + r := c.Compare([]byte("meow"), []byte("woof")) + fmt.Println(r) + + r = c.Compare([]byte("woof"), []byte("meow")) + fmt.Println(r) + + r = c.Compare([]byte("meow"), []byte("meow")) + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} From 459fa287ae002e3df243c0bd10ea915f5c64f687 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Mon, 18 Oct 2021 14:56:52 +0000 Subject: [PATCH 25/40] text/currency: format currency amount according to the locale Fixes golang/go#47623 Change-Id: Ie6be9db93bf58f597f1ea4d864fcb507235b1018 GitHub-Last-Rev: 
4c8f3557daf5440390c0775ed6e71ec80f8c11e8 GitHub-Pull-Request: golang/text#27 Reviewed-on: https://go-review.googlesource.com/c/text/+/353935 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot --- currency/format.go | 36 +++++++++++++++++++++--------------- currency/format_test.go | 23 ++++++++++++++++++++--- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/currency/format.go b/currency/format.go index 11152632a..cc4570d3b 100644 --- a/currency/format.go +++ b/currency/format.go @@ -6,11 +6,13 @@ package currency import ( "fmt" - "io" "sort" "golang.org/x/text/internal/format" "golang.org/x/text/internal/language/compact" + "golang.org/x/text/internal/number" + + "golang.org/x/text/language" ) // Amount is an amount-currency unit pair. @@ -34,8 +36,6 @@ func (a Amount) Currency() Unit { return a.currency } // // Add/Sub/Div/Mul/Round. -var space = []byte(" ") - // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (a Amount) Format(s fmt.State, verb rune) { @@ -58,9 +58,11 @@ type formattedValue struct { // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (v formattedValue) Format(s fmt.State, verb rune) { + var tag language.Tag var lang compact.ID if state, ok := s.(format.State); ok { - lang, _ = compact.RegionalID(compact.Tag(state.Language())) + tag = state.Language() + lang, _ = compact.RegionalID(compact.Tag(tag)) } // Get the options. Use DefaultFormat if not present. @@ -73,18 +75,22 @@ func (v formattedValue) Format(s fmt.State, verb rune) { cur = opt.currency } - // TODO: use pattern. 
- io.WriteString(s, opt.symbol(lang, cur)) + sym := opt.symbol(lang, cur) if v.amount != nil { - s.Write(space) - - // TODO: apply currency-specific rounding - scale, _ := opt.kind.Rounding(cur) - if _, ok := s.Precision(); !ok { - fmt.Fprintf(s, "%.*f", scale, v.amount) - } else { - fmt.Fprint(s, v.amount) - } + var f number.Formatter + f.InitDecimal(tag) + + scale, increment := opt.kind.Rounding(cur) + f.RoundingContext.SetScale(scale) + f.RoundingContext.Increment = uint32(increment) + f.RoundingContext.IncrementScale = uint8(scale) + f.RoundingContext.Mode = number.ToNearestAway + + d := f.Append(nil, v.amount) + + fmt.Fprint(s, sym, " ", string(d)) + } else { + fmt.Fprint(s, sym) } } diff --git a/currency/format_test.go b/currency/format_test.go index 0aa0d58af..5cb11ebc9 100644 --- a/currency/format_test.go +++ b/currency/format_test.go @@ -12,8 +12,10 @@ import ( ) var ( + de = language.German en = language.English fr = language.French + de_CH = language.MustParse("de-CH") en_US = language.AmericanEnglish en_GB = language.BritishEnglish en_AU = language.MustParse("en-AU") @@ -42,20 +44,35 @@ func TestFormatting(t *testing.T) { 9: {en, 9.0, Symbol.Default(EUR), "€ 9.00"}, 10: {en, 10.123, Symbol.Default(KRW), "₩ 10"}, - 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11.52"}, + 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11,52"}, 12: {en, 12.123, Symbol.Default(czk), "CZK 12.12"}, 13: {en, 13.123, Symbol.Default(czk).Kind(Cash), "CZK 13"}, 14: {en, 14.12345, ISO.Default(MustParseISO("CLF")), "CLF 14.1235"}, 15: {en, USD.Amount(15.00), ISO.Default(TWD), "USD 15.00"}, 16: {en, KRW.Amount(16.00), ISO.Kind(Cash), "KRW 16"}, - // TODO: support integers as well. 
- 17: {en, USD, nil, "USD"}, 18: {en, USD, ISO, "USD"}, 19: {en, USD, Symbol, "$"}, 20: {en_GB, USD, Symbol, "US$"}, 21: {en_AU, USD, NarrowSymbol, "$"}, + + // https://en.wikipedia.org/wiki/Decimal_separator + 22: {de, EUR.Amount(1234567.89), nil, "EUR 1.234.567,89"}, + 23: {fr, EUR.Amount(1234567.89), nil, "EUR 1\u00a0234\u00a0567,89"}, + 24: {en_AU, EUR.Amount(1234567.89), nil, "EUR 1,234,567.89"}, + 25: {de_CH, EUR.Amount(1234567.89), nil, "EUR 1’234’567.89"}, + + // https://en.wikipedia.org/wiki/Cash_rounding + 26: {de, NOK.Amount(2.49), ISO.Kind(Cash), "NOK 2"}, + 27: {de, NOK.Amount(2.50), ISO.Kind(Cash), "NOK 3"}, + 28: {de, DKK.Amount(0.24), ISO.Kind(Cash), "DKK 0,00"}, + 29: {de, DKK.Amount(0.25), ISO.Kind(Cash), "DKK 0,50"}, + + // integers + 30: {de, EUR.Amount(1234567), nil, "EUR 1.234.567,00"}, + 31: {en, CNY.Amount(0), NarrowSymbol, "¥ 0.00"}, + 32: {en, CNY.Amount(0), Symbol, "CN¥ 0.00"}, } for i, tc := range testCases { p := message.NewPrinter(tc.tag) From 310d592b71f764a238114c372e0ce7db4ecb560a Mon Sep 17 00:00:00 2001 From: Sean Liao Date: Thu, 21 Oct 2021 19:49:33 +0200 Subject: [PATCH 26/40] cmd/gotext: only match files ending with pattern Exclude files with other extensions like .json.swp from editors. 
Fixes golang/go#48983 Change-Id: Id74ca7ae208688cf900661d641e5403d453da33c Reviewed-on: https://go-review.googlesource.com/c/text/+/357734 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot --- cmd/gotext/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/gotext/main.go b/cmd/gotext/main.go index f31dd4fbd..c8dc7990a 100644 --- a/cmd/gotext/main.go +++ b/cmd/gotext/main.go @@ -48,7 +48,7 @@ func config() (*pipeline.Config, error) { return &pipeline.Config{ SourceLanguage: tag, Supported: getLangs(), - TranslationsPattern: `messages\.(.*)\.json`, + TranslationsPattern: `messages\.(.*)\.json$`, GenFile: *out, }, nil } From 7d8748685ddeb50953764cb62aa1064f0d928d61 Mon Sep 17 00:00:00 2001 From: Shengyu Zhang Date: Tue, 2 Nov 2021 09:32:53 +0000 Subject: [PATCH 27/40] text/unicod/bidi: remove duplicate assignment Fixes golang/go#43623 Change-Id: I4da134cccaf6a9e5331229a0ac2a60e5cb711e92 GitHub-Last-Rev: ae0f2d91f86dfd045bb403f5906ba80d2b9092de GitHub-Pull-Request: golang/text#29 Reviewed-on: https://go-review.googlesource.com/c/text/+/358834 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor --- unicode/bidi/core.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index e4c081101..fde188a33 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() { if t == NSM { s.types[i] = precedingCharacterType } else { - if t.in(LRI, RLI, FSI, PDI) { - precedingCharacterType = ON - } + // if t.in(LRI, RLI, FSI, PDI) { + // precedingCharacterType = ON + // } precedingCharacterType = t } } From 18b340fc7af22495828ffbe71e9f9e22583bc7a9 Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Fri, 29 Oct 2021 13:32:13 -0400 Subject: [PATCH 28/40] language: fix typo in update docs Change-Id: 
Ied188b87f0a9a1a4fb160b2a7ba239ed70b843a6 Reviewed-on: https://go-review.googlesource.com/c/text/+/359715 Reviewed-by: Ian Lance Taylor Reviewed-by: Amelia Downs Trust: Cherry Mui --- language/match.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/match.go b/language/match.go index f73492134..ee45f4947 100644 --- a/language/match.go +++ b/language/match.go @@ -545,7 +545,7 @@ type bestMatch struct { // match as the preferred match. // // If pin is true and have and tag are a strong match, it will henceforth only -// consider matches for this language. This corresponds to the nothing that most +// consider matches for this language. This corresponds to the idea that most // users have a strong preference for the first defined language. A user can // still prefer a second language over a dialect of the preferred language by // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should From d1c84af989ab0f62cd853b5ae33b1b4db4f1e88b Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Sun, 23 Jan 2022 16:23:19 -0500 Subject: [PATCH 29/40] message/pipeline: skip TestFullCycle on plan9-arm This test has timed out several times recently on this builder. The test is pretty slow even on Linux, and may be especially filesystem-intensive. 
For golang/go#49338 Change-Id: Ife2bb399b10f369f815055bb0ad44bb007f606b7 Reviewed-on: https://go-review.googlesource.com/c/text/+/380414 Trust: Bryan Mills Run-TryBot: Bryan Mills TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor --- message/pipeline/pipeline_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/message/pipeline/pipeline_test.go b/message/pipeline/pipeline_test.go index 51c14a50b..2adb11fd2 100644 --- a/message/pipeline/pipeline_test.go +++ b/message/pipeline/pipeline_test.go @@ -33,6 +33,9 @@ func TestFullCycle(t *testing.T) { if runtime.GOOS == "android" { t.Skip("cannot load outside packages on android") } + if b := os.Getenv("GO_BUILDER_NAME"); b == "plan9-arm" { + t.Skipf("skipping: test frequently times out on %s", b) + } if _, err := exec.LookPath("go"); err != nil { t.Skipf("skipping because 'go' command is unavailable: %v", err) } From 8db23f83d6d6c5a1bcecede55281db3c11fc7c60 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 11 Apr 2022 13:13:21 -0400 Subject: [PATCH 30/40] all: gofmt Gofmt to update doc comments to the new formatting. For golang/go#51082. 
Change-Id: I3e3c5666d5e901f2c5303911ddb548e3dd567fce Reviewed-on: https://go-review.googlesource.com/c/text/+/399603 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Auto-Submit: Russ Cox Reviewed-by: Ian Lance Taylor --- cases/gen_trieval.go | 73 ++++++++++++----------- cases/trieval.go | 73 ++++++++++++----------- cmd/gotext/doc.go | 39 ++++-------- collate/build/builder.go | 39 +++++++----- collate/build/colelem.go | 8 ++- collate/build/contract.go | 25 ++++---- feature/plural/gen.go | 15 ++--- feature/plural/message.go | 14 ++--- feature/plural/plural.go | 51 ++++++++-------- internal/catmsg/catmsg.go | 50 ++++++++-------- internal/cldrtree/cldrtree.go | 1 - internal/colltab/collelem.go | 39 ++++++------ internal/export/idna/gen_trieval.go | 34 +++++------ internal/export/idna/trieval.go | 34 +++++------ internal/language/tables.go | 17 +++--- internal/number/decimal.go | 48 +++++++-------- internal/testtext/codesize.go | 12 ++-- internal/triegen/triegen.go | 25 ++++---- internal/utf8internal/utf8internal.go | 2 +- language/display/lookup.go | 16 ++--- language/doc.go | 44 +++++++------- message/catalog/catalog.go | 85 +++++++++++++-------------- message/doc.go | 50 ++++++++-------- number/doc.go | 17 +++--- unicode/bidi/core.go | 20 +++---- unicode/cldr/collate.go | 8 ++- unicode/norm/forminfo.go | 9 +-- unicode/norm/maketables.go | 2 + unicode/norm/normalize.go | 11 ++-- width/tables10.0.0.go | 24 +++++--- width/tables11.0.0.go | 24 +++++--- width/tables12.0.0.go | 24 +++++--- width/tables13.0.0.go | 24 +++++--- width/tables9.0.0.go | 24 +++++--- 34 files changed, 528 insertions(+), 453 deletions(-) diff --git a/cases/gen_trieval.go b/cases/gen_trieval.go index 6c7222a73..14dd33b1d 100644 --- a/cases/gen_trieval.go +++ b/cases/gen_trieval.go @@ -19,19 +19,19 @@ package main // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping 
-// Only 13..8 are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) +// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. If the rune is cased, the XOR pattern maps either a lowercase @@ -133,37 +133,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. 
// A length of 0 indicates a mapping to zero-length string. // // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. diff --git a/cases/trieval.go b/cases/trieval.go index 99e039628..4e4d13fe5 100644 --- a/cases/trieval.go +++ b/cases/trieval.go @@ -14,19 +14,19 @@ package cases // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping -// Only 13..8 are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) 
+// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. If the rune is cased, the XOR pattern maps either a lowercase @@ -128,37 +128,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. // A length of 0 indicates a mapping to zero-length string. // // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. 
diff --git a/cmd/gotext/doc.go b/cmd/gotext/doc.go index fa247c6d3..d363ae25e 100644 --- a/cmd/gotext/doc.go +++ b/cmd/gotext/doc.go @@ -4,60 +4,47 @@ // // Usage: // -// gotext command [arguments] +// gotext command [arguments] // // The commands are: // -// update merge translations and generate catalog -// extract extracts strings to be translated from code -// rewrite rewrites fmt functions to use a message Printer -// generate generates code to insert translated messages +// update merge translations and generate catalog +// extract extracts strings to be translated from code +// rewrite rewrites fmt functions to use a message Printer +// generate generates code to insert translated messages // // Use "gotext help [command]" for more information about a command. // // Additional help topics: // -// // Use "gotext help [topic]" for more information about that topic. // -// -// Merge translations and generate catalog +// # Merge translations and generate catalog // // Usage: // -// gotext update * [-out ] -// -// +// gotext update * [-out ] // -// -// Extracts strings to be translated from code +// # Extracts strings to be translated from code // // Usage: // -// gotext extract * -// -// +// gotext extract * // -// -// Rewrites fmt functions to use a message Printer +// # Rewrites fmt functions to use a message Printer // // Usage: // -// gotext rewrite +// gotext rewrite // // rewrite is typically done once for a project. It rewrites all usages of // fmt to use x/text's message package whenever a message.Printer is in scope. // It rewrites Print and Println calls with constant strings to the equivalent // using Printf to allow translators to reorder arguments. 
// -// -// Generates code to insert translated messages +// # Generates code to insert translated messages // // Usage: // -// gotext generate -// -// -// -// +// gotext generate package main diff --git a/collate/build/builder.go b/collate/build/builder.go index 092a4b506..3efb7387d 100644 --- a/collate/build/builder.go +++ b/collate/build/builder.go @@ -225,26 +225,37 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error { // // Examples: create a tailoring for Swedish, where "ä" is ordered after "z" // at the primary sorting level: -// t := b.Tailoring("se") -// t.SetAnchor("z") -// t.Insert(colltab.Primary, "ä", "") +// +// t := b.Tailoring("se") +// t.SetAnchor("z") +// t.Insert(colltab.Primary, "ä", "") +// // Order "ü" after "ue" at the secondary sorting level: -// t.SetAnchor("ue") -// t.Insert(colltab.Secondary, "ü","") +// +// t.SetAnchor("ue") +// t.Insert(colltab.Secondary, "ü","") +// // or -// t.SetAnchor("u") -// t.Insert(colltab.Secondary, "ü", "e") +// +// t.SetAnchor("u") +// t.Insert(colltab.Secondary, "ü", "e") +// // Order "q" afer "ab" at the secondary level and "Q" after "q" // at the tertiary level: -// t.SetAnchor("ab") -// t.Insert(colltab.Secondary, "q", "") -// t.Insert(colltab.Tertiary, "Q", "") +// +// t.SetAnchor("ab") +// t.Insert(colltab.Secondary, "q", "") +// t.Insert(colltab.Tertiary, "Q", "") +// // Order "b" before "a": -// t.SetAnchorBefore("a") -// t.Insert(colltab.Primary, "b", "") +// +// t.SetAnchorBefore("a") +// t.Insert(colltab.Primary, "b", "") +// // Order "0" after the last primary ignorable: -// t.SetAnchor("") -// t.Insert(colltab.Primary, "0", "") +// +// t.SetAnchor("") +// t.Insert(colltab.Primary, "0", "") func (t *Tailoring) Insert(level colltab.Level, str, extend string) error { if t.anchor == nil { return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) diff --git a/collate/build/colelem.go b/collate/build/colelem.go index 04fc3bfb7..1aaa062c5 100644 --- 
a/collate/build/colelem.go +++ b/collate/build/colelem.go @@ -51,6 +51,7 @@ func makeCE(ce rawCE) (uint32, error) { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. const ( contractID = 0xC0000000 @@ -103,7 +104,8 @@ func makeExpansionHeader(n int) (uint32, error) { // The collation element, in this case, is of the form // 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, -// - w* is the replacement tertiary weight for the second rune, +// - w* is the replacement tertiary weight for the second rune. +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. const ( @@ -162,7 +164,9 @@ func implicitPrimary(r rune) int { // primaries (either double primaries or for illegal runes) // to our own representation. // A CJK character C is represented in the DUCET as -// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// +// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// // We will rewrite these characters to a single CE. // We assume the CJK values start at 0x8000. // See https://unicode.org/reports/tr10/#Implicit_Weights diff --git a/collate/build/contract.go b/collate/build/contract.go index e2df64f0c..5d79eb8bf 100644 --- a/collate/build/contract.go +++ b/collate/build/contract.go @@ -56,19 +56,22 @@ const ( // entry might still resemble a completed suffix. // Examples: // The suffix strings "ab" and "ac" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. -// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. 
+// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 +// } // // The suffix strings "ab", "abc", "abd", and "abcd" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. -// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. -// {'d', 'd', final, 3}, // "abd" -> 3 -// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. -// {'d', 'd', final, 4}, // "abcd" -> 4 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. +// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. +// {'d', 'd', final, 3}, // "abd" -> 3 +// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. +// {'d', 'd', final, 4}, // "abcd" -> 4 +// } +// // See genStateTests in contract_test.go for more examples. type ctEntry struct { L uint8 // non-final: byte value to match; final: lowest match in range. diff --git a/feature/plural/gen.go b/feature/plural/gen.go index b9c5f2493..5f8f375fb 100644 --- a/feature/plural/gen.go +++ b/feature/plural/gen.go @@ -359,15 +359,16 @@ var operandIndex = map[string]opID{ // the resulting or conditions to conds. // // Example rules: -// // Category "one" in English: only allow 1 with no visible fraction -// i = 1 and v = 0 @integer 1 // -// // Category "few" in Czech: all numbers with visible fractions -// v != 0 @decimal ... +// // Category "one" in English: only allow 1 with no visible fraction +// i = 1 and v = 0 @integer 1 // -// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or -// // numbers with a fraction 11..19 and no trailing zeros. -// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... +// // Category "few" in Czech: all numbers with visible fractions +// v != 0 @decimal ... +// +// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or +// // numbers with a fraction 11..19 and no trailing zeros. +// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... 
// // @integer and @decimal are followed by examples and are not relevant for the // rule itself. The are used here to signal the termination of the rule. diff --git a/feature/plural/message.go b/feature/plural/message.go index f931f8a6a..6248d01cc 100644 --- a/feature/plural/message.go +++ b/feature/plural/message.go @@ -35,13 +35,13 @@ type Interface interface { // The cases argument are pairs of selectors and messages. Selectors are of type // string or Form. Messages are of type string or catalog.Message. A selector // matches an argument if: -// - it is "other" or Other -// - it matches the plural form of the argument: "zero", "one", "two", "few", -// or "many", or the equivalent Form -// - it is of the form "=x" where x is an integer that matches the value of -// the argument. -// - it is of the form " 1. -// - Otherwise the result is i % 10^nMod. +// - Let i be asInt(digits[start:end]), where out-of-range digits are assumed +// to be zero. +// - Result n is big if i / 10^nMod > 1. +// - Otherwise the result is i % 10^nMod. // // For example, if digits is {1, 2, 3} and start:end is 0:5, then the result // for various values of nMod is: -// - when nMod == 2, n == big -// - when nMod == 3, n == big -// - when nMod == 4, n == big -// - when nMod == 5, n == 12300 -// - when nMod == 6, n == 12300 -// - when nMod == 7, n == 12300 +// - when nMod == 2, n == big +// - when nMod == 3, n == big +// - when nMod == 4, n == big +// - when nMod == 5, n == 12300 +// - when nMod == 6, n == 12300 +// - when nMod == 7, n == 12300 func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // Leading 0 digits just result in 0. p := start @@ -107,12 +106,13 @@ func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // // The following table contains examples of possible arguments to represent // the given numbers. 
-// decimal digits exp scale -// 123 []byte{1, 2, 3} 3 0 -// 123.4 []byte{1, 2, 3, 4} 3 1 -// 123.40 []byte{1, 2, 3, 4} 3 2 -// 100000 []byte{1} 6 0 -// 100000.00 []byte{1} 6 3 +// +// decimal digits exp scale +// 123 []byte{1, 2, 3} 3 0 +// 123.4 []byte{1, 2, 3, 4} 3 1 +// 123.40 []byte{1, 2, 3, 4} 3 2 +// 100000 []byte{1} 6 0 +// 100000.00 []byte{1} 6 3 func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form { index := tagToID(t) @@ -152,14 +152,15 @@ func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form { // MatchPlural returns the plural form for the given language and plural // operands (as defined in // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules): -// where -// n absolute value of the source number (integer and decimals) -// input -// i integer digits of n. -// v number of visible fraction digits in n, with trailing zeros. -// w number of visible fraction digits in n, without trailing zeros. -// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) -// t visible fractional digits in n, without trailing zeros. +// +// where +// n absolute value of the source number (integer and decimals) +// input +// i integer digits of n. +// v number of visible fraction digits in n, with trailing zeros. +// w number of visible fraction digits in n, without trailing zeros. +// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) +// t visible fractional digits in n, without trailing zeros. // // If any of the operand values is too large to fit in an int, it is okay to // pass the value modulo 10,000,000. diff --git a/internal/catmsg/catmsg.go b/internal/catmsg/catmsg.go index c0bf86f09..1b257a7b4 100644 --- a/internal/catmsg/catmsg.go +++ b/internal/catmsg/catmsg.go @@ -9,8 +9,7 @@ // own. For instance, the plural package provides functionality for selecting // translation strings based on the plural category of substitution arguments. 
// -// -// Encoding and Decoding +// # Encoding and Decoding // // Catalogs store Messages encoded as a single string. Compiling a message into // a string both results in compacter representation and speeds up evaluation. @@ -25,8 +24,7 @@ // the message. This decoder takes a Decoder argument which provides the // counterparts for the decoding. // -// -// Renderers +// # Renderers // // A Decoder must be initialized with a Renderer implementation. These // implementations must be provided by packages that use Catalogs, typically @@ -38,22 +36,22 @@ // as sequence of substrings passed to the Renderer. The following snippet shows // how to express the above example using the message package. // -// message.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Select(1, "one", "minute")), -// catalog.String("You are %[1]d ${minutes} late.")) +// message.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Select(1, "one", "minute")), +// catalog.String("You are %[1]d ${minutes} late.")) // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. // // To evaluate the Printf, package message wraps the arguments in a Renderer // that is passed to the catalog for message decoding. 
The call sequence that // results from evaluating the above message, assuming the person is rather // tardy, is: // -// Render("You are %[1]d ") -// Arg(1) -// Render("minutes") -// Render(" late.") +// Render("You are %[1]d ") +// Arg(1) +// Render("minutes") +// Render(" late.") // // The calls to Arg is caused by the plural.Select execution, which evaluates // the argument to determine whether the singular or plural message form should @@ -267,10 +265,12 @@ func (s FirstOf) Compile(e *Encoder) error { // Var defines a message that can be substituted for a placeholder of the same // name. If an expression does not result in a string after evaluation, Name is // used as the substitution. For example: -// Var{ -// Name: "minutes", -// Message: plural.Select(1, "one", "minute"), -// } +// +// Var{ +// Name: "minutes", +// Message: plural.Select(1, "one", "minute"), +// } +// // will resolve to minute for singular and minutes for plural forms. type Var struct { Name string @@ -318,13 +318,15 @@ func (r Raw) Compile(e *Encoder) (err error) { // calls for each placeholder and interstitial string. For example, for the // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls // is: -// d.Render("%[1]v ") -// d.Arg(1) -// d.Render(resultOfInvites) -// d.Render(" %[2]v to ") -// d.Arg(2) -// d.Render(resultOfTheir) -// d.Render(" party.") +// +// d.Render("%[1]v ") +// d.Arg(1) +// d.Render(resultOfInvites) +// d.Render(" %[2]v to ") +// d.Arg(2) +// d.Render(resultOfTheir) +// d.Render(" party.") +// // where the messages for "invites" and "their" both use a plural.Select // referring to the first argument. // diff --git a/internal/cldrtree/cldrtree.go b/internal/cldrtree/cldrtree.go index 7530831d6..cc2714e99 100644 --- a/internal/cldrtree/cldrtree.go +++ b/internal/cldrtree/cldrtree.go @@ -4,7 +4,6 @@ // Package cldrtree builds and generates a CLDR index file, including all // inheritance. 
-// package cldrtree //go:generate go test -gen diff --git a/internal/colltab/collelem.go b/internal/colltab/collelem.go index 396cebda2..0c23c8a48 100644 --- a/internal/colltab/collelem.go +++ b/internal/colltab/collelem.go @@ -78,24 +78,27 @@ func (ce Elem) ctype() ceType { // For normal collation elements, we assume that a collation element either has // a primary or non-default secondary value, not both. // Collation elements with a primary value are of the form -// 01pppppp pppppppp ppppppp0 ssssssss -// - p* is primary collation value -// - s* is the secondary collation value -// 00pppppp pppppppp ppppppps sssttttt, where -// - p* is primary collation value -// - s* offset of secondary from default value. -// - t* is the tertiary collation value -// 100ttttt cccccccc pppppppp pppppppp -// - t* is the tertiar collation value -// - c* is the canonical combining class -// - p* is the primary collation value +// +// 01pppppp pppppppp ppppppp0 ssssssss +// - p* is primary collation value +// - s* is the secondary collation value +// 00pppppp pppppppp ppppppps sssttttt, where +// - p* is primary collation value +// - s* offset of secondary from default value. 
+// - t* is the tertiary collation value +// 100ttttt cccccccc pppppppp pppppppp +// - t* is the tertiar collation value +// - c* is the canonical combining class +// - p* is the primary collation value +// // Collation elements with a secondary value are of the form -// 1010cccc ccccssss ssssssss tttttttt, where -// - c* is the canonical combining class -// - s* is the secondary collation value -// - t* is the tertiary collation value -// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 -// - q* quaternary value +// +// 1010cccc ccccssss ssssssss tttttttt, where +// - c* is the canonical combining class +// - s* is the secondary collation value +// - t* is the tertiary collation value +// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 +// - q* quaternary value const ( ceTypeMask = 0xC0000000 ceTypeMaskExt = 0xE0000000 @@ -296,6 +299,7 @@ func (ce Elem) Weight(l Level) int { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. const ( maxNBits = 4 @@ -326,6 +330,7 @@ func splitExpandIndex(ce Elem) (index int) { // The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, // - w* is the replacement tertiary weight for the second rune, +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. 
func splitDecompose(ce Elem) (t1, t2 uint8) { diff --git a/internal/export/idna/gen_trieval.go b/internal/export/idna/gen_trieval.go index 9d92407f2..501bfabed 100644 --- a/internal/export/idna/gen_trieval.go +++ b/internal/export/idna/gen_trieval.go @@ -22,23 +22,23 @@ package main // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. 
diff --git a/internal/export/idna/trieval.go b/internal/export/idna/trieval.go index 7a8cf889b..9c070a44b 100644 --- a/internal/export/idna/trieval.go +++ b/internal/export/idna/trieval.go @@ -17,23 +17,23 @@ package idna // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. diff --git a/internal/language/tables.go b/internal/language/tables.go index a19480c5b..3552e1afc 100644 --- a/internal/language/tables.go +++ b/internal/language/tables.go @@ -121,9 +121,10 @@ const langPrivateEnd = 0x3179 // lang holds an alphabetically sorted list of ISO-639 language identifiers. // All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. // For 2-byte language identifiers, the two successive bytes have the following meaning: -// - if the first letter of the 2- and 3-letter ISO codes are the same: -// the second and third letter of the 3-letter ISO code. -// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// - if the first letter of the 2- and 3-letter ISO codes are the same: +// the second and third letter of the 3-letter ISO code. +// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// // For 3-byte language identifiers the 4th byte is 0. 
const lang tag.Index = "" + // Size: 5324 bytes "---\x00aaaraai\x00aak\x00aau\x00abbkabi\x00abq\x00abr\x00abt\x00aby\x00a" + @@ -1086,9 +1087,9 @@ var regionTypes = [358]uint8{ // regionISO holds a list of alphabetically sorted 2-letter ISO region codes. // Each 2-letter codes is followed by two bytes with the following meaning: -// - [A-Z}{2}: the first letter of the 2-letter code plus these two -// letters form the 3-letter ISO code. -// - 0, n: index into altRegionISO3. +// - [A-Z]{2}: the first letter of the 2-letter code plus these two +// letters form the 3-letter ISO code. +// - 0, n: index into altRegionISO3. const regionISO tag.Index = "" + // Size: 1308 bytes "AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUTAUUSAWBWAXLA" + "AZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMUBNRNBOOLBQESBRRABSHSBTTN" + @@ -1206,7 +1207,9 @@ var m49 = [358]int16{ // m49Index gives indexes into fromM49 based on the three most significant bits // of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in -// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// +// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// // for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. // The region code is stored in the 9 lsb of the indexed value. // Size: 18 bytes, 9 elements diff --git a/internal/number/decimal.go b/internal/number/decimal.go index cb656db6c..37e0c4b98 100644 --- a/internal/number/decimal.go +++ b/internal/number/decimal.go @@ -33,13 +33,14 @@ const maxIntDigits = 20 // may point outside a valid position in Digits. 
// // Examples: -// Number Decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 -// 12000 Digits: [1, 2], Exp: 5 -// 12000.00 Digits: [1, 2], Exp: 5 -// 0.00123 Digits: [1, 2, 3], Exp: -2 -// 0 Digits: [], Exp: 0 +// +// Number Decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 +// 12000 Digits: [1, 2], Exp: 5 +// 12000.00 Digits: [1, 2], Exp: 5 +// 0.00123 Digits: [1, 2, 3], Exp: -2 +// 0 Digits: [], Exp: 0 type Decimal struct { digits @@ -60,22 +61,23 @@ type digits struct { // engineering notation. Digits must have at least one digit. // // Examples: -// Number Decimal -// decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 -// 12000 Digits: [1, 2], Exp: 5 End: 5 -// 12000.00 Digits: [1, 2], Exp: 5 End: 7 -// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 -// 0 Digits: [], Exp: 0 End: 1 -// scientific (actual exp is Exp - Comma) -// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 -// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 -// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 -// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 -// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 -// engineering -// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 +// +// Number Decimal +// decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 +// 12000 Digits: [1, 2], Exp: 5 End: 5 +// 12000.00 Digits: [1, 2], Exp: 5 End: 7 +// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 +// 0 Digits: [], Exp: 0 End: 1 +// scientific (actual exp is Exp - Comma) +// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 +// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 +// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 +// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 +// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 +// engineering +// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 type Digits struct { digits // End 
indicates the end position of the number. diff --git a/internal/testtext/codesize.go b/internal/testtext/codesize.go index 5fc5eaec7..88df48747 100644 --- a/internal/testtext/codesize.go +++ b/internal/testtext/codesize.go @@ -16,11 +16,13 @@ import ( // CodeSize builds the given code sample and returns the binary size or en error // if an error occurred. The code sample typically will look like this: -// package main -// import "golang.org/x/text/somepackage" -// func main() { -// somepackage.Func() // reference Func to cause it to be linked in. -// } +// +// package main +// import "golang.org/x/text/somepackage" +// func main() { +// somepackage.Func() // reference Func to cause it to be linked in. +// } +// // See dict_test.go in the display package for an example. func CodeSize(s string) (int, error) { // Write the file. diff --git a/internal/triegen/triegen.go b/internal/triegen/triegen.go index 51d218a30..de54a8075 100644 --- a/internal/triegen/triegen.go +++ b/internal/triegen/triegen.go @@ -34,23 +34,24 @@ // triegen generates both tables and code. The code is optimized to use the // automatically chosen data types. The following code is generated for a Trie // or multiple Tries named "foo": -// - type fooTrie -// The trie type. // -// - func newFooTrie(x int) *fooTrie -// Trie constructor, where x is the index of the trie passed to Gen. +// - type fooTrie +// The trie type. // -// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) -// The lookup method, where uintX is automatically chosen. +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. // -// - func lookupString, lookupUnsafe and lookupStringUnsafe -// Variants of the above. +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. // -// - var fooValues and fooIndex and any tables generated by Compacters. -// The core trie data. 
+// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. // -// - var fooTrieHandles -// Indexes of starter blocks in case of multiple trie roots. +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. // // It is recommended that users test the generated trie by checking the returned // value for every rune. Such exhaustive tests are possible as the number of diff --git a/internal/utf8internal/utf8internal.go b/internal/utf8internal/utf8internal.go index 575cea870..e5c53b1b3 100644 --- a/internal/utf8internal/utf8internal.go +++ b/internal/utf8internal/utf8internal.go @@ -74,7 +74,7 @@ type AcceptRange struct { // AcceptRanges is a slice of AcceptRange values. For a given byte sequence b // -// AcceptRanges[First[b[0]]>>AcceptShift] +// AcceptRanges[First[b[0]]>>AcceptShift] // // will give the value of AcceptRange for the multi-byte UTF-8 sequence starting // at b[0]. diff --git a/language/display/lookup.go b/language/display/lookup.go index e6dc0e016..88307753d 100644 --- a/language/display/lookup.go +++ b/language/display/lookup.go @@ -92,10 +92,10 @@ func nameTag(langN, scrN, regN namer, x interface{}) string { // offsets for a string in data. For example, consider a header that defines // strings for the languages de, el, en, fi, and nl: // -// header{ -// data: "GermanGreekEnglishDutch", -// index: []uint16{ 0, 6, 11, 18, 18, 23 }, -// } +// header{ +// data: "GermanGreekEnglishDutch", +// index: []uint16{0, 6, 11, 18, 18, 23}, +// } // // For a language with index i, the string is defined by // data[index[i]:index[i+1]]. So the number of elements in index is always one @@ -204,9 +204,11 @@ func supportedRegions() []language.Region { // for each length, which can be used in combination with binary search to get // the index associated with a tag. 
// For example, a tagIndex{ -// "arenesfrruzh", // 6 2-byte tags. -// "barwae", // 2 3-byte tags. -// "", +// +// "arenesfrruzh", // 6 2-byte tags. +// "barwae", // 2 3-byte tags. +// "", +// // } // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag // "wae" had an index of 7. diff --git a/language/doc.go b/language/doc.go index 8afecd50e..212b77c90 100644 --- a/language/doc.go +++ b/language/doc.go @@ -10,18 +10,17 @@ // and provides the user with the best experience // (see https://blog.golang.org/matchlang). // -// -// Matching preferred against supported languages +// # Matching preferred against supported languages // // A Matcher for an application that supports English, Australian English, // Danish, and standard Mandarin can be created as follows: // -// var matcher = language.NewMatcher([]language.Tag{ -// language.English, // The first language is used as fallback. -// language.MustParse("en-AU"), -// language.Danish, -// language.Chinese, -// }) +// var matcher = language.NewMatcher([]language.Tag{ +// language.English, // The first language is used as fallback. +// language.MustParse("en-AU"), +// language.Danish, +// language.Chinese, +// }) // // This list of supported languages is typically implied by the languages for // which there exists translations of the user interface. @@ -30,14 +29,14 @@ // language tags. // The MatchString finds best matches for such strings: // -// handler(w http.ResponseWriter, r *http.Request) { -// lang, _ := r.Cookie("lang") -// accept := r.Header.Get("Accept-Language") -// tag, _ := language.MatchStrings(matcher, lang.String(), accept) +// handler(w http.ResponseWriter, r *http.Request) { +// lang, _ := r.Cookie("lang") +// accept := r.Header.Get("Accept-Language") +// tag, _ := language.MatchStrings(matcher, lang.String(), accept) // -// // tag should now be used for the initialization of any -// // locale-specific service. 
-// } +// // tag should now be used for the initialization of any +// // locale-specific service. +// } // // The Matcher's Match method can be used to match Tags directly. // @@ -48,8 +47,7 @@ // For instance, it will know that a reader of Bokmål Danish can read Norwegian // and will know that Cantonese ("yue") is a good match for "zh-HK". // -// -// Using match results +// # Using match results // // To guarantee a consistent user experience to the user it is important to // use the same language tag for the selection of any locale-specific services. @@ -58,9 +56,9 @@ // More subtly confusing is using the wrong sorting order or casing // algorithm for a certain language. // -// All the packages in x/text that provide locale-specific services -// (e.g. collate, cases) should be initialized with the tag that was -// obtained at the start of an interaction with the user. +// All the packages in x/text that provide locale-specific services +// (e.g. collate, cases) should be initialized with the tag that was +// obtained at the start of an interaction with the user. // // Note that Tag that is returned by Match and MatchString may differ from any // of the supported languages, as it may contain carried over settings from @@ -70,8 +68,7 @@ // Match and MatchString both return the index of the matched supported tag // to simplify associating such data with the matched tag. // -// -// Canonicalization +// # Canonicalization // // If one uses the Matcher to compare languages one does not need to // worry about canonicalization. @@ -92,10 +89,9 @@ // equivalence relations. The CanonType type can be used to alter the // canonicalization form. 
// -// References +// # References // // BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47 -// package language // import "golang.org/x/text/language" // TODO: explanation on how to match languages for your own locale-specific diff --git a/message/catalog/catalog.go b/message/catalog/catalog.go index de595b510..96955d075 100644 --- a/message/catalog/catalog.go +++ b/message/catalog/catalog.go @@ -13,8 +13,7 @@ // language. The Loader interface defines a source of dictionaries. A // translation of a format string is represented by a Message. // -// -// Catalogs +// # Catalogs // // A Catalog defines a programmatic interface for setting message translations. // It maintains a set of per-language dictionaries with translations for a set @@ -24,8 +23,7 @@ // the key. For example, a Dictionary for "en-GB" could leave out entries that // are identical to those in a dictionary for "en". // -// -// Messages +// # Messages // // A Message is a format string which varies on the value of substitution // variables. For instance, to indicate the number of results one could want "no @@ -39,8 +37,7 @@ // to selected string. This separation of concerns allows Catalog to be used to // store any kind of formatting strings. // -// -// Selecting messages based on linguistic features of substitution arguments +// # Selecting messages based on linguistic features of substitution arguments // // Messages may vary based on any linguistic features of the argument values. // The most common one is plural form, but others exist. @@ -48,10 +45,10 @@ // Selection messages are provided in packages that provide support for a // specific linguistic feature. 
The following snippet uses plural.Selectf: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// plural.Selectf(1, "", -// plural.One, "You are 1 minute late.", -// plural.Other, "You are %d minutes late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// plural.Selectf(1, "", +// plural.One, "You are 1 minute late.", +// plural.Other, "You are %d minutes late.")) // // In this example, a message is stored in the Catalog where one of two messages // is selected based on the first argument, a number. The first message is @@ -64,47 +61,46 @@ // Selects can be nested. This allows selecting sentences based on features of // multiple arguments or multiple linguistic properties of a single argument. // -// -// String interpolation +// # String interpolation // // There is often a lot of commonality between the possible variants of a // message. For instance, in the example above the word "minute" varies based on // the plural catogory of the argument, but the rest of the sentence is // identical. Using interpolation the above message can be rewritten as: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", -// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), -// catalog.String("You are %[1]d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", +// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), +// catalog.String("You are %[1]d ${minutes} late.")) // // Var is defined to return the variable name if the message does not yield a // match. 
This allows us to further simplify this snippet to // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), -// catalog.String("You are %d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), +// catalog.String("You are %d ${minutes} late.")) // // Overall this is still only a minor improvement, but things can get a lot more // unwieldy if more than one linguistic feature is used to determine a message // variant. Consider the following example: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// catalog.Var("their", -// plural.Selectf(1, "" -// plural.One, gender.Select(1, "female", "her", "other", "his"))), -// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) -// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// catalog.Var("their", +// plural.Selectf(1, "" +// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) +// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), // // Without variable substitution, this would have to be written as // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// plural.Selectf(1, "", -// plural.One, gender.Select(1, -// "female", "%[1]v invites %[2]v to her party." 
-// "other", "%[1]v invites %[2]v to his party."), -// plural.Other, "%[1]v invites %[2]v to their party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// plural.Selectf(1, "", +// plural.One, gender.Select(1, +// "female", "%[1]v invites %[2]v to her party." +// "other", "%[1]v invites %[2]v to his party."), +// plural.Other, "%[1]v invites %[2]v to their party.")) // // Not necessarily shorter, but using variables there is less duplication and // the messages are more maintenance friendly. Moreover, languages may have up @@ -113,33 +109,32 @@ // Different messages using the same inflections can reuse variables by moving // them to macros. Using macros we can rewrite the message as: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", -// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", +// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") // // Where the following macros were defined separately. // -// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", -// plural.One, "invite")) -// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", -// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", +// plural.One, "invite")) +// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", +// plural.One, gender.Select(1, "female", "her", "other", "his"))), // // Placeholders use parentheses and the arguments to invoke a macro. // -// -// Looking up messages +// # Looking up messages // // Message lookup using Catalogs is typically only done by specialized packages // and is not something the user should be concerned with. 
For instance, to // express the tardiness of a user using the related message we defined earlier, // the user may use the package message like so: // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // // Which would print: -// You are 5 minutes late. // +// You are 5 minutes late. // // This package is UNDER CONSTRUCTION and its API may change. package catalog // import "golang.org/x/text/message/catalog" diff --git a/message/doc.go b/message/doc.go index 72e8fde71..4bf7bdcac 100644 --- a/message/doc.go +++ b/message/doc.go @@ -5,22 +5,21 @@ // Package message implements formatted I/O for localized strings with functions // analogous to the fmt's print functions. It is a drop-in replacement for fmt. // -// -// Localized Formatting +// # Localized Formatting // // A format string can be localized by replacing any of the print functions of // fmt with an equivalent call to a Printer. // -// p := message.NewPrinter(message.MatchLanguage("en")) -// p.Println(123456.78) // Prints 123,456.78 +// p := message.NewPrinter(message.MatchLanguage("en")) +// p.Println(123456.78) // Prints 123,456.78 // -// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row +// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row // -// p := message.NewPrinter(message.MatchLanguage("nl")) -// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter +// p := message.NewPrinter(message.MatchLanguage("nl")) +// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter // -// p := message.NewPrinter(message.MatchLanguage("bn")) -// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ +// p := message.NewPrinter(message.MatchLanguage("bn")) +// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ // // Printer currently supports numbers and specialized types for which packages // exist in x/text. 
Other builtin types such as time.Time and slices are @@ -35,8 +34,7 @@ // // See package fmt for more options. // -// -// Translation +// # Translation // // The format strings that are passed to Printf, Sprintf, Fprintf, or Errorf // are used as keys to look up translations for the specified languages. @@ -44,34 +42,36 @@ // // One can use arbitrary keys to distinguish between otherwise ambiguous // strings: -// p := message.NewPrinter(language.English) -// p.Printf("archive(noun)") // Prints "archive" -// p.Printf("archive(verb)") // Prints "archive" // -// p := message.NewPrinter(language.German) -// p.Printf("archive(noun)") // Prints "Archiv" -// p.Printf("archive(verb)") // Prints "archivieren" +// p := message.NewPrinter(language.English) +// p.Printf("archive(noun)") // Prints "archive" +// p.Printf("archive(verb)") // Prints "archive" +// +// p := message.NewPrinter(language.German) +// p.Printf("archive(noun)") // Prints "Archiv" +// p.Printf("archive(verb)") // Prints "archivieren" // // To retain the fallback functionality, use Key: -// p.Printf(message.Key("archive(noun)", "archive")) -// p.Printf(message.Key("archive(verb)", "archive")) // +// p.Printf(message.Key("archive(noun)", "archive")) +// p.Printf(message.Key("archive(verb)", "archive")) // -// Translation Pipeline +// # Translation Pipeline // // Format strings that contain text need to be translated to support different // locales. The first step is to extract strings that need to be translated. // // 1. Install gotext -// go get -u golang.org/x/text/cmd/gotext -// gotext -help +// +// go get -u golang.org/x/text/cmd/gotext +// gotext -help // // 2. Mark strings in your source to be translated by using message.Printer, // instead of the functions of the fmt package. // // 3. Extract the strings from your source // -// gotext extract +// gotext extract // // The output will be written to the textdata directory. 
// @@ -89,13 +89,11 @@ // see also package golang.org/x/text/message/catalog can be used to implement // either dynamic or static loading of messages. // -// -// Plural and Gender Forms +// # Plural and Gender Forms // // Translated messages can vary based on the plural and gender forms of // substitution values. In general, it is up to the translators to provide // alternative translations for such forms. See the packages in // golang.org/x/text/feature and golang.org/x/text/message/catalog for more // information. -// package message diff --git a/number/doc.go b/number/doc.go index 2ad8d431a..876623086 100644 --- a/number/doc.go +++ b/number/doc.go @@ -9,19 +9,18 @@ // builtin Go types and anything that implements the Convert interface // (currently internal). // -// p := message.NewPrinter(language.English) +// p := message.NewPrinter(language.English) // -// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) -// // Prints: 1,234 bottles of beer on the wall. +// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) +// // Prints: 1,234 bottles of beer on the wall. // -// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) -// // Prints: 12% of gophers lose too much fur. +// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) +// // Prints: 12% of gophers lose too much fur. // -// p := message.NewPrinter(language.Dutch) -// -// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) -// // Prints: Er zijn 1,2 fietsen per huishouden. +// p := message.NewPrinter(language.Dutch) // +// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) +// // Prints: Er zijn 1,2 fietsen per huishouden. // // The width and scale specified in the formatting directives override the // configuration of the formatter. 
diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index fde188a33..9d2ae547b 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -193,14 +193,14 @@ func (p *paragraph) run() { // // At the end of this function: // -// - The member variable matchingPDI is set to point to the index of the -// matching PDI character for each isolate initiator character. If there is -// no matching PDI, it is set to the length of the input text. For other -// characters, it is set to -1. -// - The member variable matchingIsolateInitiator is set to point to the -// index of the matching isolate initiator character for each PDI character. -// If there is no matching isolate initiator, or the character is not a PDI, -// it is set to -1. +// - The member variable matchingPDI is set to point to the index of the +// matching PDI character for each isolate initiator character. If there is +// no matching PDI, it is set to the length of the input text. For other +// characters, it is set to -1. +// - The member variable matchingIsolateInitiator is set to point to the +// index of the matching isolate initiator character for each PDI character. +// If there is no matching isolate initiator, or the character is not a PDI, +// it is set to -1. func (p *paragraph) determineMatchingIsolates() { p.matchingPDI = make([]int, p.Len()) p.matchingIsolateInitiator = make([]int, p.Len()) @@ -435,7 +435,7 @@ func maxLevel(a, b level) level { } // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, -// either L or R, for each isolating run sequence. +// either L or R, for each isolating run sequence. func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { length := len(indexes) types := make([]Class, length) @@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level { // Lines are concatenated from left to right. 
So for example, the fifth // character from the left on the third line is // -// getReordering(linebreaks)[linebreaks[1] + 4] +// getReordering(linebreaks)[linebreaks[1] + 4] // // (linebreaks[1] is the position after the last character of the second // line, which is also the index of the first character on the third line, diff --git a/unicode/cldr/collate.go b/unicode/cldr/collate.go index 27c5bac9a..056fe7f7f 100644 --- a/unicode/cldr/collate.go +++ b/unicode/cldr/collate.go @@ -98,9 +98,13 @@ func processRules(p RuleProcessor, s string) (err error) { } // parseSpecialAnchor parses the anchor syntax which is either of the form -// ['before' ] +// +// ['before' ] +// // or -// [