diff --git a/acceptance.tests b/acceptance.tests index e9ea172..52f18d9 100644 --- a/acceptance.tests +++ b/acceptance.tests @@ -16,6 +16,12 @@ STDERR END MATCH DEFAULT +NAME filter match +RUN filter --match "(\d{3}) (\d+)" cmd/testdata/access.txt cmd/testdata/access.txt cmd/testdata/access.txt cmd/testdata/access.txt +STDERR + Matched: 200 / 200 +END + NAME filter stdin MATCH EXACT RUN filter --match "(\d+)" --ignore "{gt {1} 10}" -e "{multi {1} 2}" - diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go index 3aca277..a803463 100644 --- a/cmd/helpers/extractorBuilder.go +++ b/cmd/helpers/extractorBuilder.go @@ -120,7 +120,7 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) { if err != nil { return nil, err } - return matchers.ToFactory(d), nil + return matchers.NoFactory(d), nil case c.IsSet("match"): if ignoreCase { matchExpr = "(?i)" + matchExpr diff --git a/pkg/extractor/extractor.go b/pkg/extractor/extractor.go index 308457c..b85c47a 100644 --- a/pkg/extractor/extractor.go +++ b/pkg/extractor/extractor.go @@ -4,6 +4,7 @@ import ( "rare/pkg/expressions" "rare/pkg/expressions/funclib" "rare/pkg/matchers" + "rare/pkg/slicepool" "sync" "sync/atomic" "unsafe" @@ -101,6 +102,8 @@ func (s *Extractor) workerFull(output chan<- []Match) { exprCtx := &SliceSpaceExpressionContext{ nameTable: matcher.SubexpNameTable(), } + matchBuf := make([]int, 0, matcher.MatchBufSize()) + bufCopyPool := slicepool.NewIntPool(1000 * matcher.MatchBufSize()) for batch := range s.input { var ( @@ -114,7 +117,7 @@ func (s *Extractor) workerFull(output chan<- []Match) { // Process each line for idx, line := range batch.Batch { - matches := matcher.FindSubmatchIndex(line) + matches := matcher.FindSubmatchIndexDst(line, matchBuf) if len(matches) > 0 { // Speed is more important here than safety @@ -139,10 +142,14 @@ func (s *Extractor) workerFull(output chan<- []Match) { matchBatch = make([]Match, 0, len(batch.Batch)) } + // Need a copy of the local buffer (gets overwritten) to return + matchCopy := bufCopyPool.Get(len(matches)) + copy(matchCopy, matches) + matchBatch = append(matchBatch, Match{ bLine: line, Line: lineStringPtr, - Indices: matches, + Indices: matchCopy, Extracted: extractedKey, LineNumber: exprCtx.lineNum, Source: batch.Source, @@ -179,6 +186,7 @@ func (s *Extractor) workerSimple(output chan<- []string) { exprCtx := &SliceSpaceExpressionContext{ nameTable: matcher.SubexpNameTable(), } + matchBuf := make([]int, 0, matcher.MatchBufSize()) for batch := range s.input { var ( @@ -192,7 +200,7 @@ func (s *Extractor) workerSimple(output chan<- []string) { // Process each line for idx, line := range batch.Batch { - matches := matcher.FindSubmatchIndex(line) + matches := matcher.FindSubmatchIndexDst(line, matchBuf) if len(matches) > 0 { // Speed is more important here than safety diff --git a/pkg/matchers/dissect/dissect.go b/pkg/matchers/dissect/dissect.go index a7ac268..fa7697c 100644 --- a/pkg/matchers/dissect/dissect.go +++ b/pkg/matchers/dissect/dissect.go @@ -1,7 +1,6 @@ package dissect import ( - "rare/pkg/slicepool" "strings" "unsafe" ) @@ -31,11 +30,6 @@ type Dissect struct { groupCount int } -type DissectInstance struct { - *Dissect - groupPool *slicepool.IntPool -} - func CompileEx(expr string, ignoreCase bool) (*Dissect, error) { parts := make([]token, 0) @@ -130,17 +124,10 @@ func MustCompile(expr string) *Dissect { return d } -func (s *Dissect) CreateInstance() *DissectInstance { - return &DissectInstance{ - s, - slicepool.NewIntPool((s.groupCount*2 + 2) * 1024), - } -} - // returns indexes of match [first, last, key0Start, key0End, key1Start, ...] // nil on no match // replicates logic from regex -func (s *DissectInstance) FindSubmatchIndex(b []byte) []int { +func (s *Dissect) FindSubmatchIndexDst(b []byte, dst []int) []int { str := *(*string)(unsafe.Pointer(&b)) start := 0 @@ -152,8 +139,10 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int { start += len(s.prefix) } - ret := s.groupPool.Get(s.groupCount*2 + 2) - ret[0] = start - len(s.prefix) + if dst == nil { + dst = make([]int, 0, s.groupCount*2+2) + } + dst = append(dst, start-len(s.prefix), -1) idx := 2 for _, token := range s.tokens { @@ -169,16 +158,23 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int { } if !token.skip { - ret[idx] = start - ret[idx+1] = start + endOffset + dst = append(dst, start, start+endOffset) idx += 2 } start = start + endOffset + len(token.until) } - ret[1] = start + dst[1] = start + + return dst +} + +func (s *Dissect) FindSubmatchIndex(b []byte) []int { + return s.FindSubmatchIndexDst(b, nil) +} - return ret +func (s *Dissect) MatchBufSize() int { + return s.groupCount*2 + 2 } // Map of key-names to index's in FindSubmatchIndex's return diff --git a/pkg/matchers/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go index 93c0d01..1f08a09 100644 --- a/pkg/matchers/dissect/dissect_test.go +++ b/pkg/matchers/dissect/dissect_test.go @@ -1,13 +1,14 @@ package dissect import ( + "rare/pkg/testutil" "testing" "github.com/stretchr/testify/assert" ) func TestDissectBasic(t *testing.T) { - d := MustCompile("%{val};%{};%{?skip} - %{val2}").CreateInstance() + d := MustCompile("%{val};%{};%{?skip} - %{val2}") assert.Equal(t, []int{0, 17, 0, 5, 12, 17}, d.FindSubmatchIndex([]byte("Hello;a;b - there"))) @@ -16,8 +17,9 @@ func TestDissectBasic(t *testing.T) { "val2": 2, }, d.SubexpNameTable()) } + func TestUtf8(t *testing.T) { - d := MustCompile("ûɾ %{key} ḝłįʈ").CreateInstance() + d := MustCompile("ûɾ %{key} ḝłįʈ") s := []byte("Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ, ĉṓɲṩḙċťᶒțûɾ ấɖḯƥĭṩčįɳġ ḝłįʈ, șếᶑ ᶁⱺ ẽḭŭŝḿꝋď ṫĕᶆᶈṓɍ ỉñḉīḑȋᵭṵńť ṷŧ ḹẩḇőꝛế éȶ đꝍꞎôꝛȇ ᵯáꞡᶇā ąⱡîɋṹẵ.") m := d.FindSubmatchIndex(s) @@ -27,7 +29,7 @@ func TestUtf8(t *testing.T) { } func TestPrefixOnSkipKey(t *testing.T) { - d := MustCompile("prefix %{}: %{val}").CreateInstance() + d := MustCompile("prefix %{}: %{val}") assert.Nil(t, d.FindSubmatchIndex([]byte("a: b"))) assert.Equal(t, []int{0, 11, 10, 11}, d.FindSubmatchIndex([]byte("prefix a: b"))) @@ -35,13 +37,13 @@ func TestPrefixOnSkipKey(t *testing.T) { } func TestEmpty(t *testing.T) { - d := MustCompile("").CreateInstance() + d := MustCompile("") assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("hello"))) assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte(""))) } func TestNoTokens(t *testing.T) { - d := MustCompile("test").CreateInstance() + d := MustCompile("test") assert.Nil(t, d.FindSubmatchIndex([]byte("hello there"))) assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("test"))) @@ -54,7 +56,7 @@ func TestNoTokens(t *testing.T) { } func TestPrefix(t *testing.T) { - d := MustCompile("mid %{val};%{val2} after").CreateInstance() + d := MustCompile("mid %{val};%{val2} after") assert.Equal(t, []int{12, 29, 16, 19, 20, 23}, d.FindSubmatchIndex([]byte("string with mid 123;456 after k"))) assert.Nil(t, d.FindSubmatchIndex([]byte("string with mi 123;456 after k"))) @@ -63,18 +65,18 @@ func TestPrefix(t *testing.T) { } func TestSuffix(t *testing.T) { - d := MustCompile("%{val};%{val2} after").CreateInstance() + d := MustCompile("%{val};%{val2} after") assert.Equal(t, []int{0, 13, 0, 3, 4, 7}, d.FindSubmatchIndex([]byte("123;456 after k"))) assert.Equal(t, []int{0, 17, 0, 7, 8, 11}, d.FindSubmatchIndex([]byte("hah 123;456 after k"))) assert.Nil(t, d.FindSubmatchIndex([]byte("123;456 boom k"))) assert.Nil(t, d.FindSubmatchIndex([]byte(""))) - assert.Equal(t, []int{2, 13, 6, 13}, MustCompile("end %{nada}").CreateInstance().FindSubmatchIndex([]byte("a end nothing"))) + assert.Equal(t, []int{2, 13, 6, 13}, MustCompile("end %{nada}").FindSubmatchIndex([]byte("a end nothing"))) } func TestNoPrefixSuffix(t *testing.T) { - d := MustCompile("%{onlymatch}").CreateInstance() + d := MustCompile("%{onlymatch}") assert.Equal(t, []int{0, 5, 0, 5}, d.FindSubmatchIndex([]byte("a b c"))) } @@ -102,24 +104,54 @@ func TestIgnoreCase(t *testing.T) { d, err := CompileEx("TeSt1", true) assert.NoError(t, err) - assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("test1"))) - assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("tEst1"))) - assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("TEST1"))) - assert.Equal(t, []int{1, 6}, d.CreateInstance().FindSubmatchIndex([]byte("ATest123"))) - assert.Nil(t, d.CreateInstance().FindSubmatchIndex([]byte("asdf"))) + assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("test1"))) + assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("tEst1"))) + assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("TEST1"))) + assert.Equal(t, []int{1, 6}, d.FindSubmatchIndex([]byte("ATest123"))) + assert.Nil(t, d.FindSubmatchIndex([]byte("asdf"))) d, err = CompileEx("pref %{val} post", true) assert.NoError(t, err) - assert.Equal(t, []int{2, 13, 7, 8}, d.CreateInstance().FindSubmatchIndex([]byte("a Pref 5 pOst"))) + assert.Equal(t, []int{2, 13, 7, 8}, d.FindSubmatchIndex([]byte("a Pref 5 pOst"))) +} + +func TestMemoryTarget(t *testing.T) { + d := MustCompile("prefix %{}: %{val}") + str := []byte("prefix a: 123") + + buf := make([]int, 0, d.MatchBufSize()) + ret := d.FindSubmatchIndexDst([]byte(str), buf) + assert.Equal(t, []int{0, 13, 10, 13}, ret) + testutil.AssertSameMemory(t, buf, ret) + + // undersized + buf = make([]int, 0, 2) + ret = d.FindSubmatchIndexDst([]byte(str), buf) + assert.Equal(t, []int{0, 13, 10, 13}, ret) + testutil.AssertNotSameMemory(t, buf, ret) +} + +func TestDstZeroAlloc(t *testing.T) { + testutil.AssertZeroAlloc(t, BenchmarkDissectDst) } // BenchmarkDissect-4 13347456 86.07 ns/op 32 B/op 0 allocs/op func BenchmarkDissect(b *testing.B) { d, _ := CompileEx("t%{val} ", false) - di := d.CreateInstance() val := []byte("this is a test ") for i := 0; i < b.N; i++ { - di.FindSubmatchIndex(val) + d.FindSubmatchIndex(val) + } +} + +// BenchmarkDissectDst-4 23545326 45.58 ns/op 0 B/op 0 allocs/op +func BenchmarkDissectDst(b *testing.B) { + d, _ := CompileEx("t%{val} ", false) + buf := make([]int, 0, d.MatchBufSize()) + val := []byte("this is a test ") + + for i := 0; i < b.N; i++ { + d.FindSubmatchIndexDst(val, buf) } } diff --git a/pkg/matchers/factory.go b/pkg/matchers/factory.go index 5fcdda2..aab7ccf 100644 --- a/pkg/matchers/factory.go +++ b/pkg/matchers/factory.go @@ -16,3 +16,16 @@ func (s *factoryWrapper[T]) CreateInstance() Matcher { func ToFactory[T Matcher](f LikeFactory[T]) Factory { return &factoryWrapper[T]{f} } + +type noFactoryWrapper struct { + Matcher +} + +func (s *noFactoryWrapper) CreateInstance() Matcher { + return s +} + +// Creates a wrapper factory for a matcher that doesn't need an instance factory +func NoFactory(f Matcher) Factory { + return &noFactoryWrapper{f} +} diff --git a/pkg/matchers/fastregex/fastregex.go b/pkg/matchers/fastregex/fastregex.go index b20a8da..27d3424 100644 --- a/pkg/matchers/fastregex/fastregex.go +++ b/pkg/matchers/fastregex/fastregex.go @@ -14,6 +14,9 @@ type Regexp interface { MatchString(str string) bool FindSubmatchIndex(b []byte) []int SubexpNameTable() map[string]int + + FindSubmatchIndexDst(b []byte, dst []int) []int + MatchBufSize() int } // In addition, the following must be provided diff --git a/pkg/matchers/fastregex/pcre2.go b/pkg/matchers/fastregex/pcre2.go index a893893..08a2046 100644 --- a/pkg/matchers/fastregex/pcre2.go +++ b/pkg/matchers/fastregex/pcre2.go @@ -1,4 +1,4 @@ -//go:build linux && cgo && pcre2 +//go:build pcre2 package fastregex @@ -15,7 +15,6 @@ import "C" import ( "errors" "fmt" - "rare/pkg/slicepool" "runtime" "unsafe" ) @@ -36,8 +35,7 @@ var _ CompiledRegexp = &pcre2Compiled{} // instance version type pcre2Regexp struct { - re *pcre2Compiled - groupPool *slicepool.IntPool + re *pcre2Compiled matchData *C.pcre2_match_data context *C.pcre2_match_context @@ -95,8 +93,7 @@ func CompileEx(expr string, posix bool) (CompiledRegexp, error) { func (s *pcre2Compiled) CreateInstance() Regexp { pcre := &pcre2Regexp{ - re: s, - groupPool: slicepool.NewIntPool(32 * 1024), + re: s, } if s.jitted { @@ -151,7 +148,7 @@ func (s *pcre2Regexp) MatchString(str string) bool { // FindSubmatchIndex, like regexp, returns a set of string indices where the results are // FindSubmatchIndex is NOT thread-safe. You need to create an instance of the fastregex engine -func (s *pcre2Regexp) FindSubmatchIndex(b []byte) []int { +func (s *pcre2Regexp) FindSubmatchIndexDst(b []byte, dst []int) []int { if len(b) == 0 { return nil } @@ -163,12 +160,22 @@ func (s *pcre2Regexp) FindSubmatchIndex(b []byte) []int { return nil } - ret := s.groupPool.Get(s.re.groupCount * 2) + if dst == nil { + dst = make([]int, 0, s.re.groupCount*2) + } for i := 0; i < s.re.groupCount*2; i++ { - ret[i] = int(*(*C.ulong)(unsafe.Pointer(uintptr(unsafe.Pointer(s.ovec)) + unsafe.Sizeof(*s.ovec)*uintptr(i)))) + dst = append(dst, int(*(*C.ulong)(unsafe.Pointer(uintptr(unsafe.Pointer(s.ovec)) + unsafe.Sizeof(*s.ovec)*uintptr(i))))) } - return ret + return dst +} + +func (s *pcre2Regexp) FindSubmatchIndex(b []byte) []int { + return s.FindSubmatchIndexDst(b, nil) +} + +func (s *pcre2Regexp) MatchBufSize() int { + return s.re.groupCount * 2 } type compileError struct { diff --git a/pkg/matchers/fastregex/pcre2_test.go b/pkg/matchers/fastregex/pcre2_test.go index 2754540..6782395 100644 --- a/pkg/matchers/fastregex/pcre2_test.go +++ b/pkg/matchers/fastregex/pcre2_test.go @@ -1,8 +1,9 @@ -//go:build linux && cgo && pcre2 +//go:build pcre2 package fastregex import ( + "rare/pkg/testutil" "testing" "github.com/stretchr/testify/assert" @@ -14,6 +15,19 @@ func TestCaptureGroupCount(t *testing.T) { assert.Equal(t, 3, re.CreateInstance().(*pcre2Regexp).GroupCount()) } +func TestPCRESameMemory(t *testing.T) { + re := MustCompile(`t(\w+)`).CreateInstance() + sb := []byte("this is a test") + buf := make([]int, 0, re.MatchBufSize()) + + ret := re.FindSubmatchIndexDst(sb, buf) + testutil.AssertSameMemory(t, buf, ret) +} + +func TestPCREZeroAlloc(t *testing.T) { + testutil.AssertZeroAlloc(t, BenchmarkPCREDst) +} + // pcre1: 500ns // pcre1-jit: 400ns // pcre2: 542ns @@ -36,3 +50,13 @@ func BenchmarkPCREMatch(b *testing.B) { re.MatchString("this is a test") } } + +// BenchmarkPCREDst-4 5823752 206.7 ns/op 0 B/op 0 allocs/op +func BenchmarkPCREDst(b *testing.B) { + re := MustCompile(`t(\w+)`).CreateInstance() + sb := []byte("this is a test") + buf := make([]int, 0, re.MatchBufSize()) + for i := 0; i < b.N; i++ { + re.FindSubmatchIndexDst(sb, buf) + } +} diff --git a/pkg/matchers/fastregex/re2.go b/pkg/matchers/fastregex/re2.go index 564a3e0..650bd30 100644 --- a/pkg/matchers/fastregex/re2.go +++ b/pkg/matchers/fastregex/re2.go @@ -1,8 +1,12 @@ -//go:build !(linux && cgo && pcre2) +//go:build !pcre2 package fastregex -import "regexp" +import ( + "io" + "regexp" + _ "unsafe" +) /* The fallback exposes the re2/regexp go implementation in the @@ -14,6 +18,7 @@ const Version = "re2" type compiledRegexp struct { *regexp.Regexp groupNames map[string]int + bufSize int } var ( @@ -29,6 +34,28 @@ func (s *compiledRegexp) SubexpNameTable() map[string]int { return s.groupNames } +//go:linkname regexp_doExecute regexp.(*Regexp).doExecute +func regexp_doExecute(*regexp.Regexp, io.RuneReader, []byte, string, int, int, []int) []int + +func (s *compiledRegexp) FindSubmatchIndexDst(b []byte, dst []int) []int { + // HACK: By accessing the underlying function of FindSubmatchIndex, we're able to avoid + // an allocation done by the initial call, which seems to save 25-33% performance generally + // and also later gc cleanups and about 15% heap allocations + // Though hacky, this should be safe for a pinned version, and will have plenty of tests around it + ret := regexp_doExecute(s.Regexp, nil, b, "", 0, s.bufSize, dst) + if ret == nil { + return nil + } + for len(ret) < s.bufSize { + ret = append(ret, -1) + } + return ret +} + +func (s *compiledRegexp) MatchBufSize() int { + return s.bufSize +} + func CompileEx(expr string, posix bool) (CompiledRegexp, error) { re, err := buildRegexp(expr, posix) if err != nil { @@ -37,6 +64,7 @@ func CompileEx(expr string, posix bool) (CompiledRegexp, error) { return &compiledRegexp{ re, createGroupNameTable(re), + (re.NumSubexp() + 1) * 2, }, nil } diff --git a/pkg/matchers/fastregex/re2_test.go b/pkg/matchers/fastregex/re2_test.go index 3525447..934ddbc 100644 --- a/pkg/matchers/fastregex/re2_test.go +++ b/pkg/matchers/fastregex/re2_test.go @@ -1,8 +1,13 @@ +//go:build !pcre2 + package fastregex import ( + "rare/pkg/testutil" "regexp" "testing" + + "github.com/stretchr/testify/assert" ) // 305ns @@ -13,10 +18,70 @@ func BenchmarkRE2Match(b *testing.B) { } } -// 520ns +// BenchmarkRE2SubMatch-4 2846446 431.0 ns/op 32 B/op 1 allocs/op func BenchmarkRE2SubMatch(b *testing.B) { re := regexp.MustCompile(`t(\w+)`) + str := []byte("this is a test") for i := 0; i < b.N; i++ { - re.FindSubmatchIndex([]byte("this is a test")) + re.FindSubmatchIndex(str) + } +} + +func TestMemoryAssumptions(t *testing.T) { + r := MustCompile(`t(\w+)`) + str := []byte("this is a test") + ri := r.CreateInstance() + + // Same memory + t.Run("same memory", func(t *testing.T) { + buf := make([]int, 0, ri.MatchBufSize()) + ret := ri.FindSubmatchIndexDst(str, buf) + assert.Equal(t, []int{0, 4, 1, 4}, ret) + testutil.AssertSameMemory(t, ret, buf) + }) + + // undersized alloc + t.Run("undersized", func(t *testing.T) { + buf := make([]int, 0, 2) + ret := ri.FindSubmatchIndexDst(str, buf) + assert.Equal(t, []int{0, 4, 1, 4}, ret) + testutil.AssertNotSameMemory(t, ret, buf) + }) + + // Nil buf alloc + t.Run("nilbuf", func(t *testing.T) { + ret := ri.FindSubmatchIndexDst(str, nil) + assert.Equal(t, []int{0, 4, 1, 4}, ret) + }) + + // No match + t.Run("no match", func(t *testing.T) { + ret := ri.FindSubmatchIndexDst([]byte("none"), nil) + assert.Nil(t, ret) + }) + + // pad match + t.Run("pad match", func(t *testing.T) { + r := MustCompile(`(\d+)(?: (\d+))?`).CreateInstance() + buf := make([]int, 0, r.MatchBufSize()) + ret := r.FindSubmatchIndexDst([]byte("123"), buf) + assert.Equal(t, []int{0, 3, 0, 3, -1, -1}, ret) + testutil.AssertSameMemory(t, buf, ret) + }) +} + +func TestDstZeroAlloc(t *testing.T) { + testutil.AssertZeroAlloc(t, BenchmarkRE2WithDst) +} + +// BenchmarkRE2WithDst-4 3982665 295.1 ns/op 0 B/op 0 allocs/op +func BenchmarkRE2WithDst(b *testing.B) { + r := MustCompile(`t(\w+)`) + str := []byte("this is a test") + ri := r.CreateInstance() + buf := make([]int, 0, ri.MatchBufSize()) + + for range b.N { + ri.FindSubmatchIndexDst(str, buf) } } diff --git a/pkg/matchers/intf.go b/pkg/matchers/intf.go index 3a9b3fe..a43961c 100644 --- a/pkg/matchers/intf.go +++ b/pkg/matchers/intf.go @@ -9,4 +9,9 @@ type Factory interface { type Matcher interface { FindSubmatchIndex(b []byte) []int SubexpNameTable() map[string]int + + // Insert FindSubmatchIndex into a preexisting buffer of size MatchBufSize() (no memory alloc) + // If dst is nil, or undersized, it will be resized/alloc'd + FindSubmatchIndexDst(b []byte, dst []int) []int + MatchBufSize() int } diff --git a/pkg/matchers/simple.go b/pkg/matchers/simple.go index 6f8c3d4..6a7f20a 100644 --- a/pkg/matchers/simple.go +++ b/pkg/matchers/simple.go @@ -13,3 +13,11 @@ func (s *AlwaysMatch) FindSubmatchIndex(b []byte) []int { func (s *AlwaysMatch) SubexpNameTable() map[string]int { return make(map[string]int) } + +func (s *AlwaysMatch) FindSubmatchIndexDst(b []byte, dst []int) []int { + return append(dst, 0, len(b)) +} + +func (s *AlwaysMatch) MatchBufSize() int { + return 2 +} diff --git a/pkg/matchers/simple_test.go b/pkg/matchers/simple_test.go index 5d2b146..a5e7c8d 100644 --- a/pkg/matchers/simple_test.go +++ b/pkg/matchers/simple_test.go @@ -1,6 +1,7 @@ package matchers import ( + "rare/pkg/testutil" "testing" "github.com/stretchr/testify/assert" @@ -14,4 +15,19 @@ func TestSimpleMatcherAndFactory(t *testing.T) { assert.Equal(t, []int{0, 0}, inst.FindSubmatchIndex([]byte{})) assert.Equal(t, []int{0, 2}, inst.FindSubmatchIndex([]byte("hi"))) + assert.Equal(t, []int{0, 2}, inst.FindSubmatchIndexDst([]byte("hi"), nil)) + assert.Equal(t, []int{0, 2}, inst.FindSubmatchIndexDst([]byte("hi"), make([]int, 0, inst.MatchBufSize()))) +} + +func TestNoFactory(t *testing.T) { + matcher := NoFactory(&AlwaysMatch{}).CreateInstance() + assert.Equal(t, 2, matcher.MatchBufSize()) +} + +func TestSimpleMemory(t *testing.T) { + matcher := &AlwaysMatch{} + buf := make([]int, 0, matcher.MatchBufSize()) + ret := matcher.FindSubmatchIndexDst([]byte("abc"), buf) + assert.Equal(t, []int{0, 3}, ret) + testutil.AssertSameMemory(t, buf, ret) } diff --git a/pkg/testutil/mem.go b/pkg/testutil/mem.go index eb7fb28..933916a 100644 --- a/pkg/testutil/mem.go +++ b/pkg/testutil/mem.go @@ -8,6 +8,8 @@ import ( var IsRaceMode = false func AssertZeroAlloc(t *testing.T, bench func(b *testing.B)) { + t.Helper() + if testing.Short() { t.Skip("skip short tests") } @@ -27,6 +29,7 @@ func AssertZeroAlloc(t *testing.T, bench func(b *testing.B)) { // Asserts two arrays/slices share the same memory block func AssertSameMemory[T any](t *testing.T, arr0, arr1 []T) { + t.Helper() if !IsSameMemory(arr0, arr1) { t.Error("Slices don't share underlying memory") } @@ -34,6 +37,7 @@ func AssertSameMemory[T any](t *testing.T, arr0, arr1 []T) { // Asserts two arrays/slices do NOT share the same memory block func AssertNotSameMemory[T any](t *testing.T, arr0, arr1 []T) { + t.Helper() if IsSameMemory(arr0, arr1) { t.Error("Slices don't share underlying memory") }