Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions acceptance.tests
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ STDERR
END
MATCH DEFAULT

NAME filter match
RUN filter --match "(\d{3}) (\d+)" cmd/testdata/access.txt cmd/testdata/access.txt cmd/testdata/access.txt cmd/testdata/access.txt
STDERR
Matched: 200 / 200
END

NAME filter stdin
MATCH EXACT
RUN filter --match "(\d+)" --ignore "{gt {1} 10}" -e "{multi {1} 2}" -
Expand Down
2 changes: 1 addition & 1 deletion cmd/helpers/extractorBuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
if err != nil {
return nil, err
}
return matchers.ToFactory(d), nil
return matchers.NoFactory(d), nil
case c.IsSet("match"):
if ignoreCase {
matchExpr = "(?i)" + matchExpr
Expand Down
14 changes: 11 additions & 3 deletions pkg/extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"rare/pkg/expressions"
"rare/pkg/expressions/funclib"
"rare/pkg/matchers"
"rare/pkg/slicepool"
"sync"
"sync/atomic"
"unsafe"
Expand Down Expand Up @@ -101,6 +102,8 @@ func (s *Extractor) workerFull(output chan<- []Match) {
exprCtx := &SliceSpaceExpressionContext{
nameTable: matcher.SubexpNameTable(),
}
matchBuf := make([]int, 0, matcher.MatchBufSize())
bufCopyPool := slicepool.NewIntPool(1000 * matcher.MatchBufSize())

for batch := range s.input {
var (
Expand All @@ -114,7 +117,7 @@ func (s *Extractor) workerFull(output chan<- []Match) {

// Process each line
for idx, line := range batch.Batch {
matches := matcher.FindSubmatchIndex(line)
matches := matcher.FindSubmatchIndexDst(line, matchBuf)

if len(matches) > 0 {
// Speed is more important here than safety
Expand All @@ -139,10 +142,14 @@ func (s *Extractor) workerFull(output chan<- []Match) {
matchBatch = make([]Match, 0, len(batch.Batch))
}

// Need a copy of the local buffer (gets overwritten) to return
matchCopy := bufCopyPool.Get(len(matches))
copy(matchCopy, matches)

matchBatch = append(matchBatch, Match{
bLine: line,
Line: lineStringPtr,
Indices: matches,
Indices: matchCopy,
Extracted: extractedKey,
LineNumber: exprCtx.lineNum,
Source: batch.Source,
Expand Down Expand Up @@ -179,6 +186,7 @@ func (s *Extractor) workerSimple(output chan<- []string) {
exprCtx := &SliceSpaceExpressionContext{
nameTable: matcher.SubexpNameTable(),
}
matchBuf := make([]int, 0, matcher.MatchBufSize())

for batch := range s.input {
var (
Expand All @@ -192,7 +200,7 @@ func (s *Extractor) workerSimple(output chan<- []string) {

// Process each line
for idx, line := range batch.Batch {
matches := matcher.FindSubmatchIndex(line)
matches := matcher.FindSubmatchIndexDst(line, matchBuf)

if len(matches) > 0 {
// Speed is more important here than safety
Expand Down
36 changes: 16 additions & 20 deletions pkg/matchers/dissect/dissect.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package dissect

import (
"rare/pkg/slicepool"
"strings"
"unsafe"
)
Expand Down Expand Up @@ -31,11 +30,6 @@ type Dissect struct {
groupCount int
}

// DissectInstance pairs a compiled Dissect with an int-slice pool.
// The pool supplies the index slices returned by the instance's
// FindSubmatchIndex.
type DissectInstance struct {
// Embedded compiled expression; its fields and methods are promoted.
*Dissect
// Pool from which result slices are drawn.
groupPool *slicepool.IntPool
}

func CompileEx(expr string, ignoreCase bool) (*Dissect, error) {

parts := make([]token, 0)
Expand Down Expand Up @@ -130,17 +124,10 @@ func MustCompile(expr string) *Dissect {
return d
}

// CreateInstance returns a DissectInstance that shares this compiled Dissect
// and carries its own int pool. The pool is constructed with
// (groupCount*2 + 2) * 1024, i.e. 1024 times the length of one result slice.
func (s *Dissect) CreateInstance() *DissectInstance {
	resultLen := s.groupCount*2 + 2
	pool := slicepool.NewIntPool(resultLen * 1024)

	return &DissectInstance{
		Dissect:   s,
		groupPool: pool,
	}
}

// returns indexes of match [first, last, key0Start, key0End, key1Start, ...]
// nil on no match
// replicates logic from regex
func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
func (s *Dissect) FindSubmatchIndexDst(b []byte, dst []int) []int {
str := *(*string)(unsafe.Pointer(&b))

start := 0
Expand All @@ -152,8 +139,10 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
start += len(s.prefix)
}

ret := s.groupPool.Get(s.groupCount*2 + 2)
ret[0] = start - len(s.prefix)
if dst == nil {
dst = make([]int, 0, s.groupCount*2+2)
}
dst = append(dst, start-len(s.prefix), -1)

idx := 2
for _, token := range s.tokens {
Expand All @@ -169,16 +158,23 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
}

if !token.skip {
ret[idx] = start
ret[idx+1] = start + endOffset
dst = append(dst, start, start+endOffset)
idx += 2
}
start = start + endOffset + len(token.until)
}

ret[1] = start
dst[1] = start

return dst
}

// FindSubmatchIndex is the convenience form of FindSubmatchIndexDst: it
// passes a nil dst, so the result slice is allocated by the callee rather
// than supplied by the caller.
func (s *Dissect) FindSubmatchIndex(b []byte) []int {
return s.FindSubmatchIndexDst(b, nil)
}

return ret
// MatchBufSize reports how many ints a full match result occupies: two for
// the overall match bounds plus a start/end pair for each group.
func (s *Dissect) MatchBufSize() int {
	const overallMatchSlots = 2
	return overallMatchSlots + 2*s.groupCount
}

// Map of key names to indices in FindSubmatchIndex's return value
Expand Down
66 changes: 49 additions & 17 deletions pkg/matchers/dissect/dissect_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package dissect

import (
"rare/pkg/testutil"
"testing"

"github.com/stretchr/testify/assert"
)

func TestDissectBasic(t *testing.T) {
d := MustCompile("%{val};%{};%{?skip} - %{val2}").CreateInstance()
d := MustCompile("%{val};%{};%{?skip} - %{val2}")

assert.Equal(t, []int{0, 17, 0, 5, 12, 17}, d.FindSubmatchIndex([]byte("Hello;a;b - there")))

Expand All @@ -16,8 +17,9 @@ func TestDissectBasic(t *testing.T) {
"val2": 2,
}, d.SubexpNameTable())
}

func TestUtf8(t *testing.T) {
d := MustCompile("ûɾ %{key} ḝłįʈ").CreateInstance()
d := MustCompile("ûɾ %{key} ḝłįʈ")

s := []byte("Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ, ĉṓɲṩḙċťᶒțûɾ ấɖḯƥĭṩčįɳġ ḝłįʈ, șếᶑ ᶁⱺ ẽḭŭŝḿꝋď ṫĕᶆᶈṓɍ ỉñḉīḑȋᵭṵńť ṷŧ ḹẩḇőꝛế éȶ đꝍꞎôꝛȇ ᵯáꞡᶇā ąⱡîɋṹẵ.")
m := d.FindSubmatchIndex(s)
Expand All @@ -27,21 +29,21 @@ func TestUtf8(t *testing.T) {
}

func TestPrefixOnSkipKey(t *testing.T) {
d := MustCompile("prefix %{}: %{val}").CreateInstance()
d := MustCompile("prefix %{}: %{val}")

assert.Nil(t, d.FindSubmatchIndex([]byte("a: b")))
assert.Equal(t, []int{0, 11, 10, 11}, d.FindSubmatchIndex([]byte("prefix a: b")))
assert.Nil(t, d.FindSubmatchIndex([]byte("Prefix a: b")))
}

func TestEmpty(t *testing.T) {
d := MustCompile("").CreateInstance()
d := MustCompile("")
assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("hello")))
assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("")))
}

func TestNoTokens(t *testing.T) {
d := MustCompile("test").CreateInstance()
d := MustCompile("test")

assert.Nil(t, d.FindSubmatchIndex([]byte("hello there")))
assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("test")))
Expand All @@ -54,7 +56,7 @@ func TestNoTokens(t *testing.T) {
}

func TestPrefix(t *testing.T) {
d := MustCompile("mid %{val};%{val2} after").CreateInstance()
d := MustCompile("mid %{val};%{val2} after")

assert.Equal(t, []int{12, 29, 16, 19, 20, 23}, d.FindSubmatchIndex([]byte("string with mid 123;456 after k")))
assert.Nil(t, d.FindSubmatchIndex([]byte("string with mi 123;456 after k")))
Expand All @@ -63,18 +65,18 @@ func TestPrefix(t *testing.T) {
}

func TestSuffix(t *testing.T) {
d := MustCompile("%{val};%{val2} after").CreateInstance()
d := MustCompile("%{val};%{val2} after")

assert.Equal(t, []int{0, 13, 0, 3, 4, 7}, d.FindSubmatchIndex([]byte("123;456 after k")))
assert.Equal(t, []int{0, 17, 0, 7, 8, 11}, d.FindSubmatchIndex([]byte("hah 123;456 after k")))
assert.Nil(t, d.FindSubmatchIndex([]byte("123;456 boom k")))
assert.Nil(t, d.FindSubmatchIndex([]byte("")))

assert.Equal(t, []int{2, 13, 6, 13}, MustCompile("end %{nada}").CreateInstance().FindSubmatchIndex([]byte("a end nothing")))
assert.Equal(t, []int{2, 13, 6, 13}, MustCompile("end %{nada}").FindSubmatchIndex([]byte("a end nothing")))
}

func TestNoPrefixSuffix(t *testing.T) {
d := MustCompile("%{onlymatch}").CreateInstance()
d := MustCompile("%{onlymatch}")
assert.Equal(t, []int{0, 5, 0, 5}, d.FindSubmatchIndex([]byte("a b c")))
}

Expand Down Expand Up @@ -102,24 +104,54 @@ func TestIgnoreCase(t *testing.T) {
d, err := CompileEx("TeSt1", true)

assert.NoError(t, err)
assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("test1")))
assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("tEst1")))
assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("TEST1")))
assert.Equal(t, []int{1, 6}, d.CreateInstance().FindSubmatchIndex([]byte("ATest123")))
assert.Nil(t, d.CreateInstance().FindSubmatchIndex([]byte("asdf")))
assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("test1")))
assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("tEst1")))
assert.Equal(t, []int{0, 5}, d.FindSubmatchIndex([]byte("TEST1")))
assert.Equal(t, []int{1, 6}, d.FindSubmatchIndex([]byte("ATest123")))
assert.Nil(t, d.FindSubmatchIndex([]byte("asdf")))

d, err = CompileEx("pref %{val} post", true)
assert.NoError(t, err)
assert.Equal(t, []int{2, 13, 7, 8}, d.CreateInstance().FindSubmatchIndex([]byte("a Pref 5 pOst")))
assert.Equal(t, []int{2, 13, 7, 8}, d.FindSubmatchIndex([]byte("a Pref 5 pOst")))
}

// TestMemoryTarget checks how FindSubmatchIndexDst uses the caller-supplied
// destination slice: a buffer with enough capacity is written in place,
// while one that is too small forces the result into different memory.
func TestMemoryTarget(t *testing.T) {
	d := MustCompile("prefix %{}: %{val}")
	line := []byte("prefix a: 123")

	// Buffer sized by MatchBufSize: the result should alias it.
	buf := make([]int, 0, d.MatchBufSize())
	ret := d.FindSubmatchIndexDst(line, buf)
	assert.Equal(t, []int{0, 13, 10, 13}, ret)
	testutil.AssertSameMemory(t, buf, ret)

	// Undersized: capacity 2 cannot hold the 4-int result, so appending
	// must grow into a new backing array.
	buf = make([]int, 0, 2)
	ret = d.FindSubmatchIndexDst(line, buf)
	assert.Equal(t, []int{0, 13, 10, 13}, ret)
	testutil.AssertNotSameMemory(t, buf, ret)
}

// TestDstZeroAlloc hands BenchmarkDissectDst to testutil.AssertZeroAlloc.
// NOTE(review): presumably this fails when the benchmarked loop allocates;
// the helper is not shown here — confirm against testutil.
func TestDstZeroAlloc(t *testing.T) {
testutil.AssertZeroAlloc(t, BenchmarkDissectDst)
}

// BenchmarkDissect-4 13347456 86.07 ns/op 32 B/op 0 allocs/op
func BenchmarkDissect(b *testing.B) {
d, _ := CompileEx("t%{val} ", false)
di := d.CreateInstance()
val := []byte("this is a test ")

for i := 0; i < b.N; i++ {
di.FindSubmatchIndex(val)
d.FindSubmatchIndex(val)
}
}

// BenchmarkDissectDst-4 23545326 45.58 ns/op 0 B/op 0 allocs/op
//
// BenchmarkDissectDst measures FindSubmatchIndexDst when the caller supplies
// a reusable destination buffer sized by MatchBufSize.
func BenchmarkDissectDst(b *testing.B) {
	d, err := CompileEx("t%{val} ", false)
	if err != nil {
		// A broken expression would otherwise surface as a nil-pointer
		// panic inside the timed loop.
		b.Fatal(err)
	}
	buf := make([]int, 0, d.MatchBufSize())
	val := []byte("this is a test ")

	for i := 0; i < b.N; i++ {
		d.FindSubmatchIndexDst(val, buf)
	}
}
13 changes: 13 additions & 0 deletions pkg/matchers/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,16 @@ func (s *factoryWrapper[T]) CreateInstance() Matcher {
// ToFactory adapts a LikeFactory[T] to the Factory interface by wrapping it
// in a factoryWrapper[T].
func ToFactory[T Matcher](f LikeFactory[T]) Factory {
return &factoryWrapper[T]{f}
}

// noFactoryWrapper holds a single Matcher by embedding it, so the Matcher's
// methods are promoted onto the wrapper.
type noFactoryWrapper struct {
Matcher
}

// CreateInstance returns the wrapped Matcher itself; no per-call instance is
// built. Returning the embedded Matcher rather than the wrapper spares
// callers a promoted-method hop followed by a second interface dispatch on
// every call.
func (s *noFactoryWrapper) CreateInstance() Matcher {
	return s.Matcher
}

// NoFactory adapts a Matcher that needs no per-instance construction into a
// Factory by wrapping it in a noFactoryWrapper.
func NoFactory(f Matcher) Factory {
	wrapped := noFactoryWrapper{Matcher: f}
	return &wrapped
}
3 changes: 3 additions & 0 deletions pkg/matchers/fastregex/fastregex.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ type Regexp interface {
MatchString(str string) bool
FindSubmatchIndex(b []byte) []int
SubexpNameTable() map[string]int

FindSubmatchIndexDst(b []byte, dst []int) []int
MatchBufSize() int
}

// In addition, the following must be provided
Expand Down
Loading
Loading