From c12ebacd37a6c9160c0419320bc6bf1c114c32c4 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Tue, 31 Dec 2024 16:03:25 -0500
Subject: [PATCH 01/14] First dissect implementation. 5-10x faster than re2.
 20% faster than pcre

---
 pkg/dissect/dissect.go      | 173 ++++++++++++++++++++++++++++++++++++
 pkg/dissect/dissect_test.go |  92 +++++++++++++++++++
 pkg/dissect/errors.go       |   9 ++
 3 files changed, 274 insertions(+)
 create mode 100644 pkg/dissect/dissect.go
 create mode 100644 pkg/dissect/dissect_test.go
 create mode 100644 pkg/dissect/errors.go

diff --git a/pkg/dissect/dissect.go b/pkg/dissect/dissect.go
new file mode 100644
index 00000000..400d02c9
--- /dev/null
+++ b/pkg/dissect/dissect.go
@@ -0,0 +1,173 @@
+package dissect
+
+import (
+	"rare/pkg/slicepool"
+	"strings"
+	"unsafe"
+)
+
+// https://www.elastic.co/guide/en/logstash/current/plugins-filters-dissect.html
+
+// Because of how rare works, and the need to implement `FindSubmatchIndex`
+// this is a subset of functionality
+// %{key} -- Named key
+// %{} or %{?key} -- Named skipped key
+// %{+key} -- Append key, but MUST appear in-order, and will include delim
+// Does NOT support reference keys directly
+
+// Like fastregex, Dissect is NOT thread-safe, and an instance should be created
+// per-thread, or it should be locked. This is primarily because of the memory pool
+
+type token struct {
+	name, until string
+	skip        bool
+}
+
+type Dissect struct {
+	tokens []token
+	prefix string
+
+	groupNames map[string]int
+	groupCount int
+}
+
+type DissectInstance struct {
+	*Dissect
+	groupPool *slicepool.IntPool
+}
+
+func New(expr string) (*Dissect, error) {
+
+	parts := make([]token, 0)
+	groupNames := make(map[string]int)
+	var prefix string
+
+	groupIndex := 0
+	for {
+		start := strings.Index(expr, "%{")
+		if start < 0 {
+			if groupIndex == 0 { // no tokens in expr
+				prefix = expr
+			}
+			break
+		}
+		if groupIndex == 0 {
+			prefix = expr[:start]
+		}
+		expr = expr[start+2:]
+
+		stop := strings.Index(expr, "}")
+		if stop < 0 {
+			return nil, ErrorUnclosedToken
+		}
+
+		keyName := expr[:stop]
+		expr = expr[stop+1:]
+
+		// end is the next token OR end of expr
+		end := strings.Index(expr, "%")
+		if end < 0 {
+			end = len(expr)
+		} else if end == 0 {
+			return nil, ErrorSequentialToken
+		}
+		keyUntil := expr[:end]
+		expr = expr[end:]
+
+		skipped := false
+
+		switch {
+		case len(keyName) == 0:
+			skipped = true
+		case keyName[0] == '?':
+			skipped = true
+			keyName = keyName[1:]
+			// TODO: Append
+		}
+
+		parts = append(parts, token{
+			name:  keyName,
+			until: keyUntil,
+			skip:  skipped,
+		})
+
+		if !skipped {
+			if _, ok := groupNames[keyName]; ok {
+				return nil, ErrorKeyConflict
+			}
+			groupIndex++
+			groupNames[keyName] = groupIndex
+		}
+	}
+
+	return &Dissect{
+		groupNames: groupNames,
+		groupCount: groupIndex,
+		tokens:     parts,
+		prefix:     prefix,
+	}, nil
+}
+
+func MustNew(expr string) *Dissect {
+	d, err := New(expr)
+	if err != nil {
+		panic(err)
+	}
+	return d
+}
+
+func (s *Dissect) CreateInstance() *DissectInstance {
+	return &DissectInstance{
+		s,
+		slicepool.NewIntPool((s.groupCount*2 + 2) * 1024),
+	}
+}
+
+// returns indexes of match [first, last, key0Start, key0End, key1Start, ...]
+// nil on no match
+// replicates logic from regex
+func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
+	str := *(*string)(unsafe.Pointer(&b))
+
+	start := 0
+	if s.prefix != "" {
+		start = strings.Index(str, s.prefix)
+		if start < 0 {
+			return nil
+		}
+		start += len(s.prefix)
+	}
+
+	ret := s.groupPool.Get(s.groupCount*2 + 2)
+	ret[0] = start - len(s.prefix)
+
+	idx := 2
+	for _, token := range s.tokens {
+
+		endOffset := 0
+		if token.until == "" {
+			endOffset = len(str[start:])
+		} else {
+			endOffset = strings.Index(str[start:], token.until)
+			if endOffset < 0 {
+				return nil
+			}
+		}
+
+		if !token.skip {
+			ret[idx] = start
+			ret[idx+1] = start + endOffset
+			idx += 2
+		}
+		start = start + endOffset + len(token.until)
+	}
+
+	ret[1] = start
+
+	return ret
+}
+
+// Map of key-names to index's in FindSubmatchIndex's return
+func (s *Dissect) SubexpNameTable() map[string]int {
+	return s.groupNames
+}
diff --git a/pkg/dissect/dissect_test.go b/pkg/dissect/dissect_test.go
new file mode 100644
index 00000000..7bdff13f
--- /dev/null
+++ b/pkg/dissect/dissect_test.go
@@ -0,0 +1,92 @@
+package dissect
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDissectBasic(t *testing.T) {
+	d := MustNew("%{val};%{};%{?skip} - %{val2}").CreateInstance()
+
+	assert.Equal(t, []int{0, 17, 0, 5, 12, 17}, d.FindSubmatchIndex([]byte("Hello;a;b - there")))
+
+	assert.Equal(t, map[string]int{
+		"val":  1,
+		"val2": 2,
+	}, d.SubexpNameTable())
+}
+
+func TestEmpty(t *testing.T) {
+	d := MustNew("").CreateInstance()
+	assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("hello")))
+	assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("")))
+}
+
+func TestNoTokens(t *testing.T) {
+	d := MustNew("test").CreateInstance()
+
+	assert.Nil(t, d.FindSubmatchIndex([]byte("hello there")))
+	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("test")))
+	assert.Equal(t, []int{1, 5}, d.FindSubmatchIndex([]byte("atest")))
+	assert.Equal(t, []int{3, 7}, d.FindSubmatchIndex([]byte("abctest")))
+	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("testa")))
+	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("testabc")))
+	assert.Equal(t, []int{3, 7}, d.FindSubmatchIndex([]byte("abctestabc")))
+}
+
+func TestPrefix(t *testing.T) {
+	d := MustNew("mid %{val};%{val2} after").CreateInstance()
+
+	assert.Equal(t, []int{12, 29, 16, 19, 20, 23}, d.FindSubmatchIndex([]byte("string with mid 123;456 after k")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("string with mi 123;456 after k")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("string with mid 123;456 boom k")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("")))
+}
+
+func TestSuffix(t *testing.T) {
+	d := MustNew("%{val};%{val2} after").CreateInstance()
+
+	assert.Equal(t, []int{0, 13, 0, 3, 4, 7}, d.FindSubmatchIndex([]byte("123;456 after k")))
+	assert.Equal(t, []int{0, 17, 0, 7, 8, 11}, d.FindSubmatchIndex([]byte("hah 123;456 after k")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("123;456 boom k")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("")))
+
+	assert.Equal(t, []int{2, 13, 6, 13}, MustNew("end %{nada}").CreateInstance().FindSubmatchIndex([]byte("a end nothing")))
+}
+
+func TestNoPrefixSuffix(t *testing.T) {
+	d := MustNew("%{onlymatch}").CreateInstance()
+	assert.Equal(t, []int{0, 5, 0, 5}, d.FindSubmatchIndex([]byte("a b c")))
+}
+
+func TestErrorCompile(t *testing.T) {
+	// Unclosed
+	_, err := New("unclosed %{")
+	assert.ErrorIs(t, err, ErrorUnclosedToken)
+
+	// Dupe key
+	_, err = New("a %{a} %{a}")
+	assert.ErrorIs(t, err, ErrorKeyConflict)
+
+	// Sequential tokens
+	_, err = New("a %{a}%{b}")
+	assert.ErrorIs(t, err, ErrorSequentialToken)
+}
+
+func TestMustPanics(t *testing.T) {
+	assert.Panics(t, func() {
+		MustNew("%{bad expr")
+	})
+}
+
+// 88 ns
+func BenchmarkDissect(b *testing.B) {
+	d, _ := New("t%{val} ")
+	di := d.CreateInstance()
+	val := []byte("this is a test ")
+
+	for i := 0; i < b.N; i++ {
+		di.FindSubmatchIndex(val)
+	}
+}
diff --git a/pkg/dissect/errors.go b/pkg/dissect/errors.go
new file mode 100644
index 00000000..cac69b56
--- /dev/null
+++ b/pkg/dissect/errors.go
@@ -0,0 +1,9 @@
+package dissect
+
+import "errors"
+
+var (
+	ErrorKeyConflict     = errors.New("key conflict")
+	ErrorUnclosedToken   = errors.New("unclosed token")
+	ErrorSequentialToken = errors.New("sequential token")
+)

From c87e087a7c84166cd40a4cf246bb7d348951c0a1 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Tue, 31 Dec 2024 16:42:46 -0500
Subject: [PATCH 02/14] Won't support append for now

---
 pkg/dissect/dissect.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pkg/dissect/dissect.go b/pkg/dissect/dissect.go
index 400d02c9..26e60ed3 100644
--- a/pkg/dissect/dissect.go
+++ b/pkg/dissect/dissect.go
@@ -12,7 +12,6 @@ import (
 // this is a subset of functionality
 // %{key} -- Named key
 // %{} or %{?key} -- Named skipped key
-// %{+key} -- Append key, but MUST appear in-order, and will include delim
 // Does NOT support reference keys directly
 
 // Like fastregex, Dissect is NOT thread-safe, and an instance should be created
@@ -82,7 +81,6 @@ func New(expr string) (*Dissect, error) {
 		case keyName[0] == '?':
 			skipped = true
 			keyName = keyName[1:]
-			// TODO: Append
 		}
 
 		parts = append(parts, token{

From ea76f22bcd5bff8876651049076b8e7659c8d0a3 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 14:02:27 -0500
Subject: [PATCH 03/14] Repackage dissect and regex into matchers

---
 cmd/helpers/extractorBuilder.go                 |  2 +-
 main.go                                         |  2 +-
 pkg/extractor/extractor.go                      |  2 +-
 pkg/{ => matchers}/dissect/dissect.go           |  0
 pkg/{ => matchers}/dissect/dissect_test.go      |  2 +-
 pkg/{ => matchers}/dissect/errors.go            |  0
 pkg/matchers/factory.go                         | 17 +++++++++++++++++
 pkg/{ => matchers}/fastregex/fastregex.go       |  2 +-
 pkg/{ => matchers}/fastregex/fastregex_test.go  |  0
 pkg/{ => matchers}/fastregex/pcre2.go           |  0
 pkg/{ => matchers}/fastregex/pcre2_test.go      |  0
 .../fallback.go => matchers/fastregex/re2.go}   |  0
 pkg/matchers/intf.go                            | 12 ++++++++++++
 13 files changed, 34 insertions(+), 5 deletions(-)
 rename pkg/{ => matchers}/dissect/dissect.go (100%)
 rename pkg/{ => matchers}/dissect/dissect_test.go (98%)
 rename pkg/{ => matchers}/dissect/errors.go (100%)
 create mode 100644 pkg/matchers/factory.go
 rename pkg/{ => matchers}/fastregex/fastregex.go (94%)
 rename pkg/{ => matchers}/fastregex/fastregex_test.go (100%)
 rename pkg/{ => matchers}/fastregex/pcre2.go (100%)
 rename pkg/{ => matchers}/fastregex/pcre2_test.go (100%)
 rename pkg/{fastregex/fallback.go => matchers/fastregex/re2.go} (100%)
 create mode 100644 pkg/matchers/intf.go

diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index eba4a7ba..f7d2d90b 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -146,7 +146,7 @@ func getExtractorFlags() []cli.Flag {
 			Usage:    "Compile regex as against posix standard",
 		},
 		&cli.StringFlag{
-			Name:     "match,m",
+			Name:     "match",
 			Aliases:  []string{"m"},
 			Category: cliCategoryMatching,
 			Usage:    "Regex to create match groups to summarize on",
diff --git a/main.go b/main.go
index f2c71744..34ad5d6b 100644
--- a/main.go
+++ b/main.go
@@ -10,9 +10,9 @@ import (
 	"rare/pkg/expressions/funcfile"
 	"rare/pkg/expressions/funclib"
 	"rare/pkg/expressions/stdlib"
-	"rare/pkg/fastregex"
 	"rare/pkg/humanize"
 	"rare/pkg/logger"
+	"rare/pkg/matchers/fastregex"
 	"rare/pkg/multiterm"
 	"rare/pkg/multiterm/termunicode"
 
diff --git a/pkg/extractor/extractor.go b/pkg/extractor/extractor.go
index 5c7db8b1..d0a489f1 100644
--- a/pkg/extractor/extractor.go
+++ b/pkg/extractor/extractor.go
@@ -3,7 +3,7 @@ package extractor
 import (
 	"rare/pkg/expressions"
 	"rare/pkg/expressions/funclib"
-	"rare/pkg/fastregex"
+	"rare/pkg/matchers/fastregex"
 	"sync"
 	"sync/atomic"
 	"unsafe"
diff --git a/pkg/dissect/dissect.go b/pkg/matchers/dissect/dissect.go
similarity index 100%
rename from pkg/dissect/dissect.go
rename to pkg/matchers/dissect/dissect.go
diff --git a/pkg/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go
similarity index 98%
rename from pkg/dissect/dissect_test.go
rename to pkg/matchers/dissect/dissect_test.go
index 7bdff13f..20cafa7a 100644
--- a/pkg/dissect/dissect_test.go
+++ b/pkg/matchers/dissect/dissect_test.go
@@ -60,7 +60,7 @@ func TestNoPrefixSuffix(t *testing.T) {
 	assert.Equal(t, []int{0, 5, 0, 5}, d.FindSubmatchIndex([]byte("a b c")))
 }
 
-func TestErrorCompile(t *testing.T) {
+func TestErrorNew(t *testing.T) {
 	// Unclosed
 	_, err := New("unclosed %{")
 	assert.ErrorIs(t, err, ErrorUnclosedToken)
diff --git a/pkg/dissect/errors.go b/pkg/matchers/dissect/errors.go
similarity index 100%
rename from pkg/dissect/errors.go
rename to pkg/matchers/dissect/errors.go
diff --git a/pkg/matchers/factory.go b/pkg/matchers/factory.go
new file mode 100644
index 00000000..2cbad3e0
--- /dev/null
+++ b/pkg/matchers/factory.go
@@ -0,0 +1,17 @@
+package matchers
+
+type LikeFactory[T Matcher] interface {
+	CreateInstance() T
+}
+
+type factoryWrapper[T Matcher] struct {
+	matcher LikeFactory[T]
+}
+
+func (s *factoryWrapper[T]) CreateInstance() Matcher {
+	return s.matcher.CreateInstance()
+}
+
+func ToFactory[T Matcher](f LikeFactory[T]) Factory {
+	return &factoryWrapper[T]{f}
+}
diff --git a/pkg/fastregex/fastregex.go b/pkg/matchers/fastregex/fastregex.go
similarity index 94%
rename from pkg/fastregex/fastregex.go
rename to pkg/matchers/fastregex/fastregex.go
index 2adbb7f6..b20a8daf 100644
--- a/pkg/fastregex/fastregex.go
+++ b/pkg/matchers/fastregex/fastregex.go
@@ -7,7 +7,7 @@ type CompiledRegexp interface {
 
 // Regexp serves as an abstraction interface for regex classes
 // and shares the same methods as the re2/regexp implementation
-// which allows for easy fallback. This interface is expeted
+// which allows for easy fallback. This interface is expected
 // to only be used by a single thread/goroutine
 type Regexp interface {
 	Match(b []byte) bool
diff --git a/pkg/fastregex/fastregex_test.go b/pkg/matchers/fastregex/fastregex_test.go
similarity index 100%
rename from pkg/fastregex/fastregex_test.go
rename to pkg/matchers/fastregex/fastregex_test.go
diff --git a/pkg/fastregex/pcre2.go b/pkg/matchers/fastregex/pcre2.go
similarity index 100%
rename from pkg/fastregex/pcre2.go
rename to pkg/matchers/fastregex/pcre2.go
diff --git a/pkg/fastregex/pcre2_test.go b/pkg/matchers/fastregex/pcre2_test.go
similarity index 100%
rename from pkg/fastregex/pcre2_test.go
rename to pkg/matchers/fastregex/pcre2_test.go
diff --git a/pkg/fastregex/fallback.go b/pkg/matchers/fastregex/re2.go
similarity index 100%
rename from pkg/fastregex/fallback.go
rename to pkg/matchers/fastregex/re2.go
diff --git a/pkg/matchers/intf.go b/pkg/matchers/intf.go
new file mode 100644
index 00000000..3a9b3fe1
--- /dev/null
+++ b/pkg/matchers/intf.go
@@ -0,0 +1,12 @@
+package matchers
+
+// A thread-safe compiled matcher that can create instances
+type Factory interface {
+	CreateInstance() Matcher
+}
+
+// A non-thread-safe matcher that can be used to find matches
+type Matcher interface {
+	FindSubmatchIndex(b []byte) []int
+	SubexpNameTable() map[string]int
+}

From 61adc4b75b7b1134dc8cfdec49f3ce9baedd9d73 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 14:18:06 -0500
Subject: [PATCH 04/14] Refactor code so extractor takes generic matcher

---
 cmd/helpers/extractorBuilder.go        | 49 +++++++++++++++++++++++---
 cmd/helpers/updatingAggregator_test.go |  4 ++-
 pkg/extractor/extractor.go             | 22 +++++-------
 pkg/extractor/extractor_test.go        | 14 ++++----
 pkg/extractor_test/benchmark_test.go   |  4 ++-
 pkg/matchers/factory.go                |  1 +
 6 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index f7d2d90b..e38a4641 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -1,12 +1,16 @@
 package helpers
 
 import (
+	"errors"
 	"os"
 	"rare/pkg/expressions"
 	"rare/pkg/extractor"
 	"rare/pkg/extractor/batchers"
 	"rare/pkg/extractor/dirwalk"
 	"rare/pkg/logger"
+	"rare/pkg/matchers"
+	"rare/pkg/matchers/dissect"
+	"rare/pkg/matchers/fastregex"
 	"runtime"
 	"strings"
 
@@ -74,15 +78,15 @@ func BuildExtractorFromArguments(c *cli.Context, batcher *batchers.Batcher) *ext
 
 func BuildExtractorFromArgumentsEx(c *cli.Context, batcher *batchers.Batcher, sep string) *extractor.Extractor {
 	config := extractor.Config{
-		Posix:   c.Bool("posix"),
-		Regex:   c.String("match"),
 		Extract: strings.Join(c.StringSlice("extract"), sep),
 		Workers: c.Int("workers"),
 	}
 
-	if c.Bool("ignore-case") {
-		config.Regex = "(?i)" + config.Regex
+	matcher, err := BuildMatcherFromArguments(c)
+	if err != nil {
+		logger.Fatalln(ExitCodeInvalidUsage, err)
 	}
+	config.Matcher = matcher
 
 	ignoreSlice := c.StringSlice("ignore")
 	if len(ignoreSlice) > 0 {
@@ -100,6 +104,37 @@ func BuildExtractorFromArgumentsEx(c *cli.Context, batcher *batchers.Batcher, se
 	return ret
 }
 
+func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
+	var (
+		matchExpr   = c.String("match")
+		dissectExpr = c.String("dissect")
+		posix       = c.Bool("posix")
+		ignoreCase  = c.Bool("ignore-case")
+	)
+
+	switch {
+	case c.IsSet("match") && c.IsSet("dissect"):
+		return nil, errors.New("match and dissect conflict")
+	case c.IsSet("dissect"):
+		// TODO: Ignore case
+		d, err := dissect.New(dissectExpr)
+		if err != nil {
+			return nil, err
+		}
+		return matchers.ToFactory(d), nil
+	default: // match has a default (OPTIMIZE: Dont bother with regex now that we have a wrapper??)
+		if ignoreCase {
+			matchExpr = "(?i)" + matchExpr
+		}
+
+		r, err := fastregex.CompileEx(matchExpr, posix)
+		if err != nil {
+			return nil, err
+		}
+		return matchers.ToFactory(r), nil
+	}
+}
+
 func getExtractorFlags() []cli.Flag {
 	workerCount := runtime.NumCPU()/2 + 1
 
@@ -152,6 +187,12 @@ func getExtractorFlags() []cli.Flag {
 			Usage:    "Regex to create match groups to summarize on",
 			Value:    ".*",
 		},
+		&cli.StringFlag{
+			Name:     "dissect",
+			Aliases:  []string{"d"},
+			Category: cliCategoryMatching,
+			Usage:    "Dissect expression create match groups to summarize on",
+		},
 		&cli.StringSliceFlag{
 			Name:     "extract",
 			Aliases:  []string{"e"},
diff --git a/cmd/helpers/updatingAggregator_test.go b/cmd/helpers/updatingAggregator_test.go
index 336fb590..6b673ec3 100644
--- a/cmd/helpers/updatingAggregator_test.go
+++ b/cmd/helpers/updatingAggregator_test.go
@@ -4,6 +4,8 @@ import (
 	"io"
 	"rare/pkg/extractor"
 	"rare/pkg/extractor/batchers"
+	"rare/pkg/matchers"
+	"rare/pkg/matchers/fastregex"
 	"strings"
 	"testing"
 
@@ -31,7 +33,7 @@ func TestAggregationLoop(t *testing.T) {
 	// Build a real extractor
 	batcher := batchers.OpenReaderToChan("test", io.NopCloser(strings.NewReader(testData)), 1, 1)
 	ex, err := extractor.New(batcher.BatchChan(), &extractor.Config{
-		Regex:   `(\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d+)`)),
 		Extract: "val:{1}",
 		Workers: 1,
 	})
diff --git a/pkg/extractor/extractor.go b/pkg/extractor/extractor.go
index d0a489f1..4a9884df 100644
--- a/pkg/extractor/extractor.go
+++ b/pkg/extractor/extractor.go
@@ -3,7 +3,7 @@ package extractor
 import (
 	"rare/pkg/expressions"
 	"rare/pkg/expressions/funclib"
-	"rare/pkg/matchers/fastregex"
+	"rare/pkg/matchers"
 	"sync"
 	"sync/atomic"
 	"unsafe"
@@ -31,11 +31,10 @@ type Match struct {
 
 // Config for the extractor
 type Config struct {
-	Posix   bool      // Posix parse regex
-	Regex   string    // Regex to find matches
-	Extract string    // Extract these values from regex (expression)
-	Workers int       // Workers to parse regex
-	Ignore  IgnoreSet // Ignore these truthy expressions
+	Matcher matchers.Factory // Matcher
+	Extract string           // Extract these values from regex (expression)
+	Workers int              // Workers to parse regex
+	Ignore  IgnoreSet        // Ignore these truthy expressions
 }
 
 // Extractor is the representation of the reader
@@ -43,7 +42,7 @@ type Config struct {
 //	Expects someone to consume its ReadChan()
 type Extractor struct {
 	readChan       chan []Match
-	compiledRegexp fastregex.CompiledRegexp
+	compiledRegexp matchers.Factory
 	readLines      uint64
 	matchedLines   uint64
 	ignoredLines   uint64
@@ -54,7 +53,7 @@ type Extractor struct {
 
 type extractorInstance struct {
 	*Extractor
-	re      fastregex.Regexp
+	re      matchers.Matcher
 	context *SliceSpaceExpressionContext
 }
 
@@ -157,14 +156,9 @@ func New(inputBatch <-chan InputBatch, config *Config) (*Extractor, error) {
 		return nil, compErr
 	}
 
-	compiledRegex, err := fastregex.CompileEx(config.Regex, config.Posix)
-	if err != nil {
-		return nil, err
-	}
-
 	extractor := Extractor{
 		readChan:       make(chan []Match, 5),
-		compiledRegexp: compiledRegex,
+		compiledRegexp: config.Matcher,
 		keyBuilder:     compiledExpression,
 		config:         *config,
 		ignore:         config.Ignore,
diff --git a/pkg/extractor/extractor_test.go b/pkg/extractor/extractor_test.go
index 81acc1ee..0a716462 100644
--- a/pkg/extractor/extractor_test.go
+++ b/pkg/extractor/extractor_test.go
@@ -1,6 +1,8 @@
 package extractor
 
 import (
+	"rare/pkg/matchers"
+	"rare/pkg/matchers/fastregex"
 	"strings"
 	"testing"
 
@@ -15,7 +17,7 @@ xxx`
 func TestBasicExtractor(t *testing.T) {
 	input := convertReaderToBatches("test", strings.NewReader(testData), 1)
 	ex, err := New(input, &Config{
-		Regex:   `(\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d+)`)),
 		Extract: "val:{1}",
 		Workers: 1,
 	})
@@ -39,7 +41,7 @@ func TestBasicExtractor(t *testing.T) {
 func TestSourceAndLine(t *testing.T) {
 	input := convertReaderToBatches("test", strings.NewReader(testData), 1)
 	ex, err := New(input, &Config{
-		Regex:   `(\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d+)`)),
 		Extract: "{src} {line} val:{1} {bad} {@}",
 		Workers: 1,
 	})
@@ -57,7 +59,7 @@ func TestIgnoreLines(t *testing.T) {
 	input := convertReaderToBatches("test", strings.NewReader(testData), 1)
 	ignore, _ := NewIgnoreExpressions(`{eq {1} "123"}`)
 	ex, err := New(input, &Config{
-		Regex:   `(\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d+)`)),
 		Extract: "{src} {line} val:{1} {bad}{500}",
 		Workers: 1,
 		Ignore:  ignore,
@@ -72,7 +74,7 @@ func TestIgnoreLines(t *testing.T) {
 func TestNamedGroup(t *testing.T) {
 	input := convertReaderToBatches("test", strings.NewReader(testData), 1)
 	ex, err := New(input, &Config{
-		Regex:   `(?P<num>\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(?P<num>\d+)`)),
 		Extract: "val:{1}:{num}",
 		Workers: 1,
 	})
@@ -87,7 +89,7 @@ func TestNamedGroup(t *testing.T) {
 func TestJSONOutput(t *testing.T) {
 	input := convertReaderToBatches("test", strings.NewReader(testData), 1)
 	ex, err := New(input, &Config{
-		Regex:   `(?P<num>\d+)`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`(?P<num>\d+)`)),
 		Extract: "{.} {#} {.#} {#.}",
 		Workers: 1,
 	})
@@ -100,7 +102,7 @@ func TestJSONOutput(t *testing.T) {
 func TestGH10SliceBoundsPanic(t *testing.T) {
 	input := convertReaderToBatches("", strings.NewReader("this is an [ERROR] message"), 1)
 	ex, err := New(input, &Config{
-		Regex:   `\[(INFO)|(ERROR)|(WARNING)|(CRITICAL)\]`,
+		Matcher: matchers.ToFactory(fastregex.MustCompile(`\[(INFO)|(ERROR)|(WARNING)|(CRITICAL)\]`)),
 		Extract: "val:{2} val:{3}",
 		Workers: 1,
 	})
diff --git a/pkg/extractor_test/benchmark_test.go b/pkg/extractor_test/benchmark_test.go
index 6f7dadd9..8dcf982e 100644
--- a/pkg/extractor_test/benchmark_test.go
+++ b/pkg/extractor_test/benchmark_test.go
@@ -2,6 +2,8 @@ package benchmark_test
 
 import (
 	"rare/pkg/extractor"
+	"rare/pkg/matchers"
+	"rare/pkg/matchers/fastregex"
 	"testing"
 )
 
@@ -29,7 +31,7 @@ func BenchmarkExtractor(b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		gen := batchInputGenerator(10000, 100)
 		extractor, _ := extractor.New(gen, &extractor.Config{
-			Regex:   `(\d{3})`,
+			Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d{3})`)),
 			Extract: "{bucket {1} 10}",
 			Workers: 2,
 		})
diff --git a/pkg/matchers/factory.go b/pkg/matchers/factory.go
index 2cbad3e0..5fcdda26 100644
--- a/pkg/matchers/factory.go
+++ b/pkg/matchers/factory.go
@@ -12,6 +12,7 @@ func (s *factoryWrapper[T]) CreateInstance() Matcher {
 	return s.matcher.CreateInstance()
 }
 
+// Maps a factory-like interface to a matcher factory
 func ToFactory[T Matcher](f LikeFactory[T]) Factory {
 	return &factoryWrapper[T]{f}
 }

From 0b39b6a949bfaf21e7f0dc0512c30cc53f4d6898 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 14:20:17 -0500
Subject: [PATCH 05/14] Rename new to compile

---
 cmd/helpers/extractorBuilder.go      |  2 +-
 pkg/matchers/dissect/dissect.go      |  6 +++---
 pkg/matchers/dissect/dissect_test.go | 24 ++++++++++++------------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index e38a4641..aa48acc9 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -117,7 +117,7 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
 		return nil, errors.New("match and dissect conflict")
 	case c.IsSet("dissect"):
 		// TODO: Ignore case
-		d, err := dissect.New(dissectExpr)
+		d, err := dissect.Compile(dissectExpr)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/matchers/dissect/dissect.go b/pkg/matchers/dissect/dissect.go
index 26e60ed3..a2dbc577 100644
--- a/pkg/matchers/dissect/dissect.go
+++ b/pkg/matchers/dissect/dissect.go
@@ -35,7 +35,7 @@ type DissectInstance struct {
 	groupPool *slicepool.IntPool
 }
 
-func New(expr string) (*Dissect, error) {
+func Compile(expr string) (*Dissect, error) {
 
 	parts := make([]token, 0)
 	groupNames := make(map[string]int)
@@ -106,8 +106,8 @@ func New(expr string) (*Dissect, error) {
 	}, nil
 }
 
-func MustNew(expr string) *Dissect {
-	d, err := New(expr)
+func MustCompile(expr string) *Dissect {
+	d, err := Compile(expr)
 	if err != nil {
 		panic(err)
 	}
diff --git a/pkg/matchers/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go
index 20cafa7a..55d4e72e 100644
--- a/pkg/matchers/dissect/dissect_test.go
+++ b/pkg/matchers/dissect/dissect_test.go
@@ -7,7 +7,7 @@ import (
 )
 
 func TestDissectBasic(t *testing.T) {
-	d := MustNew("%{val};%{};%{?skip} - %{val2}").CreateInstance()
+	d := MustCompile("%{val};%{};%{?skip} - %{val2}").CreateInstance()
 
 	assert.Equal(t, []int{0, 17, 0, 5, 12, 17}, d.FindSubmatchIndex([]byte("Hello;a;b - there")))
 
@@ -18,13 +18,13 @@ func TestDissectBasic(t *testing.T) {
 }
 
 func TestEmpty(t *testing.T) {
-	d := MustNew("").CreateInstance()
+	d := MustCompile("").CreateInstance()
 	assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("hello")))
 	assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("")))
 }
 
 func TestNoTokens(t *testing.T) {
-	d := MustNew("test").CreateInstance()
+	d := MustCompile("test").CreateInstance()
 
 	assert.Nil(t, d.FindSubmatchIndex([]byte("hello there")))
 	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("test")))
@@ -36,7 +36,7 @@ func TestNoTokens(t *testing.T) {
 }
 
 func TestPrefix(t *testing.T) {
-	d := MustNew("mid %{val};%{val2} after").CreateInstance()
+	d := MustCompile("mid %{val};%{val2} after").CreateInstance()
 
 	assert.Equal(t, []int{12, 29, 16, 19, 20, 23}, d.FindSubmatchIndex([]byte("string with mid 123;456 after k")))
 	assert.Nil(t, d.FindSubmatchIndex([]byte("string with mi 123;456 after k")))
@@ -45,44 +45,44 @@ func TestPrefix(t *testing.T) {
 }
 
 func TestSuffix(t *testing.T) {
-	d := MustNew("%{val};%{val2} after").CreateInstance()
+	d := MustCompile("%{val};%{val2} after").CreateInstance()
 
 	assert.Equal(t, []int{0, 13, 0, 3, 4, 7}, d.FindSubmatchIndex([]byte("123;456 after k")))
 	assert.Equal(t, []int{0, 17, 0, 7, 8, 11}, d.FindSubmatchIndex([]byte("hah 123;456 after k")))
 	assert.Nil(t, d.FindSubmatchIndex([]byte("123;456 boom k")))
 	assert.Nil(t, d.FindSubmatchIndex([]byte("")))
 
-	assert.Equal(t, []int{2, 13, 6, 13}, MustNew("end %{nada}").CreateInstance().FindSubmatchIndex([]byte("a end nothing")))
+	assert.Equal(t, []int{2, 13, 6, 13}, MustCompile("end %{nada}").CreateInstance().FindSubmatchIndex([]byte("a end nothing")))
 }
 
 func TestNoPrefixSuffix(t *testing.T) {
-	d := MustNew("%{onlymatch}").CreateInstance()
+	d := MustCompile("%{onlymatch}").CreateInstance()
 	assert.Equal(t, []int{0, 5, 0, 5}, d.FindSubmatchIndex([]byte("a b c")))
 }
 
 func TestErrorNew(t *testing.T) {
 	// Unclosed
-	_, err := New("unclosed %{")
+	_, err := Compile("unclosed %{")
 	assert.ErrorIs(t, err, ErrorUnclosedToken)
 
 	// Dupe key
-	_, err = New("a %{a} %{a}")
+	_, err = Compile("a %{a} %{a}")
 	assert.ErrorIs(t, err, ErrorKeyConflict)
 
 	// Sequential tokens
-	_, err = New("a %{a}%{b}")
+	_, err = Compile("a %{a}%{b}")
 	assert.ErrorIs(t, err, ErrorSequentialToken)
 }
 
 func TestMustPanics(t *testing.T) {
 	assert.Panics(t, func() {
-		MustNew("%{bad expr")
+		MustCompile("%{bad expr")
 	})
 }
 
 // 88 ns
 func BenchmarkDissect(b *testing.B) {
-	d, _ := New("t%{val} ")
+	d, _ := Compile("t%{val} ")
 	di := d.CreateInstance()
 	val := []byte("this is a test ")
 

From 1ae6f8206e8b4d9a05f01e22daa249582ceec29b Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 18:05:47 -0500
Subject: [PATCH 06/14] Add case-insensitivity to dissect

---
 cmd/helpers/extractorBuilder.go      |  3 +--
 pkg/matchers/dissect/case.go         | 37 ++++++++++++++++++++++++++++
 pkg/matchers/dissect/case_test.go    |  1 +
 pkg/matchers/dissect/dissect.go      | 26 +++++++++++++++----
 pkg/matchers/dissect/dissect_test.go | 20 +++++++++++++--
 5 files changed, 78 insertions(+), 9 deletions(-)
 create mode 100644 pkg/matchers/dissect/case.go
 create mode 100644 pkg/matchers/dissect/case_test.go

diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index aa48acc9..b97213c4 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -116,8 +116,7 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
 	case c.IsSet("match") && c.IsSet("dissect"):
 		return nil, errors.New("match and dissect conflict")
 	case c.IsSet("dissect"):
-		// TODO: Ignore case
-		d, err := dissect.Compile(dissectExpr)
+		d, err := dissect.CompileEx(dissectExpr, ignoreCase)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/matchers/dissect/case.go b/pkg/matchers/dissect/case.go
new file mode 100644
index 00000000..cd2befab
--- /dev/null
+++ b/pkg/matchers/dissect/case.go
@@ -0,0 +1,37 @@
+package dissect
+
+import "unicode"
+
+// Finds case-insensitive index of second string
+// ASSUMES second string is already lowered (optimization)
+func indexIgnoreCase(s, substr string) int {
+	n := len(substr)
+	switch {
+	case n == 0:
+		return 0
+	case len(s) < n:
+		return -1
+	case len(s) == n:
+		for i := 0; i < n; i++ {
+			if unicode.ToLower(rune(s[i])) != rune(substr[i]) {
+				return -1
+			}
+		}
+		return 0
+	default:
+		for i := 0; i <= len(s)-n; i++ {
+			match := true
+			for j := 0; j < n; j++ {
+				if unicode.ToLower(rune(s[i+j])) != rune(substr[j]) {
+					match = false
+					break
+				}
+			}
+			if match {
+				return i
+			}
+		}
+		return -1
+	}
+
+}
diff --git a/pkg/matchers/dissect/case_test.go b/pkg/matchers/dissect/case_test.go
new file mode 100644
index 00000000..2bb2e1b6
--- /dev/null
+++ b/pkg/matchers/dissect/case_test.go
@@ -0,0 +1 @@
+package dissect
diff --git a/pkg/matchers/dissect/dissect.go b/pkg/matchers/dissect/dissect.go
index a2dbc577..48910187 100644
--- a/pkg/matchers/dissect/dissect.go
+++ b/pkg/matchers/dissect/dissect.go
@@ -23,8 +23,9 @@ type token struct {
 }
 
 type Dissect struct {
-	tokens []token
-	prefix string
+	tokens  []token
+	prefix  string
+	indexOf func(src, of string) int
 
 	groupNames map[string]int
 	groupCount int
@@ -35,7 +36,7 @@ type DissectInstance struct {
 	groupPool *slicepool.IntPool
 }
 
-func Compile(expr string) (*Dissect, error) {
+func CompileEx(expr string, ignoreCase bool) (*Dissect, error) {
 
 	parts := make([]token, 0)
 	groupNames := make(map[string]int)
@@ -73,6 +74,10 @@ func Compile(expr string) (*Dissect, error) {
 		keyUntil := expr[:end]
 		expr = expr[end:]
 
+		if ignoreCase {
+			keyUntil = strings.ToLower(keyUntil)
+		}
+
 		skipped := false
 
 		switch {
@@ -98,14 +103,25 @@ func Compile(expr string) (*Dissect, error) {
 		}
 	}
 
+	indexOfFunc := strings.Index
+	if ignoreCase {
+		indexOfFunc = indexIgnoreCase
+		prefix = strings.ToLower(prefix)
+	}
+
 	return &Dissect{
 		groupNames: groupNames,
 		groupCount: groupIndex,
 		tokens:     parts,
 		prefix:     prefix,
+		indexOf:    indexOfFunc,
 	}, nil
 }
 
+func Compile(expr string) (*Dissect, error) {
+	return CompileEx(expr, false)
+}
+
 func MustCompile(expr string) *Dissect {
 	d, err := Compile(expr)
 	if err != nil {
@@ -129,7 +145,7 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
 
 	start := 0
 	if s.prefix != "" {
-		start = strings.Index(str, s.prefix)
+		start = s.indexOf(str, s.prefix)
 		if start < 0 {
 			return nil
 		}
@@ -146,7 +162,7 @@ func (s *DissectInstance) FindSubmatchIndex(b []byte) []int {
 		if token.until == "" {
 			endOffset = len(str[start:])
 		} else {
-			endOffset = strings.Index(str[start:], token.until)
+			endOffset = s.indexOf(str[start:], token.until)
 			if endOffset < 0 {
 				return nil
 			}
diff --git a/pkg/matchers/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go
index 55d4e72e..3ad95409 100644
--- a/pkg/matchers/dissect/dissect_test.go
+++ b/pkg/matchers/dissect/dissect_test.go
@@ -33,6 +33,7 @@ func TestNoTokens(t *testing.T) {
 	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("testa")))
 	assert.Equal(t, []int{0, 4}, d.FindSubmatchIndex([]byte("testabc")))
 	assert.Equal(t, []int{3, 7}, d.FindSubmatchIndex([]byte("abctestabc")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("tEst")))
 }
 
 func TestPrefix(t *testing.T) {
@@ -80,9 +81,24 @@ func TestMustPanics(t *testing.T) {
 	})
 }
 
-// 88 ns
+func TestIgnoreCase(t *testing.T) {
+	d, err := CompileEx("TeSt1", true)
+
+	assert.NoError(t, err)
+	assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("test1")))
+	assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("tEst1")))
+	assert.Equal(t, []int{0, 5}, d.CreateInstance().FindSubmatchIndex([]byte("TEST1")))
+	assert.Equal(t, []int{1, 6}, d.CreateInstance().FindSubmatchIndex([]byte("ATest123")))
+	assert.Nil(t, d.CreateInstance().FindSubmatchIndex([]byte("asdf")))
+
+	d, err = CompileEx("pref %{val} post", true)
+	assert.NoError(t, err)
+	assert.Equal(t, []int{2, 13, 7, 8}, d.CreateInstance().FindSubmatchIndex([]byte("a Pref 5 pOst")))
+}
+
+// BenchmarkDissect-4   	13347456	        86.07 ns/op	      32 B/op	       0 allocs/op
 func BenchmarkDissect(b *testing.B) {
-	d, _ := Compile("t%{val} ")
+	d, _ := CompileEx("t%{val} ", true)
 	di := d.CreateInstance()
 	val := []byte("this is a test ")
 

From 3867f4e986296e0629b153aed2fdff7f0f4bcb0e Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 18:10:01 -0500
Subject: [PATCH 07/14] Fix build for moved fastregex package

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3616e546..3c5864f0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -23,7 +23,7 @@ jobs:
       - name: Test
         run: |
           go test -v -race -coverprofile=coverage.txt -covermode=atomic ./...
-          go test -tags=pcre2 rare/pkg/fastregex
+          go test -tags=pcre2 rare/pkg/matchers/fastregex
       - name: StaticCheck
         run: |
           go run honnef.co/go/tools/cmd/staticcheck@2024.1.1 ./...

From 11a475d8d9b9a2f938a72d0e6c32c9a39306be52 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 19:18:41 -0500
Subject: [PATCH 08/14] Unit tests

---
 pkg/matchers/dissect/case_test.go | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/pkg/matchers/dissect/case_test.go b/pkg/matchers/dissect/case_test.go
index 2bb2e1b6..ab822e36 100644
--- a/pkg/matchers/dissect/case_test.go
+++ b/pkg/matchers/dissect/case_test.go
@@ -1 +1,30 @@
 package dissect
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestIndexIgnoreCase(t *testing.T) {
+	assert.Equal(t, 0, indexIgnoreCase("abc", "a"))
+	assert.Equal(t, 0, indexIgnoreCase("abc", ""))
+
+	assert.Equal(t, 1, indexIgnoreCase("abc", "bc"))
+	assert.Equal(t, -1, indexIgnoreCase("abc", "ac"))
+
+	assert.Equal(t, 0, indexIgnoreCase("abc", "abc"))
+
+	assert.Equal(t, -1, indexIgnoreCase("abc", "bca"))
+	assert.Equal(t, -1, indexIgnoreCase("abc", "abcd"))
+
+	assert.Equal(t, 0, indexIgnoreCase("ABC", "a"))
+	assert.Equal(t, 0, indexIgnoreCase("ABC", ""))
+
+	assert.Equal(t, 1, indexIgnoreCase("ABC", "bc"))
+
+	assert.Equal(t, 0, indexIgnoreCase("ABC", "abc"))
+
+	assert.Equal(t, -1, indexIgnoreCase("ABC", "bca"))
+	assert.Equal(t, -1, indexIgnoreCase("ABC", "abcd"))
+}

From af01c6666c81af44b8c1373bf742a1555e2a4793 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 19:53:26 -0500
Subject: [PATCH 09/14] Fix skip-token bug. Add AlwaysMatcher, which makes the
 default case much more performant

---
 cmd/helpers/extractorBuilder.go      |  4 +++-
 pkg/matchers/dissect/dissect.go      | 10 +++++-----
 pkg/matchers/dissect/dissect_test.go |  8 ++++++++
 pkg/matchers/simple.go               | 15 +++++++++++++++
 4 files changed, 31 insertions(+), 6 deletions(-)
 create mode 100644 pkg/matchers/simple.go

diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index b97213c4..65089adb 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -121,7 +121,7 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
 			return nil, err
 		}
 		return matchers.ToFactory(d), nil
-	default: // match has a default (OPTIMIZE: Dont bother with regex now that we have a wrapper??)
+	case c.IsSet("match"):
 		if ignoreCase {
 			matchExpr = "(?i)" + matchExpr
 		}
@@ -131,6 +131,8 @@ func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
 			return nil, err
 		}
 		return matchers.ToFactory(r), nil
+	default:
+		return &matchers.AlwaysMatch{}, nil
 	}
 }
 
diff --git a/pkg/matchers/dissect/dissect.go b/pkg/matchers/dissect/dissect.go
index 48910187..a7ac268c 100644
--- a/pkg/matchers/dissect/dissect.go
+++ b/pkg/matchers/dissect/dissect.go
@@ -46,12 +46,12 @@ func CompileEx(expr string, ignoreCase bool) (*Dissect, error) {
 	for {
 		start := strings.Index(expr, "%{")
 		if start < 0 {
-			if groupIndex == 0 { // no tokens in expr
+			if len(parts) == 0 { // no tokens in expr
 				prefix = expr
 			}
 			break
 		}
-		if groupIndex == 0 {
+		if len(parts) == 0 {
 			prefix = expr[:start]
 		}
 		expr = expr[start+2:]
@@ -78,12 +78,12 @@ func CompileEx(expr string, ignoreCase bool) (*Dissect, error) {
 			keyUntil = strings.ToLower(keyUntil)
 		}
 
+		// Special flags
 		skipped := false
-
 		switch {
-		case len(keyName) == 0:
+		case len(keyName) == 0: // empty skip
 			skipped = true
-		case keyName[0] == '?':
+		case keyName[0] == '?': // named skip
 			skipped = true
 			keyName = keyName[1:]
 		}
diff --git a/pkg/matchers/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go
index 3ad95409..6ac1f3bd 100644
--- a/pkg/matchers/dissect/dissect_test.go
+++ b/pkg/matchers/dissect/dissect_test.go
@@ -17,6 +17,14 @@ func TestDissectBasic(t *testing.T) {
 	}, d.SubexpNameTable())
 }
 
+func TestPrefixOnSkipKey(t *testing.T) {
+	d := MustCompile("prefix %{}: %{val}").CreateInstance()
+
+	assert.Nil(t, d.FindSubmatchIndex([]byte("a: b")))
+	assert.Equal(t, []int{0, 11, 10, 11}, d.FindSubmatchIndex([]byte("prefix a: b")))
+	assert.Nil(t, d.FindSubmatchIndex([]byte("Prefix a: b")))
+}
+
 func TestEmpty(t *testing.T) {
 	d := MustCompile("").CreateInstance()
 	assert.Equal(t, []int{0, 0}, d.FindSubmatchIndex([]byte("hello")))
diff --git a/pkg/matchers/simple.go b/pkg/matchers/simple.go
new file mode 100644
index 00000000..6f8c3d46
--- /dev/null
+++ b/pkg/matchers/simple.go
@@ -0,0 +1,15 @@
+package matchers
+
+type AlwaysMatch struct{}
+
+func (s *AlwaysMatch) CreateInstance() Matcher {
+	return s
+}
+
+func (s *AlwaysMatch) FindSubmatchIndex(b []byte) []int {
+	return []int{0, len(b)}
+}
+
+func (s *AlwaysMatch) SubexpNameTable() map[string]int {
+	return make(map[string]int)
+}

From 57d1cfc70875cf5620b48105a3c224644db0d5c4 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Wed, 1 Jan 2025 19:57:08 -0500
Subject: [PATCH 10/14] Builder tests

---
 cmd/helpers/extractorBuilder_test.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/cmd/helpers/extractorBuilder_test.go b/cmd/helpers/extractorBuilder_test.go
index c0a776ad..7a2221fa 100644
--- a/cmd/helpers/extractorBuilder_test.go
+++ b/cmd/helpers/extractorBuilder_test.go
@@ -56,6 +56,10 @@ func TestBuildingExtractorFromContext(t *testing.T) {
 	assert.NoError(t, runApp(""))
 	assert.NoError(t, runApp(`-I -i "{eq {0} abc}" ../testdata/log.txt`))
 	assert.NoError(t, runApp(`-f ../testdata/log.txt`))
+	assert.NoError(t, runApp(`-m ".*" ../testdata/log.txt`))
+	assert.NoError(t, runApp(`-I -m ".*" ../testdata/log.txt`))
+	assert.NoError(t, runApp(`-d "%{}" ../testdata/log.txt`))
+	assert.NoError(t, runApp(`-I -d "%{}" ../testdata/log.txt`))
 	testLogFatal(t, 2, func() {
 		runApp("--batch 0 ../testdata/log.txt")
 	})
@@ -77,5 +81,11 @@ func TestBuildingExtractorFromContext(t *testing.T) {
 	testLogFatal(t, 2, func() {
 		runApp(`-i "{0" -`)
 	})
-	assert.Equal(t, 3, actionCalled)
+	testLogFatal(t, 2, func() {
+		runApp(`-m regex -d dissect -`)
+	})
+	testLogFatal(t, 2, func() {
+		runApp(`-d "%{unclosed" -`)
+	})
+	assert.Equal(t, 7, actionCalled)
 }

From c0d3d6223dbebbfded39523a11d2426be5ca9480 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Thu, 2 Jan 2025 20:43:08 -0500
Subject: [PATCH 11/14] Docs

---
 README.md                       |  1 +
 cmd/helpers/extractorBuilder.go |  2 +-
 docs/cli-help.md                | 32 ++++++++++++----
 docs/index.md                   |  1 +
 docs/usage/dissect.md           | 68 +++++++++++++++++++++++++++++++++
 docs/usage/examples.md          | 10 ++---
 docs/usage/expressions.md       |  4 +-
 docs/usage/extractor.md         | 55 ++++++++++++++++++++++----
 docs/usage/overview.md          |  9 +++--
 mkdocs.yml                      |  1 +
 10 files changed, 156 insertions(+), 27 deletions(-)
 create mode 100644 docs/usage/dissect.md

diff --git a/README.md b/README.md
index 2c163dcf..297df443 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ See [rare.zdyn.net](https://rare.zdyn.net) or the [docs/ folder](docs/) for the
 ## Features
 
  * Multiple summary formats including: filter (like grep), histogram, bar graphs, tables, heatmaps, reduce, and numerical analysis
+ * Parse using regex (`-m`) or dissect tokenizer (`-d`)
  * File glob expansions (eg `/var/log/*` or `/var/log/*/*.log`) and `-R`
  * Optional gzip decompression (with `-z`)
  * Following `-f` or re-open following `-F` (use `--poll` to poll, and `--tail` to tail)
diff --git a/cmd/helpers/extractorBuilder.go b/cmd/helpers/extractorBuilder.go
index 65089adb..3aca277b 100644
--- a/cmd/helpers/extractorBuilder.go
+++ b/cmd/helpers/extractorBuilder.go
@@ -211,7 +211,7 @@ func getExtractorFlags() []cli.Flag {
 			Name:     "ignore-case",
 			Aliases:  []string{"I"},
 			Category: cliCategoryMatching,
-			Usage:    "Augment regex to be case insensitive",
+			Usage:    "Augment matcher to be case insensitive",
 		},
 		&cli.IntFlag{
 			Name:     "batch",
diff --git a/docs/cli-help.md b/docs/cli-help.md
index 1eff3b25..afeb3946 100644
--- a/docs/cli-help.md
+++ b/docs/cli-help.md
@@ -67,6 +67,8 @@ Filter incoming results with search criteria, and output raw matches
 
 **--batch-buffer**="": Specifies how many batches to read-ahead. Impacts memory usage, can improve performance (default: 6)
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
 
 **--follow, -f**: Read appended data as file grows
@@ -75,7 +77,7 @@ Filter incoming results with search criteria, and output raw matches
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--line, -l**: Output source file and line number
 
@@ -113,6 +115,8 @@ Summarize results by extracting them to a histogram
 
 **--csv, -o**="": Write final results to csv. Use - to output to stdout
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extra, -x**: Alias for -b --percentage
 
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -123,7 +127,7 @@ Summarize results by extracting them to a histogram
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -167,6 +171,8 @@ Create a 2D heatmap of extracted data
 
 **--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
 
 **--follow, -f**: Read appended data as file grows
@@ -175,7 +181,7 @@ Create a 2D heatmap of extracted data
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -223,6 +229,8 @@ Create rows of sparkline graphs
 
 **--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
 
 **--follow, -f**: Read appended data as file grows
@@ -231,7 +239,7 @@ Create rows of sparkline graphs
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -273,6 +281,8 @@ Create a bargraph of the given 1 or 2 dimension data
 
 **--csv, -o**="": Write final results to csv. Use - to output to stdout
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
 
 **--follow, -f**: Read appended data as file grows
@@ -281,7 +291,7 @@ Create a bargraph of the given 1 or 2 dimension data
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -317,6 +327,8 @@ Numerical analysis on a set of filtered data
 
 **--batch-buffer**="": Specifies how many batches to read-ahead. Impacts memory usage, can improve performance (default: 6)
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extra, -x**: Displays extra analysis on the data (Requires more memory and cpu)
 
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -327,7 +339,7 @@ Numerical analysis on a set of filtered data
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -367,6 +379,8 @@ Create a 2D summarizing table of extracted data
 
 **--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extra, -x**: Display row and column totals
 
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -377,7 +391,7 @@ Create a 2D summarizing table of extracted data
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--match, -m**="": Regex to create match groups to summarize on (default: .*)
 
@@ -421,6 +435,8 @@ Aggregate the results of a query based on an expression, pulling customized summ
 
 **--csv, -o**="": Write final results to csv. Use - to output to stdout
 
+**--dissect, -d**="": Dissect expression create match groups to summarize on
+
 **--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{@}])
 
 **--follow, -f**: Read appended data as file grows
@@ -431,7 +447,7 @@ Aggregate the results of a query based on an expression, pulling customized summ
 
 **--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)
 
-**--ignore-case, -I**: Augment regex to be case insensitive
+**--ignore-case, -I**: Augment matcher to be case insensitive
 
 **--initial**="": Specify the default initial value for any accumulators that don't specify (default: 0)
 
diff --git a/docs/index.md b/docs/index.md
index df2a0c94..efc9e53c 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -18,6 +18,7 @@ Supports various CLI-based graphing and metric formats (filter (grep-like), hist
 ## Features
 
  * Multiple summary formats including: filter (like grep), histogram, bar graphs, tables, heatmaps, sparklines, reduce, and numerical analysis
+ * Parse using regex (`-m`) or dissect tokenizer (`-d`)
  * File glob expansions (eg `/var/log/*` or `/var/log/*/*.log`) and `-R`
  * Optional gzip decompression (with `-z`)
  * Following `-f` or re-open following `-F` (use `--poll` to poll, and `--tail` to tail)
diff --git a/docs/usage/dissect.md b/docs/usage/dissect.md
new file mode 100644
index 00000000..a8a40137
--- /dev/null
+++ b/docs/usage/dissect.md
@@ -0,0 +1,68 @@
+# Dissect Syntax
+
+*Dissect* is a simple token-based search algorithm, and can
+be up to 10x faster than regex (and 40% faster than PCRE).
+
+It works by searching for constant delimiters in a string
+and extracting the text between the tokens as named keys.
+
+*rare* implements a subset of the full dissect algorithm.
+
+**Syntax Example:**
+```
+prefix %{name} : %{value} - %{?ignored}
+```
+
+## Syntax
+
+- Anything in a `%{}` is a variable token.
+- A blank token, or a token that starts with `?` is skipped. eg `%{}` or `%{?skipped}`
+- Tokens are extracted by both name and index (in the order they appear).
+- Index `{0}` is the full match, including the delimiters
+- Patterns don't need to match the entire line
+
+## Examples
+
+### Simple
+
+```
+prefix %{name} : %{value}
+```
+
+Will match:
+```
+prefix bob : 123
+```
+
+And will extract two keys:
+```
+name=bob
+value=123
+```
+
+### Nginx Logs
+
+As a simple example, to parse nginx logs that look like:
+
+```
+104.238.185.46 - - [19/Aug/2019:02:26:25 +0000] "GET / HTTP/1.1" 200 546 "-" "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/98 Safari/537.4 (StatusCake)"
+```
+
+The following dissect expression can be used:
+
+```
+%{ip} - - [%{timestamp}] "%{verb} %{path} HTTP/%{?http-version}" %{status} %{size} "-" "%{useragent}"
+```
+
+Which, as json, will return something like (values depend on the matched line):
+```json
+{
+    "timestamp": "12/Dec/2019:17:54:13 +0000",
+    "verb": "POST",
+    "path": "/temtel.php",
+    "status": 404,
+    "size": 571,
+    "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36",
+    "ip": "203.113.174.104"
+}
+```
diff --git a/docs/usage/examples.md b/docs/usage/examples.md
index d9050792..df4b1eca 100644
--- a/docs/usage/examples.md
+++ b/docs/usage/examples.md
@@ -183,10 +183,10 @@ Matched: 1,035,666 / 1,035,666 (R: 8; C: 61)
 **NOTE:** For stacking (`-s`), the results will be color-coded (not shown here)
 
 ```sh
-$ rare bars -z -m "\[(.+?)\].*\" (\d+)" -e "{buckettime {1} year}" -e "{2}" testdata/*
+$ rare bars -z -m "\[(.+?)\].*\" (\d+)" -e "{buckettime {1} year}" -e "{2}" -s testdata/*
 
-        | 200  | 206  | 301  | 304  | 400  | 404  | 405  | 408
-2019  |||||||||||||||||||||||||||||||||||||||  3,741,444
-2020  |||||||||||||||||||||||||||||||||||||||||||||||||  4,631,884
-Matched: 8,373,328 / 8,383,717
+        0 200  1 206  2 301  3 304  4 400  5 404  6 405  7 408
+2019  000000000555555555555555555555555555555  3,742,444
+2020  0000000000000000004455555555555555555555555555555  4,631,884
+Matched: 8,374,328 / 8,384,811
 ```
diff --git a/docs/usage/expressions.md b/docs/usage/expressions.md
index a22b600f..1642718f 100644
--- a/docs/usage/expressions.md
+++ b/docs/usage/expressions.md
@@ -16,7 +16,7 @@ The basic syntax structure is as follows:
  * Characters can be escaped with `\`, including `\{` or `\n`
  * Expressions are surrounded by `{}`.
  * An integer in an expression denotes a matched value from the regex (or other input) eg. `{2}`. The entire match will always be `{0}`
- * A string in an expression is a special key or a named regex group eg. `{src}` or `{group1}`
+ * A string in an expression is a special key or a named regex/dissect group eg. `{src}` or `{group1}`
  * When an expression has space(s), the first literal will be the name of a helper function.
    From there, the logic is nested. eg `{coalesce {4} {3} notfound}`
  * Quotes in an argument create a single argument eg. `{coalesce {4} {3} "not found"}`
@@ -59,7 +59,7 @@ rare histo \
 	-b access.log
 ```
 
-The above parses the method `{1}`, url `{2}`, status `{3}`, and response size `{4}` in the regex.
+The above parses the method `{1}`, url `{2}`, status `{3}`, and response size `{4}` in the matcher.
 
 It extracts the `<method> <url> <bytesize bucketed to 10k>`. It will ignore `-i` if response size `{4}` is less-than `1024*1024` (1MB).
 
diff --git a/docs/usage/extractor.md b/docs/usage/extractor.md
index d8baf562..8dcc2524 100644
--- a/docs/usage/extractor.md
+++ b/docs/usage/extractor.md
@@ -3,14 +3,54 @@
 The main component of *rare* is the extractor (or matcher).  There are
 three fundamental concepts around the parser:
 
- * Each line of an input (separated by `\n`) is matched to a regex
- * A regex is used to parse a line into a match (and optionally, groups)
+ * Each line of an input (separated by `\n`) is matched to a matcher
+ * A matcher is used to parse a line into a match (and optionally, groups)
  * An expression (see: [expression](expressions.md)) is used to format an
-   output from a regex group
- * Optionally, one or more ignore filter can be applied to silent matches
+   output from the matched groups
+ * Optionally, one or more ignore expressions can be applied to silence matches
    that satisfy a truthy-comparison
 
-## Decomposing a Filter
+## Matcher Types
+
+If no matcher is specified, by default, the entire line is always matched
+and passed-through to the expression-stage.
+
+More than one matcher can **not** be specified at the same time.
+
+### Regex
+
+A regex expression is specified with `--match` or `-m`, and follows common
+[regex syntax](regexp.md).
+
+When matching a regex, groups are extracted both by index and
+by name (when the group is named).
+
+Set ignore-case with `-I` or `--ignore-case`.
+
+**Example:**
+
+```bash
+rare filter -m '"(\w{3,4}) ([A-Za-z0-9/.@_-]+)' access.log
+```
+
+### Dissect
+
+A dissect expression is specified with `--dissect` or `-d`, and follows
+[dissect syntax](dissect.md).
+
+Like regex, groups are extracted by both index and name.
+
+Set ignore-case with `-I` or `--ignore-case`.
+
+**Example:**
+
+```bash
+rare filter -d 'HTTP/1.1" %{code} %{size}' -e '{code}' access.log
+```
+
+## Examples
+
+### Decomposing a Matcher
 
 The most primitive way use rare is to filter lines in an input.  We'll
 be using an example nginx log for our example.
@@ -34,7 +74,7 @@ If you want it to only output the matched portion, you can add `-e "{0}"`
 Lastly, lets say we want to ignore all paths that equal "/", we could do that by adding
 an ignore pattern: `-i {eq {1} /}`
 
-## Histograms
+### Histograms
 
 Histograms are like filters, but rather than outputting every match, it will
 create an aggregated count based on the extracted expression.
@@ -48,4 +88,5 @@ rare histogram -m '"(\w{3,4}) ([A-Za-z0-9/.@_-]+)' -e '{1} {2}' -b access.log
 
 ## See Also
 
-* [Regular Expressions](regexp.md)
\ No newline at end of file
+* [Regular Expressions](regexp.md)
+* [Examples](examples.md)
\ No newline at end of file
diff --git a/docs/usage/overview.md b/docs/usage/overview.md
index 7963c9f1..bca986ca 100644
--- a/docs/usage/overview.md
+++ b/docs/usage/overview.md
@@ -23,11 +23,11 @@ Read more at:
 
 ## Extraction (Matching)
 
-Extraction is denoted with `-m` (match) and is the process of reading a line in
-a file or set of files and parsing it with a regular expression into the
-match-groups denoted by the regex.
+Extraction is denoted with `-m` (regex) or `-d` (dissect) and is the process of reading
+a line in a file or set of files and parsing it with a regex or dissect expression into the
+match-groups denoted by the matcher.
 
-If the regex doesn't match, the line is discarded (a non-match)
+If the expression doesn't match, the line is discarded (a non-match)
 
 These match groups are then fed into the next stage, the expression.
 
@@ -62,6 +62,7 @@ Aggregator types:
 * `histogram` will count instances of the extracted key
 * `table` will count the key in 2 dimensions
 * `heatmap` will generate a 2D visualization using colored blocks to denote value
+* `sparkline` will generate a 2D visualization with the results being a sparkline
 * `bargraph` will create either a stacked or non-stacked bargraph based on 2 dimensions
 * `analyze` will use the key as a numeric value and compute mean/median/mode/stddev/percentiles
 * `reduce` allows evaluating data using expressions, and grouping/sorting the output
diff --git a/mkdocs.yml b/mkdocs.yml
index a3e6c422..e76e5fb1 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -19,6 +19,7 @@ nav:
       - JSON: usage/json.md
       - Funcs File: usage/funcsfile.md
       - Regular Expressions: usage/regexp.md
+      - Dissect Expressions: usage/dissect.md
     - CLI Docs: cli-help.md
   - Benchmarks: benchmarks.md
   - Contributing: contributing.md

From 1da5ff919cd9e89f866b3ce85cf9f3664cdca672 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Fri, 3 Jan 2025 21:38:18 -0500
Subject: [PATCH 12/14] Some renaming and tests

---
 pkg/extractor/extractor.go           | 20 ++++++++++----------
 pkg/matchers/dissect/case.go         |  8 ++++----
 pkg/matchers/dissect/dissect_test.go |  2 +-
 pkg/matchers/simple_test.go          | 17 +++++++++++++++++
 4 files changed, 32 insertions(+), 15 deletions(-)
 create mode 100644 pkg/matchers/simple_test.go

diff --git a/pkg/extractor/extractor.go b/pkg/extractor/extractor.go
index 4a9884df..16a09c6b 100644
--- a/pkg/extractor/extractor.go
+++ b/pkg/extractor/extractor.go
@@ -23,7 +23,7 @@ type InputBatch struct {
 type Match struct {
 	bLine      BString // Keep the pointer around next to line
 	Line       string  // Unsafe pointer to bLine (no-copy)
-	Indices    []int   // match indices as returned by regexp
+	Indices    []int   // match indices as returned by matcher
 	Extracted  string  // The extracted expression
 	LineNumber uint64  // Line number
 	Source     string  // Source name
@@ -32,8 +32,8 @@ type Match struct {
 // Config for the extractor
 type Config struct {
 	Matcher matchers.Factory // Matcher
-	Extract string           // Extract these values from regex (expression)
-	Workers int              // Workers to parse regex
+	Extract string           // Extract these values from matcher (expression)
+	Workers int              // Workers to parse matcher
 	Ignore  IgnoreSet        // Ignore these truthy expressions
 }
 
@@ -42,7 +42,7 @@ type Config struct {
 //	Expects someone to consume its ReadChan()
 type Extractor struct {
 	readChan       chan []Match
-	compiledRegexp matchers.Factory
+	matcherFactory matchers.Factory
 	readLines      uint64
 	matchedLines   uint64
 	ignoredLines   uint64
@@ -53,7 +53,7 @@ type Extractor struct {
 
 type extractorInstance struct {
 	*Extractor
-	re      matchers.Matcher
+	matcher matchers.Matcher
 	context *SliceSpaceExpressionContext
 }
 
@@ -76,7 +76,7 @@ func (s *Extractor) ReadChan() <-chan []Match {
 // async safe
 func (s *extractorInstance) processLineSync(source string, lineNum uint64, line BString) (Match, bool) {
 	atomic.AddUint64(&s.readLines, 1)
-	matches := s.re.FindSubmatchIndex(line)
+	matches := s.matcher.FindSubmatchIndex(line)
 
 	// Extract and forward to the ReadChan if there are matches
 	if len(matches) > 0 {
@@ -118,12 +118,12 @@ func (s *extractorInstance) processLineSync(source string, lineNum uint64, line
 func (s *Extractor) asyncWorker(wg *sync.WaitGroup, inputBatch <-chan InputBatch) {
 	defer wg.Done()
 
-	re := s.compiledRegexp.CreateInstance()
+	matcher := s.matcherFactory.CreateInstance()
 	si := extractorInstance{
 		Extractor: s,
-		re:        re,
+		matcher:   matcher,
 		context: &SliceSpaceExpressionContext{
-			nameTable: re.SubexpNameTable(),
+			nameTable: matcher.SubexpNameTable(),
 		},
 	}
 
@@ -158,7 +158,7 @@ func New(inputBatch <-chan InputBatch, config *Config) (*Extractor, error) {
 
 	extractor := Extractor{
 		readChan:       make(chan []Match, 5),
-		compiledRegexp: config.Matcher,
+		matcherFactory: config.Matcher,
 		keyBuilder:     compiledExpression,
 		config:         *config,
 		ignore:         config.Ignore,
diff --git a/pkg/matchers/dissect/case.go b/pkg/matchers/dissect/case.go
index cd2befab..9ccd868f 100644
--- a/pkg/matchers/dissect/case.go
+++ b/pkg/matchers/dissect/case.go
@@ -4,8 +4,8 @@ import "unicode"
 
 // Finds case-insensitive index of second string
 // ASSUMES second string is already lowered (optimization)
-func indexIgnoreCase(s, substr string) int {
-	n := len(substr)
+func indexIgnoreCase(s, loweredSubstr string) int {
+	n := len(loweredSubstr)
 	switch {
 	case n == 0:
 		return 0
@@ -13,7 +13,7 @@ func indexIgnoreCase(s, substr string) int {
 		return -1
 	case len(s) == n:
 		for i := 0; i < n; i++ {
-			if unicode.ToLower(rune(s[i])) != rune(substr[i]) {
+			if unicode.ToLower(rune(s[i])) != rune(loweredSubstr[i]) {
 				return -1
 			}
 		}
@@ -22,7 +22,7 @@ func indexIgnoreCase(s, substr string) int {
 		for i := 0; i <= len(s)-n; i++ {
 			match := true
 			for j := 0; j < n; j++ {
-				if unicode.ToLower(rune(s[i+j])) != rune(substr[j]) {
+				if unicode.ToLower(rune(s[i+j])) != rune(loweredSubstr[j]) {
 					match = false
 					break
 				}
diff --git a/pkg/matchers/dissect/dissect_test.go b/pkg/matchers/dissect/dissect_test.go
index 6ac1f3bd..67d7aed4 100644
--- a/pkg/matchers/dissect/dissect_test.go
+++ b/pkg/matchers/dissect/dissect_test.go
@@ -106,7 +106,7 @@ func TestIgnoreCase(t *testing.T) {
 
 // BenchmarkDissect-4   	13347456	        86.07 ns/op	      32 B/op	       0 allocs/op
 func BenchmarkDissect(b *testing.B) {
-	d, _ := CompileEx("t%{val} ", true)
+	d, _ := CompileEx("t%{val} ", false)
 	di := d.CreateInstance()
 	val := []byte("this is a test ")
 
diff --git a/pkg/matchers/simple_test.go b/pkg/matchers/simple_test.go
new file mode 100644
index 00000000..5d2b1469
--- /dev/null
+++ b/pkg/matchers/simple_test.go
@@ -0,0 +1,17 @@
+package matchers
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSimpleMatcherAndFactory(t *testing.T) {
+	matcher := ToFactory(&AlwaysMatch{}) // Wrapping in ToFactory isn't required here, but doing so also exercises the factory path
+	inst := matcher.CreateInstance()
+
+	assert.Empty(t, inst.SubexpNameTable()) // no named groups are expected from AlwaysMatch
+
+	assert.Equal(t, []int{0, 0}, inst.FindSubmatchIndex([]byte{})) // empty input: expected match is [0, 0]
+	assert.Equal(t, []int{0, 2}, inst.FindSubmatchIndex([]byte("hi"))) // expected match spans the whole input: [0, len]
+}

From 42ae5ab398d48d3cd02aaf7f8cb31b46fad6a4f7 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Fri, 3 Jan 2025 21:42:55 -0500
Subject: [PATCH 13/14] Clarify index match

---
 docs/usage/dissect.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/usage/dissect.md b/docs/usage/dissect.md
index a8a40137..919c647d 100644
--- a/docs/usage/dissect.md
+++ b/docs/usage/dissect.md
@@ -34,7 +34,14 @@ Will match:
 prefix bob : 123
 ```
 
-And will extract two keys:
+And will extract three index keys:
+```
+0: prefix bob : 123
+1: bob
+2: 123
+```
+
+And will extract two named keys:
 ```
 name=bob
 value=123

From 6243c3ef1aa763177a63fc3ced5cd8669d0201e2 Mon Sep 17 00:00:00 2001
From: Christopher LaPointe <cmlapointe11@gmail.com>
Date: Fri, 3 Jan 2025 21:44:17 -0500
Subject: [PATCH 14/14] Fix typo

---
 docs/usage/extractor.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage/extractor.md b/docs/usage/extractor.md
index 8dcc2524..b845d673 100644
--- a/docs/usage/extractor.md
+++ b/docs/usage/extractor.md
@@ -19,7 +19,7 @@ More than one matcher can **not** be specified at the same time.
 
 ### Regex
 
-A regex express is specified with `--match` or `-m`, and follows common
+A regex expression is specified with `--match` or `-m`, and follows common
 [regex syntax](regexp.md).
 
 When matching a regex, groups and keys are extracted both index and