zix99 · zix99 · Dec 14, 2019 · Dec 14, 2019 · Dec 14, 2019 · Dec 14, 2019
diff --git a/README.md b/README.md
@@ -215,7 +215,7 @@ Matched: 161,622 / 161,622
 
 ## Tabulate
 
-Create a 2D view (table) of data extracted from a file. Expression needs to yield a two dimensions separated by a tab.  Can either use `\t` or the `{tab a b}` helper.  First element is the column name, followed by the row name.
+Create a 2D view (table) of data extracted from a file. The expression needs to yield two dimensions separated by a null character.  You can either use `\x00` or the `{$ a b}` helper.  The first element is the column name, followed by the row name.
 
 ```
 NAME:
@@ -226,8 +226,8 @@ USAGE:
 
 DESCRIPTION:
    Summarizes the extracted data as a 2D data table.
-    The key is provided in the expression, and should be separated by a tab \t
-    character or via {tab a b} Where a is the column header, and b is the row
+    The key is provided in the expression, and should be separated by a tab \x00
+    character or via {$ a b} Where a is the column header, and b is the row
 
 OPTIONS:
    --follow, -f                 Read appended data as file grows
@@ -242,7 +242,7 @@ OPTIONS:
    --readers value, --wr value  Sets the number of concurrent readers (Infinite when -f) (default: 3)
    --ignore value, -i value     Ignore a match given a truthy expression (Can have multiple)
    --recursive, -R              Recursively walk a non-globbing path and search for plain-files
-   --delim value                Character to tabulate on. Use {tab} helper by default (default: "\t")
+   --delim value                Character to tabulate on. Use {$} helper by default (default: "\x00")
    --num value, -n value        Number of elements to display (default: 20)
    --cols value                 Number of columns to display (default: 10)
    --sortkey, --sk              Sort rows by key name rather than by values
@@ -251,7 +251,7 @@ OPTIONS:
 **Example:**
 
 ```bash
-$ rare tabulate -m "(\d{3}) (\d+)" -e "{tab {1} {bucket {2} 100000}}" -sk access.log
+$ rare tabulate -m "(\d{3}) (\d+)" -e "{$ {1} {bucket {2} 100000}}" -sk access.log
 
          200      404      304      403      301      206      
 0        153,271  860      53       14       12       2                 

diff --git a/cmd/tabulate.go b/cmd/tabulate.go
@@ -70,14 +70,14 @@ func tabulateCommand() *cli.Command {
 		ShortName: "t",
 		Usage:     "Create a 2D summarizing table of extracted data",
 		Description: `Summarizes the extracted data as a 2D data table.
-		The key is provided in the expression, and should be separated by a tab \t
-		character or via {tab a b} Where a is the column header, and b is the row`,
+		The key is provided in the expression, and should be separated by a tab \x00
+		character or via {$ a b} Where a is the column header, and b is the row`,
 		Action: tabulateFunction,
 		Flags: []cli.Flag{
 			cli.StringFlag{
 				Name:  "delim",
-				Usage: "Character to tabulate on. Use {tab} helper by default",
-				Value: "\t",
+				Usage: "Character to tabulate on. Use {$} helper by default",
+				Value: "\x00",
 			},
 			cli.IntFlag{
 				Name:  "num,n",

diff --git a/docs/expressions.md b/docs/expressions.md
@@ -142,7 +142,12 @@ Syntax: `{tab a b c ...}`
 
 Concatenates the values of the arguments separated by a table character.
 
-Good for tabulate output separation.
+## Arrays / Null Separator
+
+Syntax: `{$ a b c}`
+
+Concatenates a set of arguments with a null separator.  Commonly used
+to form arrays that have meaning for a given aggregator.
 
 ## Paths
 

diff --git a/pkg/aggregation/table.go b/pkg/aggregation/table.go
@@ -1,9 +1,9 @@
 package aggregation
 
 import (
+	"rare/pkg/stringSplitter"
 	"sort"
 	"strconv"
-	"strings"
 )
 
 type TableRow struct {
@@ -34,16 +34,22 @@ func (s *TableAggregator) ParseErrors() uint64 {
 
 // Samples item like "<column><delim><row><delim><count>"
 func (s *TableAggregator) Sample(ele string) {
-	parts := strings.Split(ele, s.delim)
-	if len(parts) == 2 {
-		s.SampleItem(parts[0], parts[1], 1)
-	} else if len(parts) == 3 {
-		inc, err := strconv.ParseInt(parts[2], 10, 64)
+	splitter := stringSplitter.Splitter{
+		S:     ele,
+		Delim: s.delim,
+	}
+	part0 := splitter.Next()
+	part1, has1 := splitter.NextOk()
+	part2, has2 := splitter.NextOk()
+	if has2 {
+		inc, err := strconv.ParseInt(part2, 10, 64)
 		if err != nil {
 			s.errors++
 		} else {
-			s.SampleItem(parts[0], parts[1], inc)
+			s.SampleItem(part0, part1, inc)
 		}
+	} else if has1 {
+		s.SampleItem(part0, part1, 1)
 	} else {
 		s.errors++
 	}

diff --git a/pkg/expressions/funcs.go b/pkg/expressions/funcs.go
@@ -46,10 +46,13 @@ var defaultFunctions = map[string]KeyBuilderFunction{
 	"prefix": KeyBuilderFunction(kfPrefix),
 	"suffix": KeyBuilderFunction(kfSuffix),
 	"format": KeyBuilderFunction(kfFormat),
-	"tab":    KeyBuilderFunction(kfTab),
 	"substr": KeyBuilderFunction(kfSubstr),
 	"select": KeyBuilderFunction(kfSelect),
 
+	// Separation
+	"tab": kfSeparate('\t'),
+	"$":   kfSeparate('\x00'),
+
 	// Pathing
 	"basename": kfPathBase,
 	"dirname":  kfPathDir,

diff --git a/pkg/expressions/funcsStrings.go b/pkg/expressions/funcsStrings.go
@@ -151,17 +151,19 @@ func kfBytesize(args []KeyBuilderStage) KeyBuilderStage {
 	})
 }
 
-func kfTab(args []KeyBuilderStage) KeyBuilderStage {
-	if len(args) == 0 {
-		return stageLiteral("")
-	}
-	return KeyBuilderStage(func(context KeyBuilderContext) string {
-		var sb strings.Builder
-		sb.WriteString(args[0](context))
-		for _, arg := range args[1:] {
-			sb.WriteRune('\t')
-			sb.WriteString(arg(context))
+// kfSeparate returns a key-builder function that evaluates each of its
+// arguments against the context and joins the results with delim.
+// When called with no arguments, the produced stage is the empty-string
+// literal.
+func kfSeparate(delim rune) KeyBuilderFunction {
+	return func(args []KeyBuilderStage) KeyBuilderStage {
+		if len(args) == 0 {
+			return stageLiteral("")
 		}
-		return sb.String()
-	})
+		return KeyBuilderStage(func(context KeyBuilderContext) string {
+			var sb strings.Builder
+			sb.WriteString(args[0](context))
+			for _, arg := range args[1:] {
+				sb.WriteRune(delim)
+				sb.WriteString(arg(context))
+			}
+			return sb.String()
+		})
+	}
 }
diff --git a/pkg/expressions/funcs_test.go b/pkg/expressions/funcs_test.go
@@ -103,6 +103,11 @@ func TestTabulator(t *testing.T) {
 	testExpression(t, mockContext(), "{tab a b} {tab a b c}", "a\tb a\tb\tc")
 }
 
+// TestArray checks that the {$ ...} helper joins its arguments with a
+// null byte, including arguments that evaluate to an empty string.
+func TestArray(t *testing.T) {
+	cases := []struct {
+		expr string
+		want string
+	}{
+		{"{$ {0} {1} 22}", "q\x00\x0022"},
+		{`{$ "{0} hi" 22}`, "q hi\x0022"},
+	}
+	for _, c := range cases {
+		testExpression(t, mockContext("q"), c.expr, c.want)
+	}
+}
+
 func TestHumanize(t *testing.T) {
 	testExpression(t, mockContext(), "{hi 12345} {hf 12345.123512} {hi abc} {hf abc}",
 		"12,345 12,345.1235 <BAD-TYPE> <BAD-TYPE>")

diff --git a/pkg/stringSplitter/splitter.go b/pkg/stringSplitter/splitter.go
@@ -0,0 +1,41 @@
+package stringSplitter
+
+import "strings"
+
+// Splitter walks the pieces of S that are separated by Delim, handing
+// them back one at a time from Next/NextOk as substrings of S instead of
+// collecting them into a slice (the intent is to avoid heap allocation).
+//
+// The zero value of the unexported cursor starts at the beginning of S,
+// so a Splitter is ready to use once S and Delim are set.
+type Splitter struct {
+	S     string // input being split
+	Delim string // separator searched for between pieces
+	next  int    // offset in S where the next piece starts; negative once exhausted
+}
+
+// Next returns the piece of S that precedes the next occurrence of Delim
+// and moves the cursor past that delimiter. When no further delimiter is
+// found, the remainder of S is returned and the splitter becomes Done.
+// Calling Next on a Done splitter returns "".
+//
+// An empty Delim is treated as never matching, so the whole remainder is
+// returned as a single piece rather than looping on zero-width matches.
+func (s *Splitter) Next() (ret string) {
+	if s.next < 0 {
+		return ""
+	}
+
+	rest := s.S[s.next:]
+	idx := -1
+	if s.Delim != "" {
+		idx = strings.Index(rest, s.Delim)
+	}
+	if idx < 0 {
+		s.next = -1
+		return rest
+	}
+
+	// Step over the entire delimiter; it may be longer than one byte.
+	s.next += idx + len(s.Delim)
+	return rest[:idx]
+}
+
+// NextOk behaves like Next but also reports whether a piece was actually
+// available: ok is false (and ret is "") when the splitter was already
+// Done before the call.
+func (s *Splitter) NextOk() (ret string, ok bool) {
+	if s.Done() {
+		return "", false
+	}
+	return s.Next(), true
+}
+
+// Done reports whether the splitter is exhausted, i.e. an earlier Next
+// call found no further delimiter and returned the final piece of S.
+func (s *Splitter) Done() bool {
+	return s.next < 0
+}
diff --git a/pkg/stringSplitter/splitter_test.go b/pkg/stringSplitter/splitter_test.go
@@ -0,0 +1,72 @@
+package stringSplitter
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestStringSplitter walks a null-delimited string that ends with a
+// delimiter and checks each piece, including the final empty one.
+func TestStringSplitter(t *testing.T) {
+	s := Splitter{S: "abc\x00efg\x00123\x00", Delim: "\x00"}
+
+	for _, want := range []string{"abc", "efg", "123"} {
+		assert.Equal(t, want, s.Next())
+	}
+
+	// The trailing delimiter leaves one last, empty piece to consume.
+	assert.False(t, s.Done())
+	assert.Equal(t, "", s.Next())
+	assert.True(t, s.Done())
+}
+
+// TestSplitterNextOk checks that NextOk reports ok for every real piece
+// and returns ("", false) once the input has been used up.
+func TestSplitterNextOk(t *testing.T) {
+	s := Splitter{S: "abc\x00efg", Delim: "\x00"}
+
+	steps := []struct {
+		part string
+		ok   bool
+	}{
+		{"abc", true},
+		{"efg", true},
+		{"", false},
+	}
+	for _, step := range steps {
+		part, ok := s.NextOk()
+		assert.Equal(t, step.part, part)
+		assert.Equal(t, step.ok, ok)
+	}
+}
+
+// BenchmarkStringSplit measures strings.Split on a short null-delimited
+// input (presumably the baseline for the Splitter benchmarks).
+func BenchmarkStringSplit(b *testing.B) {
+	total := 0
+	for n := 0; n < b.N; n++ {
+		ele := strings.Split("abc\x00efg\x00123\x00", "\x00")
+		total += len(ele)
+	}
+	// Read total so the measured work has an observable result, and
+	// sanity-check it: the input always yields 4 pieces (the last empty).
+	if total != 4*b.N {
+		b.Fatalf("expected %d pieces, got %d", 4*b.N, total)
+	}
+}
+
+// BenchmarkSplitter measures draining a Splitter with Next/Done over a
+// short null-delimited input.
+func BenchmarkSplitter(b *testing.B) {
+	total := 0
+	for n := 0; n < b.N; n++ {
+		splitter := Splitter{S: "abc\x00efg\x00123\x00", Delim: "\x00"}
+		for !splitter.Done() {
+			splitter.Next()
+			total++
+		}
+	}
+	// Read total so the measured work has an observable result, and
+	// sanity-check it: the input always yields 4 pieces (the last empty).
+	if total != 4*b.N {
+		b.Fatalf("expected %d pieces, got %d", 4*b.N, total)
+	}
+}
+
+// BenchmarkSplitterNextOk measures draining a Splitter with NextOk over a
+// short null-delimited input.
+func BenchmarkSplitterNextOk(b *testing.B) {
+	total := 0
+	for n := 0; n < b.N; n++ {
+		splitter := Splitter{S: "abc\x00efg\x00123\x00", Delim: "\x00"}
+		for {
+			_, ok := splitter.NextOk()
+			if !ok {
+				break
+			}
+			total++
+		}
+	}
+	// Read total so the measured work has an observable result, and
+	// sanity-check it: the input always yields 4 pieces (the last empty).
+	if total != 4*b.N {
+		b.Fatalf("expected %d pieces, got %d", 4*b.N, total)
+	}
+}