Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ Matched: 161,622 / 161,622

## Tabulate

Create a 2D view (table) of data extracted from a file. Expression needs to yield a two dimensions separated by a tab. Can either use `\t` or the `{tab a b}` helper. First element is the column name, followed by the row name.
Create a 2D view (table) of data extracted from a file. The expression needs to yield two dimensions separated by a null character. You can use either `\x00` or the `{$ a b}` helper. The first element is the column name, followed by the row name.

```
NAME:
Expand All @@ -226,8 +226,8 @@ USAGE:

DESCRIPTION:
Summarizes the extracted data as a 2D data table.
The key is provided in the expression, and should be separated by a tab \t
character or via {tab a b} Where a is the column header, and b is the row
The key is provided in the expression, and should be separated by a tab \x00
character or via {$ a b} Where a is the column header, and b is the row

OPTIONS:
--follow, -f Read appended data as file grows
Expand All @@ -242,7 +242,7 @@ OPTIONS:
--readers value, --wr value Sets the number of concurrent readers (Infinite when -f) (default: 3)
--ignore value, -i value Ignore a match given a truthy expression (Can have multiple)
--recursive, -R Recursively walk a non-globbing path and search for plain-files
--delim value Character to tabulate on. Use {tab} helper by default (default: "\t")
--delim value Character to tabulate on. Use {$} helper by default (default: "\x00")
--num value, -n value Number of elements to display (default: 20)
--cols value Number of columns to display (default: 10)
--sortkey, --sk Sort rows by key name rather than by values
Expand All @@ -251,7 +251,7 @@ OPTIONS:
**Example:**

```bash
$ rare tabulate -m "(\d{3}) (\d+)" -e "{tab {1} {bucket {2} 100000}}" -sk access.log
$ rare tabulate -m "(\d{3}) (\d+)" -e "{$ {1} {bucket {2} 100000}}" -sk access.log

200 404 304 403 301 206
0 153,271 860 53 14 12 2
Expand Down
8 changes: 4 additions & 4 deletions cmd/tabulate.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,14 @@ func tabulateCommand() *cli.Command {
ShortName: "t",
Usage: "Create a 2D summarizing table of extracted data",
Description: `Summarizes the extracted data as a 2D data table.
The key is provided in the expression, and should be separated by a tab \t
character or via {tab a b} Where a is the column header, and b is the row`,
The key is provided in the expression, and should be separated by a tab \x00
character or via {$ a b} Where a is the column header, and b is the row`,
Action: tabulateFunction,
Flags: []cli.Flag{
cli.StringFlag{
Name: "delim",
Usage: "Character to tabulate on. Use {tab} helper by default",
Value: "\t",
Usage: "Character to tabulate on. Use {$} helper by default",
Value: "\x00",
},
cli.IntFlag{
Name: "num,n",
Expand Down
7 changes: 6 additions & 1 deletion docs/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,12 @@ Syntax: `{tab a b c ...}`

Concatenates the values of the arguments separated by a tab character.

Good for tabulate output separation.
## Arrays / Null Separator

Syntax: `{$ a b c}`

Concatenates a set of arguments with a null separator. Commonly used
to form arrays that have meaning for a given aggregator.

## Paths

Expand Down
20 changes: 13 additions & 7 deletions pkg/aggregation/table.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package aggregation

import (
"rare/pkg/stringSplitter"
"sort"
"strconv"
"strings"
)

type TableRow struct {
Expand Down Expand Up @@ -34,16 +34,22 @@ func (s *TableAggregator) ParseErrors() uint64 {

// Samples item like "<column><delim><row><delim><count>"
func (s *TableAggregator) Sample(ele string) {
parts := strings.Split(ele, s.delim)
if len(parts) == 2 {
s.SampleItem(parts[0], parts[1], 1)
} else if len(parts) == 3 {
inc, err := strconv.ParseInt(parts[2], 10, 64)
splitter := stringSplitter.Splitter{
S: ele,
Delim: s.delim,
}
part0 := splitter.Next()
part1, has1 := splitter.NextOk()
part2, has2 := splitter.NextOk()
if has2 {
inc, err := strconv.ParseInt(part2, 10, 64)
if err != nil {
s.errors++
} else {
s.SampleItem(parts[0], parts[1], inc)
s.SampleItem(part0, part1, inc)
}
} else if has1 {
s.SampleItem(part0, part1, 1)
} else {
s.errors++
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/expressions/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@ var defaultFunctions = map[string]KeyBuilderFunction{
"prefix": KeyBuilderFunction(kfPrefix),
"suffix": KeyBuilderFunction(kfSuffix),
"format": KeyBuilderFunction(kfFormat),
"tab": KeyBuilderFunction(kfTab),
"substr": KeyBuilderFunction(kfSubstr),
"select": KeyBuilderFunction(kfSelect),

// Separation
"tab": kfSeparate('\t'),
"$": kfSeparate('\x00'),

// Pathing
"basename": kfPathBase,
"dirname": kfPathDir,
Expand Down
26 changes: 14 additions & 12 deletions pkg/expressions/funcsStrings.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,19 @@ func kfBytesize(args []KeyBuilderStage) KeyBuilderStage {
})
}

func kfTab(args []KeyBuilderStage) KeyBuilderStage {
if len(args) == 0 {
return stageLiteral("")
}
return KeyBuilderStage(func(context KeyBuilderContext) string {
var sb strings.Builder
sb.WriteString(args[0](context))
for _, arg := range args[1:] {
sb.WriteRune('\t')
sb.WriteString(arg(context))
func kfSeparate(delim rune) KeyBuilderFunction {
return func(args []KeyBuilderStage) KeyBuilderStage {
if len(args) == 0 {
return stageLiteral("")
}
return sb.String()
})
return KeyBuilderStage(func(context KeyBuilderContext) string {
var sb strings.Builder
sb.WriteString(args[0](context))
for _, arg := range args[1:] {
sb.WriteRune(delim)
sb.WriteString(arg(context))
}
return sb.String()
})
}
}
5 changes: 5 additions & 0 deletions pkg/expressions/funcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ func TestTabulator(t *testing.T) {
testExpression(t, mockContext(), "{tab a b} {tab a b c}", "a\tb a\tb\tc")
}

// TestArray exercises the {$ ...} helper, which joins its arguments with a
// null (\x00) byte.
func TestArray(t *testing.T) {
	const (
		// The expected output shows {1} expanding to an empty element,
		// leaving two adjacent null bytes.
		sparseExpr = "{$ {0} {1} 22}"
		sparseWant = "q\x00\x0022"

		// A quoted argument is kept together as a single element.
		quotedExpr = `{$ "{0} hi" 22}`
		quotedWant = "q hi\x0022"
	)

	testExpression(t, mockContext("q"), sparseExpr, sparseWant)
	testExpression(t, mockContext("q"), quotedExpr, quotedWant)
}

func TestHumanize(t *testing.T) {
testExpression(t, mockContext(), "{hi 12345} {hf 12345.123512} {hi abc} {hf abc}",
"12,345 12,345.1235 <BAD-TYPE> <BAD-TYPE>")
Expand Down
41 changes: 41 additions & 0 deletions pkg/stringSplitter/splitter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package stringSplitter

import "strings"

// Splitter walks the Delim-separated fields of S one at a time. Each field is
// returned as a substring of S, so no slice of parts is built the way
// strings.Split builds one.
//
// Populate S and Delim, then call Next or NextOk until Done reports true.
// The zero value of the unexported cursor starts at the beginning of S.
type Splitter struct {
	S     string // String being split.
	Delim string // Separator between fields; may be longer than one byte.
	next  int    // Byte offset in S of the next field; negative once exhausted.
}

// Next returns the next field of S and advances past the delimiter that
// follows it. When no delimiter remains, the rest of S is returned (possibly
// empty, e.g. after a trailing delimiter) and the splitter becomes Done.
// Calling Next on a Done splitter returns "".
//
// An empty Delim is treated as "no delimiter": the remainder of S is returned
// as a single field.
func (s *Splitter) Next() string {
	if s.next < 0 {
		return ""
	}

	rest := s.S[s.next:]

	// An empty delimiter matches at every offset and can never move the
	// cursor forward, so hand back the remainder instead of looping.
	if s.Delim == "" {
		s.next = -1
		return rest
	}

	idx := strings.Index(rest, s.Delim)
	if idx < 0 {
		s.next = -1
		return rest
	}

	// Skip the whole delimiter, not just its first byte.
	s.next += idx + len(s.Delim)
	return rest[:idx]
}

// NextOk is like Next but also reports whether a field was available. ok is
// false only when the splitter was already Done before the call, in which
// case ret is "".
func (s *Splitter) NextOk() (ret string, ok bool) {
	if s.Done() {
		return "", false
	}
	return s.Next(), true
}

// Done reports whether every field has been consumed. It stays false until
// the final field (the one with no delimiter after it) has been read.
func (s *Splitter) Done() bool {
	return s.next < 0
}
72 changes: 72 additions & 0 deletions pkg/stringSplitter/splitter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package stringSplitter

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// TestStringSplitter checks that Next yields each null-separated field in
// order, and that a trailing delimiter leaves one final empty field to read
// before the splitter reports Done.
func TestStringSplitter(t *testing.T) {
	splitter := Splitter{
		Delim: "\x00",
		S:     "abc\x00efg\x00123\x00",
	}

	for _, want := range []string{"abc", "efg", "123"} {
		assert.Equal(t, want, splitter.Next())
	}

	// The input ends with a delimiter, so an empty field is still pending.
	assert.False(t, splitter.Done())
	assert.Equal(t, "", splitter.Next())
	assert.True(t, splitter.Done())
}

// TestSplitterNextOk checks that NextOk returns each field with ok=true and
// then an empty string with ok=false once the input is exhausted.
func TestSplitterNextOk(t *testing.T) {
	splitter := Splitter{
		Delim: "\x00",
		S:     "abc\x00efg",
	}

	first, firstOk := splitter.NextOk()
	assert.Equal(t, "abc", first)
	assert.True(t, firstOk)

	second, secondOk := splitter.NextOk()
	assert.Equal(t, "efg", second)
	assert.True(t, secondOk)

	// No delimiter followed "efg", so the splitter is now exhausted.
	third, thirdOk := splitter.NextOk()
	assert.Equal(t, "", third)
	assert.False(t, thirdOk)
}

// BenchmarkStringSplit times strings.Split on a short null-separated input
// and accumulates the number of parts returned.
func BenchmarkStringSplit(b *testing.B) {
	const input = "abc\x00efg\x00123\x00"

	parts := 0
	for i := 0; i < b.N; i++ {
		parts += len(strings.Split(input, "\x00"))
	}
}

// BenchmarkSplitter times draining a Splitter with the Done/Next loop on a
// short null-separated input, counting the fields read.
func BenchmarkSplitter(b *testing.B) {
	fields := 0
	for i := 0; i < b.N; i++ {
		sp := Splitter{S: "abc\x00efg\x00123\x00", Delim: "\x00"}
		for ; !sp.Done(); fields++ {
			sp.Next()
		}
	}
}

// BenchmarkSplitterNextOk times draining a Splitter with NextOk on a short
// null-separated input, counting the fields read until ok turns false.
func BenchmarkSplitterNextOk(b *testing.B) {
	fields := 0
	for i := 0; i < b.N; i++ {
		sp := Splitter{S: "abc\x00efg\x00123\x00", Delim: "\x00"}
		for _, ok := sp.NextOk(); ok; _, ok = sp.NextOk() {
			fields++
		}
	}
}