2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -23,7 +23,7 @@ jobs:
- name: Test
run: |
go test -v -race -coverprofile=coverage.txt -covermode=atomic ./...
go test -tags=pcre2 rare/pkg/fastregex
go test -tags=pcre2 rare/pkg/matchers/fastregex
- name: StaticCheck
run: |
go run honnef.co/go/tools/cmd/[email protected] ./...
1 change: 1 addition & 0 deletions README.md
@@ -20,6 +20,7 @@ See [rare.zdyn.net](https://rare.zdyn.net) or the [docs/ folder](docs/) for the
## Features

* Multiple summary formats including: filter (like grep), histogram, bar graphs, tables, heatmaps, reduce, and numerical analysis
* Parse using regex (`-m`) or dissect tokenizer (`-d`)
* File glob expansions (eg `/var/log/*` or `/var/log/*/*.log`) and `-R`
* Optional gzip decompression (with `-z`)
* Following `-f` or re-open following `-F` (use `--poll` to poll, and `--tail` to tail)
54 changes: 48 additions & 6 deletions cmd/helpers/extractorBuilder.go
@@ -1,12 +1,16 @@
package helpers

import (
"errors"
"os"
"rare/pkg/expressions"
"rare/pkg/extractor"
"rare/pkg/extractor/batchers"
"rare/pkg/extractor/dirwalk"
"rare/pkg/logger"
"rare/pkg/matchers"
"rare/pkg/matchers/dissect"
"rare/pkg/matchers/fastregex"
"runtime"
"strings"

@@ -74,15 +78,15 @@ func BuildExtractorFromArguments(c *cli.Context, batcher *batchers.Batcher) *ext

func BuildExtractorFromArgumentsEx(c *cli.Context, batcher *batchers.Batcher, sep string) *extractor.Extractor {
config := extractor.Config{
Posix: c.Bool("posix"),
Regex: c.String("match"),
Extract: strings.Join(c.StringSlice("extract"), sep),
Workers: c.Int("workers"),
}

if c.Bool("ignore-case") {
config.Regex = "(?i)" + config.Regex
matcher, err := BuildMatcherFromArguments(c)
if err != nil {
logger.Fatalln(ExitCodeInvalidUsage, err)
}
config.Matcher = matcher

ignoreSlice := c.StringSlice("ignore")
if len(ignoreSlice) > 0 {
@@ -100,6 +104,38 @@ func BuildExtractorFromArgumentsEx(c *cli.Context, batcher *batchers.Batcher, se
return ret
}

func BuildMatcherFromArguments(c *cli.Context) (matchers.Factory, error) {
var (
matchExpr = c.String("match")
dissectExpr = c.String("dissect")
posix = c.Bool("posix")
ignoreCase = c.Bool("ignore-case")
)

switch {
case c.IsSet("match") && c.IsSet("dissect"):
return nil, errors.New("match and dissect conflict")
case c.IsSet("dissect"):
d, err := dissect.CompileEx(dissectExpr, ignoreCase)
if err != nil {
return nil, err
}
return matchers.ToFactory(d), nil
case c.IsSet("match"):
if ignoreCase {
matchExpr = "(?i)" + matchExpr
}

r, err := fastregex.CompileEx(matchExpr, posix)
if err != nil {
return nil, err
}
return matchers.ToFactory(r), nil
default:
return &matchers.AlwaysMatch{}, nil
}
}

func getExtractorFlags() []cli.Flag {
workerCount := runtime.NumCPU()/2 + 1

@@ -146,12 +182,18 @@ func getExtractorFlags() []cli.Flag {
Usage: "Compile regex as against posix standard",
},
&cli.StringFlag{
Name: "match,m",
Name: "match",
Aliases: []string{"m"},
Category: cliCategoryMatching,
Usage: "Regex to create match groups to summarize on",
Value: ".*",
},
&cli.StringFlag{
Name: "dissect",
Aliases: []string{"d"},
Category: cliCategoryMatching,
Usage: "Dissect expression create match groups to summarize on",
},
&cli.StringSliceFlag{
Name: "extract",
Aliases: []string{"e"},
Expand All @@ -169,7 +211,7 @@ func getExtractorFlags() []cli.Flag {
Name: "ignore-case",
Aliases: []string{"I"},
Category: cliCategoryMatching,
Usage: "Augment regex to be case insensitive",
Usage: "Augment matcher to be case insensitive",
},
&cli.IntFlag{
Name: "batch",
12 changes: 11 additions & 1 deletion cmd/helpers/extractorBuilder_test.go
@@ -56,6 +56,10 @@ func TestBuildingExtractorFromContext(t *testing.T) {
assert.NoError(t, runApp(""))
assert.NoError(t, runApp(`-I -i "{eq {0} abc}" ../testdata/log.txt`))
assert.NoError(t, runApp(`-f ../testdata/log.txt`))
assert.NoError(t, runApp(`-m ".*" ../testdata/log.txt`))
assert.NoError(t, runApp(`-I -m ".*" ../testdata/log.txt`))
assert.NoError(t, runApp(`-d "%{}" ../testdata/log.txt`))
assert.NoError(t, runApp(`-I -d "%{}" ../testdata/log.txt`))
testLogFatal(t, 2, func() {
runApp("--batch 0 ../testdata/log.txt")
})
@@ -77,5 +81,11 @@ func TestBuildingExtractorFromContext(t *testing.T) {
testLogFatal(t, 2, func() {
runApp(`-i "{0" -`)
})
assert.Equal(t, 3, actionCalled)
testLogFatal(t, 2, func() {
runApp(`-m regex -d dissect -`)
})
testLogFatal(t, 2, func() {
runApp(`-d "%{unclosed" -`)
})
assert.Equal(t, 7, actionCalled)
}
4 changes: 3 additions & 1 deletion cmd/helpers/updatingAggregator_test.go
@@ -4,6 +4,8 @@ import (
"io"
"rare/pkg/extractor"
"rare/pkg/extractor/batchers"
"rare/pkg/matchers"
"rare/pkg/matchers/fastregex"
"strings"
"testing"

@@ -31,7 +33,7 @@ func TestAggregationLoop(t *testing.T) {
// Build a real extractor
batcher := batchers.OpenReaderToChan("test", io.NopCloser(strings.NewReader(testData)), 1, 1)
ex, err := extractor.New(batcher.BatchChan(), &extractor.Config{
Regex: `(\d+)`,
Matcher: matchers.ToFactory(fastregex.MustCompile(`(\d+)`)),
Extract: "val:{1}",
Workers: 1,
})
32 changes: 24 additions & 8 deletions docs/cli-help.md
@@ -67,6 +67,8 @@ Filter incoming results with search criteria, and output raw matches

**--batch-buffer**="": Specifies how many batches to read-ahead. Impacts memory usage, can improve performance (default: 6)

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])

**--follow, -f**: Read appended data as file grows
@@ -75,7 +77,7 @@ Filter incoming results with search criteria, and output raw matches

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--line, -l**: Output source file and line number

@@ -113,6 +115,8 @@ Summarize results by extracting them to a histogram

**--csv, -o**="": Write final results to csv. Use - to output to stdout

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extra, -x**: Alias for -b --percentage

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -123,7 +127,7 @@ Summarize results by extracting them to a histogram

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -167,6 +171,8 @@ Create a 2D heatmap of extracted data

**--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])

**--follow, -f**: Read appended data as file grows
@@ -175,7 +181,7 @@ Create a 2D heatmap of extracted data

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -223,6 +229,8 @@ Create rows of sparkline graphs

**--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])

**--follow, -f**: Read appended data as file grows
@@ -231,7 +239,7 @@ Create rows of sparkline graphs

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -273,6 +281,8 @@ Create a bargraph of the given 1 or 2 dimension data

**--csv, -o**="": Write final results to csv. Use - to output to stdout

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])

**--follow, -f**: Read appended data as file grows
@@ -281,7 +291,7 @@ Create a bargraph of the given 1 or 2 dimension data

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -317,6 +327,8 @@ Numerical analysis on a set of filtered data

**--batch-buffer**="": Specifies how many batches to read-ahead. Impacts memory usage, can improve performance (default: 6)

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extra, -x**: Displays extra analysis on the data (Requires more memory and cpu)

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -327,7 +339,7 @@ Numerical analysis on a set of filtered data

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -367,6 +379,8 @@ Create a 2D summarizing table of extracted data

**--delim**="": Character to tabulate on. Use {$} helper by default (default: \x00)

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extra, -x**: Display row and column totals

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{0}])
@@ -377,7 +391,7 @@ Create a 2D summarizing table of extracted data

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--match, -m**="": Regex to create match groups to summarize on (default: .*)

@@ -421,6 +435,8 @@ Aggregate the results of a query based on an expression, pulling customized summ

**--csv, -o**="": Write final results to csv. Use - to output to stdout

**--dissect, -d**="": Dissect expression to create match groups to summarize on

**--extract, -e**="": Expression that will generate the key to group by. Specify multiple times for multi-dimensions or use {$} helper (default: [{@}])

**--follow, -f**: Read appended data as file grows
@@ -431,7 +447,7 @@ Aggregate the results of a query based on an expression, pulling customized summ

**--ignore, -i**="": Ignore a match given a truthy expression (Can have multiple)

**--ignore-case, -I**: Augment regex to be case insensitive
**--ignore-case, -I**: Augment matcher to be case insensitive

**--initial**="": Specify the default initial value for any accumulators that don't specify (default: 0)

1 change: 1 addition & 0 deletions docs/index.md
Expand Up @@ -18,6 +18,7 @@ Supports various CLI-based graphing and metric formats (filter (grep-like), hist
## Features

* Multiple summary formats including: filter (like grep), histogram, bar graphs, tables, heatmaps, sparklines, reduce, and numerical analysis
* Parse using regex (`-m`) or dissect tokenizer (`-d`)
* File glob expansions (eg `/var/log/*` or `/var/log/*/*.log`) and `-R`
* Optional gzip decompression (with `-z`)
* Following `-f` or re-open following `-F` (use `--poll` to poll, and `--tail` to tail)
75 changes: 75 additions & 0 deletions docs/usage/dissect.md
@@ -0,0 +1,75 @@
# Dissect Syntax

*Dissect* is a simple token-based search algorithm that can
be up to 10x faster than regex (and 40% faster than PCRE).

It works by searching for constant delimiters in a string
and extracting the text between them as named keys.

*rare* implements a subset of the full dissect algorithm.

**Syntax Example:**
```
prefix %{name} : %{value} - %{?ignored}
```
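
To make the matching strategy concrete, here is a minimal, self-contained Go sketch of the idea. It is not rare's actual implementation (which also records indexed keys such as `{0}` for the full match and handles more edge cases); it only splits the pattern into literal delimiters and `%{}` captures, then walks the input locating each delimiter in turn.

```go
package main

import (
	"fmt"
	"strings"
)

// token is either a literal delimiter or a %{...} capture.
type token struct {
	literal string // non-empty for a constant delimiter
	name    string // capture name; "" or a leading '?' means "consume but don't keep"
}

// compile splits a pattern such as "prefix %{name} : %{value}"
// into alternating literal and capture tokens.
func compile(pattern string) []token {
	var toks []token
	for len(pattern) > 0 {
		start := strings.Index(pattern, "%{")
		if start < 0 {
			toks = append(toks, token{literal: pattern})
			break
		}
		if start > 0 {
			toks = append(toks, token{literal: pattern[:start]})
		}
		end := strings.Index(pattern[start:], "}")
		if end < 0 {
			// unclosed token: treat the rest of the pattern as a literal
			toks = append(toks, token{literal: pattern[start:]})
			break
		}
		toks = append(toks, token{name: pattern[start+2 : start+end]})
		pattern = pattern[start+end+1:]
	}
	return toks
}

// match walks the line, finding each literal delimiter in order and
// capturing the text between delimiters as the named keys.
func match(toks []token, line string) (map[string]string, bool) {
	keys := map[string]string{}
	pos := 0
	for i, t := range toks {
		if t.literal != "" {
			idx := strings.Index(line[pos:], t.literal)
			if idx < 0 {
				return nil, false
			}
			pos += idx + len(t.literal)
			continue
		}
		// a capture runs until the next literal delimiter (or end of line)
		end := len(line)
		if i+1 < len(toks) && toks[i+1].literal != "" {
			idx := strings.Index(line[pos:], toks[i+1].literal)
			if idx < 0 {
				return nil, false
			}
			end = pos + idx
		}
		if t.name != "" && !strings.HasPrefix(t.name, "?") {
			keys[t.name] = line[pos:end]
		}
		pos = end
	}
	return keys, true
}

func main() {
	toks := compile("prefix %{name} : %{value}")
	keys, ok := match(toks, "prefix bob : 123")
	fmt.Println(ok, keys) // true map[name:bob value:123]
}
```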

## Syntax

- Anything in a `%{}` is a variable token.
- A blank token, or a token that starts with `?`, is skipped, eg `%{}` or `%{?skipped}`
- Tokens are extracted by both name and index (in the order they appear).
- Index `{0}` is the full match, including the delimiters
- Patterns don't need to match the entire line

## Examples

### Simple

```
prefix %{name} : %{value}
```

Will match:
```
prefix bob : 123
```

And will extract three indexed keys:
```
0: prefix bob : 123
1: bob
2: 123
```

And will extract two named keys:
```
name=bob
value=123
```

### Nginx Logs

As a simple example, to parse nginx logs that look like:

```
104.238.185.46 - - [19/Aug/2019:02:26:25 +0000] "GET / HTTP/1.1" 200 546 "-" "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/98 Safari/537.4 (StatusCake)"
```

The following dissect expression can be used:

```
%{ip} - - [%{timestamp}] "%{verb} %{path} HTTP/%{?http-version}" %{status} %{size} "-" "%{useragent}"
```

Which, as JSON, will return:
```json
{
  "ip": "104.238.185.46",
  "timestamp": "19/Aug/2019:02:26:25 +0000",
  "verb": "GET",
  "path": "/",
  "status": 200,
  "size": 546,
  "useragent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/98 Safari/537.4 (StatusCake)"
}
```
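
For reference, here is a rough sketch of compiling a dissect expression programmatically, assuming only the `dissect.CompileEx` and `matchers.ToFactory` signatures that appear in the `cmd/helpers/extractorBuilder.go` change above; these are internal packages, not a documented public API.

```go
package main

import (
	"fmt"

	"rare/pkg/matchers"
	"rare/pkg/matchers/dissect"
)

func main() {
	// Compile the nginx expression from the example above
	// (the second argument toggles case-insensitive matching, as with -I).
	expr := `%{ip} - - [%{timestamp}] "%{verb} %{path} HTTP/%{?http-version}" %{status} %{size} "-" "%{useragent}"`
	d, err := dissect.CompileEx(expr, false)
	if err != nil {
		panic(err)
	}

	// Wrap it in a matcher factory, as BuildMatcherFromArguments does for -d.
	var factory matchers.Factory = matchers.ToFactory(d)
	fmt.Printf("compiled dissect matcher factory: %T\n", factory)
}
```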