diff --git a/cmd/filter.go b/cmd/filter.go index e9546c1..61b6a45 100644 --- a/cmd/filter.go +++ b/cmd/filter.go @@ -1,7 +1,7 @@ package cmd import ( - "fmt" + "bufio" "os" "rare/cmd/helpers" @@ -21,36 +21,44 @@ func filterFunction(c *cli.Context) error { batcher := helpers.BuildBatcherFromArguments(c) extractor := helpers.BuildExtractorFromArgumentsEx(c, batcher, "\t") + stdout := bufio.NewWriter(os.Stdout) + readChan := extractor.ReadFull() OUTER_LOOP: - for { - matchBatch, more := <-readChan - if !more { - break - } + for matchBatch := range readChan { for _, match := range matchBatch { if writeLines { - fmt.Printf("%s %s: ", color.Wrap(color.BrightGreen, match.Source), color.Wrapi(color.BrightYellow, match.LineNumber)) + stdout.WriteString(color.Wrap(color.BrightGreen, match.Source)) + stdout.WriteString(" ") + stdout.WriteString(color.Wrapi(color.BrightYellow, match.LineNumber)) + stdout.WriteString(": ") } if !customExtractor { if len(match.Indices) == 2 { // Single match, highlight entire phrase - fmt.Println(color.WrapIndices(match.Line, match.Indices)) + stdout.WriteString(color.WrapIndices(match.Line, match.Indices)) } else { // Multi-match groups, highlight individual groups - fmt.Println(color.WrapIndices(match.Line, match.Indices[2:])) + stdout.WriteString(color.WrapIndices(match.Line, match.Indices[2:])) } } else { - fmt.Println(match.Extracted) + stdout.WriteString(match.Extracted) } + stdout.WriteString("\n") readLines++ if numLineLimit > 0 && readLines >= numLineLimit { break OUTER_LOOP } } + + // Flush after each batch to make file-following work as expected + stdout.Flush() } + // Final flush + stdout.Flush() + if numLineLimit > 0 { helpers.FWriteMatchSummary(os.Stderr, readLines, numLineLimit) os.Stderr.WriteString("\n") diff --git a/cmd/filter_test.go b/cmd/filter_test.go index a561ddb..b937f2c 100644 --- a/cmd/filter_test.go +++ b/cmd/filter_test.go @@ -23,6 +23,13 @@ func TestFilterExtract(t *testing.T) { assert.Equal(t, "Matched: 3 / 6\n", eout) } +func TestFilterLine(t *testing.T) { + out, eout, err := testCommandCapture(filterCommand(), `-l -m (\d+) -e "{1}" testdata/log.txt`) + assert.NoError(t, err) + assert.Equal(t, "testdata/log.txt 1: 5\ntestdata/log.txt 2: 22\ntestdata/log.txt 3: 5\n", out) + assert.Equal(t, "Matched: 3 / 6\n", eout) +} + func TestFilterMultiExtract(t *testing.T) { out, eout, err := testCommandCapture(filterCommand(), `-m (\d+) -e "{1}" -e "b-{1}" testdata/log.txt`) assert.NoError(t, err) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 036f7aa..2589e4b 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -24,9 +24,9 @@ $ time rare filter -m '" (\d{3})' -e "{1}" -z testdata/*.gz | wc -l Matched: 8,373,328 / 8,373,328 8373328 -real 0m12.550s -user 0m17.833s -sys 0m15.953s +real 0m3.266s +user 0m10.607s +sys 0m0.769s ``` When aggregating data, `rare` is significantly faster than alternatives.