Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/extractor/dirwalk/globExpand.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path/filepath"
"rare/pkg/extractor/dirwalk/iterwalk"
"sync/atomic"
)

Expand Down Expand Up @@ -73,7 +74,7 @@ func (s *Walker) recurseWalk(c chan<- string, p string, visited map[string]strin
rootDevId = getDeviceId(p)
}

filepath.WalkDir(p, func(walkPath string, info os.DirEntry, err error) error {
iterwalk.WalkDir(p, func(walkPath string, info os.DirEntry, err error) error {
switch {
case err != nil: // error
s.onError(fmt.Errorf("path error: %w", err))
Expand Down
7 changes: 5 additions & 2 deletions pkg/extractor/dirwalk/globExpand_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,11 @@ func TestNoDoubleTraverseSymlink(t *testing.T) {
}

files := collectChan(walker.Walk(p))
assert.Equal(t, 1, countContains(files, "op1"))
assert.Equal(t, 0, countContains(files, "op2"))

// exclusive or on the two possible options (should walk one of them)
has1, has2 := countContains(files, "op1") > 0, countContains(files, "op2") > 0
assert.True(t, has1 != has2)

assert.True(t, hadError)
assert.Equal(t, uint64(0), walker.ExcludedCount())
}
Expand Down
87 changes: 87 additions & 0 deletions pkg/extractor/dirwalk/iterwalk/iterwalk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package iterwalk

import (
"io"
"io/fs"
"os"
"path/filepath"
)

/*
This is a fork of filepath.WalkDir that replaces the os.ReadDir call with one
that opens the directory and iterates over its entries incrementally, without
sorting them.

For directories with many files, this improves the response time and also
reduces the overall time needed to query the path. For smaller directories it
adds some overhead, but the overhead is negligible (microseconds).
*/

const readBatchSize = 1000

// WalkDir walks the file tree rooted at root, calling fn for each file or
// directory in the tree, including root. It follows the structure of
// filepath.WalkDir but delegates the descent to this package's walkDir.
//
// If root cannot be lstat'ed, fn is called once with a nil entry and the
// lstat error. A filepath.SkipDir or filepath.SkipAll result from the walk is
// treated as success and reported as nil; any other error is returned as-is.
func WalkDir(root string, fn fs.WalkDirFunc) error {
	var walkErr error
	if rootInfo, statErr := os.Lstat(root); statErr == nil {
		walkErr = walkDir(root, fs.FileInfoToDirEntry(rootInfo), fn)
	} else {
		// The root itself is unreadable: let the callback decide what to do.
		walkErr = fn(root, nil, statErr)
	}

	switch walkErr {
	case filepath.SkipDir, filepath.SkipAll:
		// Skip sentinels are control flow, not failures.
		return nil
	default:
		return walkErr
	}
}

// walkDir recursively descends path, calling walkDirFn for path itself and
// then for every entry below it. It follows the structure of the unexported
// walkDir in path/filepath, but instead of os.ReadDir it opens the directory
// once and reads it in batches of readBatchSize entries via (*os.File).ReadDir.
// Entries are therefore visited in the order the directory yields them, not
// in sorted order.
//
// Callback protocol:
//   - walkDirFn(path, d, nil) is called first. If it returns an error, or d is
//     not a directory, the walk of this path ends. filepath.SkipDir returned
//     for a directory is translated to nil.
//   - If the directory cannot be opened or read, walkDirFn(path, d, err) is
//     called a second time with that error. A nil result means "ignore and
//     move on"; filepath.SkipDir is translated to nil.
//   - filepath.SkipDir returned while visiting a child stops iteration of the
//     remaining entries in this directory, including unread batches.
//
// The directory handle stays open while its children are walked, so one file
// descriptor is held per directory level of the current descent.
func walkDir(path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
	if err := walkDirFn(path, d, nil); err != nil || !d.IsDir() {
		if err == filepath.SkipDir && d.IsDir() {
			// Successfully skipped directory.
			err = nil
		}
		return err
	}

	// From here on d is known to be a directory.
	f, err := os.Open(path)
	if err != nil {
		// Second call, to report the failure to open the directory.
		err = walkDirFn(path, d, err)
		if err == filepath.SkipDir {
			err = nil
		}
		// Without a handle there is nothing to iterate. Returning here avoids
		// calling ReadDir on a nil *os.File, which would report a spurious
		// os.ErrInvalid to the callback for the same directory.
		return err
	}
	defer f.Close()

	for {
		entries, err := f.ReadDir(readBatchSize)
		if err != nil && err != io.EOF {
			// Second call, to report the read error.
			err = walkDirFn(path, d, err)
			if err != nil {
				if err == filepath.SkipDir {
					err = nil
				}
				return err
			}
		}

		// An empty batch means end of directory (or a read error the
		// callback chose to ignore); either way there is nothing left.
		if len(entries) == 0 {
			return nil
		}

		for _, d1 := range entries {
			path1 := filepath.Join(path, d1.Name())
			if err := walkDir(path1, d1, walkDirFn); err != nil {
				if err == filepath.SkipDir {
					// Skip the rest of this directory, including any
					// batches that have not been read yet.
					return nil
				}
				return err
			}
		}
	}
}
111 changes: 111 additions & 0 deletions pkg/extractor/dirwalk/iterwalk/iterwalk_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package iterwalk

import (
"io/fs"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
)

// BenchmarkFilepath times the standard library's filepath.WalkDir over the
// current directory with a callback that only passes the error through.
//
// Recorded result:
// BenchmarkFilepath-4 96153 11961 ns/op 681 B/op 19 allocs/op
func BenchmarkFilepath(b *testing.B) {
	passThrough := func(_ string, _ fs.DirEntry, err error) error { return err }
	for i := 0; i < b.N; i++ {
		filepath.WalkDir("./", passThrough)
	}
}

// BenchmarkIter-4 86143 14648 ns/op 681 B/op 19 allocs/op
func BenchmarkIter(b *testing.B) {
for range b.N {
WalkDir("./", func(path string, d fs.DirEntry, err error) error { return err })
}
}

// TestWalkDir_File checks that walking a regular file visits exactly that
// file, once, with a non-directory entry and no error.
func TestWalkDir_File(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "testfile.txt")
	// Fail fast on setup problems: a missing file would otherwise still
	// produce a single callback (carrying the lstat error) for the same path.
	if err := os.WriteFile(file, []byte("hello"), 0644); err != nil {
		t.Fatalf("setup: %v", err)
	}

	var walked []string
	err := WalkDir(file, func(path string, d fs.DirEntry, err error) error {
		assert.NoError(t, err)
		if assert.NotNil(t, d) {
			assert.False(t, d.IsDir())
		}
		walked = append(walked, path)
		return nil
	})
	assert.NoError(t, err)
	assert.Equal(t, []string{file}, walked)
}

// TestWalkDir_Dir checks that walking a directory with two files visits the
// directory itself plus both files. Entries are not sorted by WalkDir, so the
// comparison ignores order.
func TestWalkDir_Dir(t *testing.T) {
	dir := t.TempDir()
	fileA := filepath.Join(dir, "a.txt")
	fileB := filepath.Join(dir, "b.txt")
	if err := os.WriteFile(fileA, []byte("a"), 0644); err != nil {
		t.Fatalf("setup: %v", err)
	}
	if err := os.WriteFile(fileB, []byte("b"), 0644); err != nil {
		t.Fatalf("setup: %v", err)
	}

	var walked []string
	err := WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		assert.NoError(t, err)
		walked = append(walked, path)
		return nil
	})
	assert.NoError(t, err)
	assert.Len(t, walked, 3)
	assert.ElementsMatch(t, []string{dir, fileA, fileB}, walked)
}

// TestWalkDir_SkipDir checks that returning filepath.SkipDir for a directory
// reports the directory itself but none of its contents.
func TestWalkDir_SkipDir(t *testing.T) {
	dir := t.TempDir()
	sub := filepath.Join(dir, "subdir")
	nested := filepath.Join(sub, "file.txt")
	if err := os.Mkdir(sub, 0755); err != nil {
		t.Fatalf("setup: %v", err)
	}
	if err := os.WriteFile(nested, []byte("x"), 0644); err != nil {
		t.Fatalf("setup: %v", err)
	}

	var walked []string
	err := WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Surface unexpected walk errors instead of masking them.
			return err
		}
		walked = append(walked, path)
		if d.IsDir() && path == sub {
			return filepath.SkipDir
		}
		return nil
	})
	assert.NoError(t, err)
	assert.Contains(t, walked, sub)
	assert.NotContains(t, walked, nested)
}

// TestWalkDir_SkipDir_Large checks that filepath.SkipDir returned from a file
// stops iteration of the whole containing directory, even when that directory
// holds more entries than a single read batch.
//
// The subdirectory contains readBatchSize+1 files, so its listing spans two
// batches. The callback returns SkipDir on the first file it sees. If only the
// current batch were abandoned, the first file of the next batch would also
// be visited, so exactly one visited file proves the skip crosses batches.
func TestWalkDir_SkipDir_Large(t *testing.T) {
	if testing.Short() {
		t.Skip()
	}

	dir := t.TempDir()
	sub := filepath.Join(dir, "bigsubdir")
	if err := os.Mkdir(sub, 0755); err != nil {
		t.Fatalf("setup: %v", err)
	}
	// Create one more file than fits in a single batch.
	for i := range readBatchSize + 1 {
		name := filepath.Join(sub, "file_"+strconv.Itoa(i)+".txt")
		if err := os.WriteFile(name, []byte("x"), 0644); err != nil {
			t.Fatalf("setup: %v", err)
		}
	}

	var walkedFiles []string
	err := WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		// Record the file before asking to skip the rest of its directory.
		walkedFiles = append(walkedFiles, path)
		return filepath.SkipDir
	})
	assert.NoError(t, err)

	// Only the first file encountered may be visited; everything after it,
	// in this batch and the next, must be skipped.
	if assert.Len(t, walkedFiles, 1) {
		assert.Equal(t, sub, filepath.Dir(walkedFiles[0]))
	}
}

// TestWalkDir_Error checks that an error handed to the callback for the root
// path is returned by WalkDir when the callback passes it back.
func TestWalkDir_Error(t *testing.T) {
	propagate := func(_ string, _ fs.DirEntry, walkErr error) error {
		return walkErr
	}
	result := WalkDir("/nonexistent/path", propagate)
	assert.Error(t, result)
}
Loading