blob: 5cc7df53ffc3d01de3efca8bbfa5b39be66fe089 [file] [log] [blame]
Matthias Andreas Benkard832a54e2019-01-29 09:27:38 +01001// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package fastwalk provides a faster version of filepath.Walk for file system
6// scanning tools.
7package fastwalk
8
9import (
10 "errors"
11 "os"
12 "path/filepath"
13 "runtime"
14 "sync"
15)
16
17// TraverseLink is used as a return value from WalkFuncs to indicate that the
18// symlink named in the call may be traversed.
19var TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")
20
21// Walk is a faster implementation of filepath.Walk.
22//
23// filepath.Walk's design necessarily calls os.Lstat on each file,
24// even if the caller needs less info.
25// Many tools need only the type of each file.
26// On some platforms, this information is provided directly by the readdir
27// system call, avoiding the need to stat each file individually.
28// fastwalk_unix.go contains a fork of the syscall routines.
29//
30// See golang.org/issue/16399
31//
32// Walk walks the file tree rooted at root, calling walkFn for
33// each file or directory in the tree, including root.
34//
35// If fastWalk returns filepath.SkipDir, the directory is skipped.
36//
37// Unlike filepath.Walk:
38// * file stat calls must be done by the user.
39// The only provided metadata is the file type, which does not include
40// any permission bits.
41// * multiple goroutines stat the filesystem concurrently. The provided
42// walkFn must be safe for concurrent use.
43// * fastWalk can follow symlinks if walkFn returns the TraverseLink
44// sentinel error. It is the walkFn's responsibility to prevent
45// fastWalk from going into symlink cycles.
46func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
47 // TODO(bradfitz): make numWorkers configurable? We used a
48 // minimum of 4 to give the kernel more info about multiple
49 // things we want, in hopes its I/O scheduling can take
50 // advantage of that. Hopefully most are in cache. Maybe 4 is
51 // even too low of a minimum. Profile more.
52 numWorkers := 4
53 if n := runtime.NumCPU(); n > numWorkers {
54 numWorkers = n
55 }
56
57 // Make sure to wait for all workers to finish, otherwise
58 // walkFn could still be called after returning. This Wait call
59 // runs after close(e.donec) below.
60 var wg sync.WaitGroup
61 defer wg.Wait()
62
63 w := &walker{
64 fn: walkFn,
65 enqueuec: make(chan walkItem, numWorkers), // buffered for performance
66 workc: make(chan walkItem, numWorkers), // buffered for performance
67 donec: make(chan struct{}),
68
69 // buffered for correctness & not leaking goroutines:
70 resc: make(chan error, numWorkers),
71 }
72 defer close(w.donec)
73
74 for i := 0; i < numWorkers; i++ {
75 wg.Add(1)
76 go w.doWork(&wg)
77 }
78 todo := []walkItem{{dir: root}}
79 out := 0
80 for {
81 workc := w.workc
82 var workItem walkItem
83 if len(todo) == 0 {
84 workc = nil
85 } else {
86 workItem = todo[len(todo)-1]
87 }
88 select {
89 case workc <- workItem:
90 todo = todo[:len(todo)-1]
91 out++
92 case it := <-w.enqueuec:
93 todo = append(todo, it)
94 case err := <-w.resc:
95 out--
96 if err != nil {
97 return err
98 }
99 if out == 0 && len(todo) == 0 {
100 // It's safe to quit here, as long as the buffered
101 // enqueue channel isn't also readable, which might
102 // happen if the worker sends both another unit of
103 // work and its result before the other select was
104 // scheduled and both w.resc and w.enqueuec were
105 // readable.
106 select {
107 case it := <-w.enqueuec:
108 todo = append(todo, it)
109 default:
110 return nil
111 }
112 }
113 }
114 }
115}
116
117// doWork reads directories as instructed (via workc) and runs the
118// user's callback function.
119func (w *walker) doWork(wg *sync.WaitGroup) {
120 defer wg.Done()
121 for {
122 select {
123 case <-w.donec:
124 return
125 case it := <-w.workc:
126 select {
127 case <-w.donec:
128 return
129 case w.resc <- w.walk(it.dir, !it.callbackDone):
130 }
131 }
132 }
133}
134
135type walker struct {
136 fn func(path string, typ os.FileMode) error
137
138 donec chan struct{} // closed on fastWalk's return
139 workc chan walkItem // to workers
140 enqueuec chan walkItem // from workers
141 resc chan error // from workers
142}
143
144type walkItem struct {
145 dir string
146 callbackDone bool // callback already called; don't do it again
147}
148
149func (w *walker) enqueue(it walkItem) {
150 select {
151 case w.enqueuec <- it:
152 case <-w.donec:
153 }
154}
155
156func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
157 joined := dirName + string(os.PathSeparator) + baseName
158 if typ == os.ModeDir {
159 w.enqueue(walkItem{dir: joined})
160 return nil
161 }
162
163 err := w.fn(joined, typ)
164 if typ == os.ModeSymlink {
165 if err == TraverseLink {
166 // Set callbackDone so we don't call it twice for both the
167 // symlink-as-symlink and the symlink-as-directory later:
168 w.enqueue(walkItem{dir: joined, callbackDone: true})
169 return nil
170 }
171 if err == filepath.SkipDir {
172 // Permit SkipDir on symlinks too.
173 return nil
174 }
175 }
176 return err
177}
178
179func (w *walker) walk(root string, runUserCallback bool) error {
180 if runUserCallback {
181 err := w.fn(root, os.ModeDir)
182 if err == filepath.SkipDir {
183 return nil
184 }
185 if err != nil {
186 return err
187 }
188 }
189
190 return readDir(root, w.onDirEnt)
191}