Skip to content

Commit

Permalink
gopls/internal/cache: prime goimports cache asynchronously
Browse files Browse the repository at this point in the history
Gopls' refresh of the goimports resolver already introduces
non-determinism into imports operations: gopls does not observe changes
until the asynchronous refresh occurs.

This change allows operations to continue to run on the stale resolver
until a new resolver is ready.

Due to inherent raciness, it's hard to benchmark the impact of this
change: one would have to catch gopls during a refresh, which occurs at
an automatically adjusted pace.

Also update TODOs.

Fixes golang/go#59216

Change-Id: I303df998d804c9a1cd1c0e307872d1d271eed601
Reviewed-on: https://go-review.googlesource.com/c/tools/+/561235
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
  • Loading branch information
findleyr committed Feb 6, 2024
1 parent 8b6359d commit 6d4ccf2
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 46 deletions.
20 changes: 11 additions & 9 deletions gopls/internal/cache/imports.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,21 +207,23 @@ func (s *importsState) refreshProcessEnv() {
start := time.Now()

s.mu.Lock()
env := s.processEnv
if resolver, err := s.processEnv.GetResolver(); err == nil {
resolver.ClearForNewScan()
}
// TODO(rfindley): it's not clear why we're unlocking here. Shouldn't we
// guard the use of env below? In any case, we can prime a separate resolver.
resolver, err := s.processEnv.GetResolver()
s.mu.Unlock()
if err != nil {
return
}

event.Log(s.ctx, "background imports cache refresh starting")

// TODO(rfindley, golang/go#59216): do this priming with a separate resolver,
// and then replace, so that we never have to wait on an unprimed cache.
if err := imports.PrimeCache(context.Background(), env); err == nil {
// Prime the new resolver before updating the processEnv, so that gopls
// doesn't wait on an unprimed cache.
if err := imports.PrimeCache(context.Background(), resolver); err == nil {
event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)))
} else {
event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)), keys.Err.Of(err))
}

s.mu.Lock()
s.processEnv.UpdateResolver(resolver)
s.mu.Unlock()
}
29 changes: 16 additions & 13 deletions internal/imports/fix.go
Original file line number Diff line number Diff line change
Expand Up @@ -701,20 +701,21 @@ func ScoreImportPaths(ctx context.Context, env *ProcessEnv, paths []string) (map
return result, nil
}

func PrimeCache(ctx context.Context, env *ProcessEnv) error {
func PrimeCache(ctx context.Context, resolver Resolver) error {
// Fully scan the disk for directories, but don't actually read any Go files.
callback := &scanCallback{
rootFound: func(gopathwalk.Root) bool {
return true
rootFound: func(root gopathwalk.Root) bool {
// See getCandidatePkgs: walking GOROOT is apparently expensive and
// unnecessary.
return root.Type != gopathwalk.RootGOROOT
},
dirFound: func(pkg *pkg) bool {
return false
},
packageNameLoaded: func(pkg *pkg) bool {
return false
},
// packageNameLoaded and exportsLoaded must never be called.
}
return getCandidatePkgs(ctx, callback, "", "", env)

return resolver.scan(ctx, callback)
}

func candidateImportName(pkg *pkg) string {
Expand Down Expand Up @@ -1089,7 +1090,12 @@ type Resolver interface {
// scoreImportPath returns the relevance for an import path.
scoreImportPath(ctx context.Context, path string) float64

ClearForNewScan()
// ClearForNewScan returns a new Resolver based on the receiver that has
// cleared its internal caches of directory contents.
//
// The new resolver should be primed and then set via
// [ProcessEnv.UpdateResolver].
ClearForNewScan() Resolver
}

// A scanCallback controls a call to scan and receives its results.
Expand Down Expand Up @@ -1270,11 +1276,8 @@ func newGopathResolver(env *ProcessEnv) *gopathResolver {
return r
}

func (r *gopathResolver) ClearForNewScan() {
<-r.scanSema
r.cache = NewDirInfoCache()
r.walked = false
r.scanSema <- struct{}{}
func (r *gopathResolver) ClearForNewScan() Resolver {
return newGopathResolver(r.env)
}

func (r *gopathResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) {
Expand Down
71 changes: 49 additions & 22 deletions internal/imports/mod.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,11 @@ import (
// both caused by populating the cache, albeit in slightly different ways.
//
// A high level list of TODOs:
// - Write an additional benchmark for refreshing the directory state.
// - Split scanning the module cache from other ModuleResolver functionality,
// as it is the source of performance woes (and inconsistency).
// - Allow sharing module cache state across multiple ModuleResolvers.
// - Optimize the scan itself, as there is some redundancy statting and
// reading go.mod files.
// - Make it possible to reuse the current state while running a refresh in
// the background.
// - Fix context cancellation (again): if the context is cancelled while a
// root is being walked, nothing stops that ongoing walk.
// - Invert the relationship between ProcessEnv and Resolver (see the
// docstring of ProcessEnv).
// - Make it easier to use an external resolver implementation.
//
// Smaller TODOs are annotated in the code below.

Expand Down Expand Up @@ -72,7 +67,11 @@ type ModuleResolver struct {
modsByDir []*gocommand.ModuleJSON // ...or by the number of path components in their Dir.

// Scanning state, populated by scan
scanSema chan struct{} // prevents concurrent scans and guards scannedRoots

// scanSema prevents concurrent scans, and guards scannedRoots and the cache
// fields below (though the caches themselves are concurrency safe).
// Receive to acquire, send to release.
scanSema chan struct{}
scannedRoots map[gopathwalk.Root]bool // if true, root has been walked

// Caches of directory info, populated by scans and scan callbacks
Expand All @@ -86,12 +85,16 @@ type ModuleResolver struct {
otherCache *DirInfoCache
}

// newModuleResolver returns a new module-aware goimports resolver.
//
// Note: use caution when modifying this constructor: changes must also be
// reflected in ModuleResolver.ClearForNewScan.
func newModuleResolver(e *ProcessEnv, moduleCacheCache *DirInfoCache) (*ModuleResolver, error) {
r := &ModuleResolver{
env: e,
scanSema: make(chan struct{}, 1),
}
r.scanSema <- struct{}{}
r.scanSema <- struct{}{} // release

goenv, err := r.env.goEnv()
if err != nil {
Expand Down Expand Up @@ -265,10 +268,23 @@ func (r *ModuleResolver) initAllMods() error {
// It preserves the set of roots, but forgets about the set of directories.
// Though it forgets the set of module cache directories, it remembers their
// contents, since they are assumed to be immutable.
func (r *ModuleResolver) ClearForNewScan() {
<-r.scanSema
prevRoots := r.scannedRoots
r.scannedRoots = map[gopathwalk.Root]bool{}
func (r *ModuleResolver) ClearForNewScan() Resolver {
<-r.scanSema // acquire r, to guard scannedRoots
r2 := &ModuleResolver{
env: r.env,
dummyVendorMod: r.dummyVendorMod,
moduleCacheDir: r.moduleCacheDir,
roots: r.roots,
mains: r.mains,
mainByDir: r.mainByDir,
modsByModPath: r.modsByModPath,

scanSema: make(chan struct{}, 1),
scannedRoots: make(map[gopathwalk.Root]bool),
otherCache: NewDirInfoCache(),
moduleCacheCache: r.moduleCacheCache,
}
r2.scanSema <- struct{}{} // r2 must start released
// Invalidate root scans. We don't need to invalidate module cache roots,
// because they are immutable.
// (We don't support a use case where GOMODCACHE is cleaned in the middle of
Expand All @@ -278,12 +294,12 @@ func (r *ModuleResolver) ClearForNewScan() {
// Scanning for new directories in GOMODCACHE should be handled elsewhere,
// via a call to ScanModuleCache.
for _, root := range r.roots {
if root.Type == gopathwalk.RootModuleCache && prevRoots[root] {
r.scannedRoots[root] = true
if root.Type == gopathwalk.RootModuleCache && r.scannedRoots[root] {
r2.scannedRoots[root] = true
}
}
r.otherCache = NewDirInfoCache()
r.scanSema <- struct{}{}
r.scanSema <- struct{}{} // release r
return r2
}

// ClearModuleInfo invalidates resolver state that depends on go.mod file
Expand All @@ -299,16 +315,27 @@ func (e *ProcessEnv) ClearModuleInfo() {
if r, ok := e.resolver.(*ModuleResolver); ok {
resolver, resolverErr := newModuleResolver(e, e.ModCache)
if resolverErr == nil {
<-r.scanSema // guards caches
<-r.scanSema // acquire (guards caches)
resolver.moduleCacheCache = r.moduleCacheCache
resolver.otherCache = r.otherCache
r.scanSema <- struct{}{}
r.scanSema <- struct{}{} // release
}
e.resolver = resolver
e.resolverErr = resolverErr
}
}

// UpdateResolver installs r as the resolver that this ProcessEnv uses for
// imports operations, and clears any previously recorded resolver error.
// It is intended only for resolvers returned by [Resolver.ClearForNewScan].
//
// TODO(rfindley): this awkward API is a result of the (arguably) inverted
// relationship between configuration and state described in the doc comment
// for [ProcessEnv].
func (e *ProcessEnv) UpdateResolver(r Resolver) {
	e.resolver, e.resolverErr = r, nil
}

// findPackage returns the module and directory from within the main modules
// and their dependencies that contains the package at the given import path,
// or returns nil, "" if no module is in scope.
Expand Down Expand Up @@ -580,9 +607,9 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error
select {
case <-ctx.Done():
return
case <-r.scanSema:
case <-r.scanSema: // acquire
}
defer func() { r.scanSema <- struct{}{} }()
defer func() { r.scanSema <- struct{}{} }() // release
// We have the lock on r.scannedRoots, and no other scans can run.
for _, root := range roots {
if ctx.Err() != nil {
Expand Down
4 changes: 2 additions & 2 deletions internal/imports/mod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ import _ "rsc.io/sampler"
// Clear out the resolver's cache, since we've changed the environment.
mt.env.Env["GOFLAGS"] = "-mod=vendor"
mt.env.ClearModuleInfo()
mt.env.resolver.ClearForNewScan()
mt.env.UpdateResolver(mt.env.resolver.ClearForNewScan())
mt.assertModuleFoundInDir("rsc.io/sampler", "sampler", `/vendor/`)
}

Expand Down Expand Up @@ -1314,7 +1314,7 @@ func BenchmarkModuleResolver_RescanModCache(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
scanToSlice(resolver, exclude)
resolver.(*ModuleResolver).ClearForNewScan()
resolver = resolver.ClearForNewScan()
}
}

Expand Down

0 comments on commit 6d4ccf2

Please sign in to comment.